From 71898246d4287d33d294a7a47cd6bafacc3d376f Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Thu, 2 Mar 2017 22:05:05 +0000 Subject: MAINT: Split up the lapack_lite files more sensibly Also uses this splitting as an excuse to ditch the _lite suffix, in favor of a f2c_ prefix for all generated files. Before: * `zlapack_lite.c` - Functions for the `complex128` type. * `dlapack_lite.c` - Every other lapack function After: * `f2c_z_lapack.c` - Functions for the `complex128` type. * `f2c_c_lapack.c` - Functions for the `complex64` type. * `f2c_d_lapack.c` - Functions for the `float64` type. * `f2c_s_lapack.c` - Functions for the `float32` type. * `f2c_lapack.c` - Every other lapack function --- numpy/linalg/lapack_lite/README.rst | 8 +- numpy/linalg/lapack_lite/blas_lite.c | 21082 ------- numpy/linalg/lapack_lite/dlapack_lite.c | 96068 ------------------------------ numpy/linalg/lapack_lite/f2c.c | 680 + numpy/linalg/lapack_lite/f2c_blas.c | 21082 +++++++ numpy/linalg/lapack_lite/f2c_c_lapack.c | 24264 ++++++++ numpy/linalg/lapack_lite/f2c_d_lapack.c | 36180 +++++++++++ numpy/linalg/lapack_lite/f2c_lapack.c | 797 + numpy/linalg/lapack_lite/f2c_lite.c | 671 - numpy/linalg/lapack_lite/f2c_s_lapack.c | 34925 +++++++++++ numpy/linalg/lapack_lite/f2c_z_lapack.c | 26995 +++++++++ numpy/linalg/lapack_lite/make_lite.py | 18 +- numpy/linalg/lapack_lite/zlapack_lite.c | 26995 --------- numpy/linalg/setup.py | 11 +- 14 files changed, 144945 insertions(+), 144831 deletions(-) delete mode 100644 numpy/linalg/lapack_lite/blas_lite.c delete mode 100644 numpy/linalg/lapack_lite/dlapack_lite.c create mode 100644 numpy/linalg/lapack_lite/f2c.c create mode 100644 numpy/linalg/lapack_lite/f2c_blas.c create mode 100644 numpy/linalg/lapack_lite/f2c_c_lapack.c create mode 100644 numpy/linalg/lapack_lite/f2c_d_lapack.c create mode 100644 numpy/linalg/lapack_lite/f2c_lapack.c delete mode 100644 numpy/linalg/lapack_lite/f2c_lite.c create mode 100644 numpy/linalg/lapack_lite/f2c_s_lapack.c create mode 100644 numpy/linalg/lapack_lite/f2c_z_lapack.c delete mode 100644 numpy/linalg/lapack_lite/zlapack_lite.c (limited to 'numpy') diff --git a/numpy/linalg/lapack_lite/README.rst b/numpy/linalg/lapack_lite/README.rst index 144d7209e..16fa06396 100644 --- a/numpy/linalg/lapack_lite/README.rst +++ b/numpy/linalg/lapack_lite/README.rst @@ -3,8 +3,7 @@ Regenerating lapack_lite source :Author: David M. Cooke -The ``numpy/linalg/blas_lite.c``, ``numpy/linalg/dlapack_lite.c``, and -``numpy/linalg/zlapack_lite.c`` are ``f2c``'d versions of the LAPACK routines +The ``numpy/linalg/f2c_*.c`` files are ``f2c``'d versions of the LAPACK routines required by the ``LinearAlgebra`` module, and wrapped by the ``lapack_lite`` module. The scripts in this directory can be used to create these files automatically from a directory of LAPACK source files. @@ -23,9 +22,8 @@ properly. Assuming that you have an unpacked LAPACK source tree in $ python2 ./make_lite.py wrapped_routines ~/LAPACK new-lite/ This will grab the right routines, with dependencies, put them into the -appropriate ``blas_lite.f``, ``dlapack_lite.f``, or ``zlapack_lite.f`` files, -run ``f2c`` over them, then do some scrubbing similar to that done to -generate the CLAPACK_ distribution. +appropriate ``f2c_*.f`` files, run ``f2c`` over them, then do some scrubbing +similar to that done to generate the CLAPACK_ distribution. .. _CLAPACK: http://netlib.org/clapack/index.html diff --git a/numpy/linalg/lapack_lite/blas_lite.c b/numpy/linalg/lapack_lite/blas_lite.c deleted file mode 100644 index 6ef3bf0a3..000000000 --- a/numpy/linalg/lapack_lite/blas_lite.c +++ /dev/null @@ -1,21082 +0,0 @@ -/* -NOTE: This is generated code. Look in Misc/lapack_lite for information on - remaking this file. -*/ -#include "f2c.h" - -#ifdef HAVE_CONFIG -#include "config.h" -#else -extern doublereal dlamch_(char *); -#define EPSILON dlamch_("Epsilon") -#define SAFEMINIMUM dlamch_("Safe minimum") -#define PRECISION dlamch_("Precision") -#define BASE dlamch_("Base") -#endif - -extern doublereal dlapy2_(doublereal *x, doublereal *y); - -/* -f2c knows the exact rules for precedence, and so omits parentheses where not -strictly necessary. Since this is generated code, we don't really care if -it's readable, and we know what is written is correct. So don't warn about -them. -*/ -#if defined(__GNUC__) -#pragma GCC diagnostic ignored "-Wparentheses" -#endif - - -/* Table of constant values */ - -static complex c_b21 = {1.f,0.f}; -static doublecomplex c_b1071 = {1.,0.}; - -/* Subroutine */ int caxpy_(integer *n, complex *ca, complex *cx, integer * - incx, complex *cy, integer *incy) -{ - /* System generated locals */ - integer i__1, i__2, i__3, i__4; - real r__1, r__2; - complex q__1, q__2; - - /* Builtin functions */ - double r_imag(complex *); - - /* Local variables */ - static integer i__, ix, iy; - - -/* - constant times a vector plus a vector. - jack dongarra, linpack, 3/11/78. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --cy; - --cx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if ((r__1 = ca->r, dabs(r__1)) + (r__2 = r_imag(ca), dabs(r__2)) == 0.f) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments - not equal to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - i__3 = iy; - i__4 = ix; - q__2.r = ca->r * cx[i__4].r - ca->i * cx[i__4].i, q__2.i = ca->r * cx[ - i__4].i + ca->i * cx[i__4].r; - q__1.r = cy[i__3].r + q__2.r, q__1.i = cy[i__3].i + q__2.i; - cy[i__2].r = q__1.r, cy[i__2].i = q__1.i; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* code for both increments equal to 1 */ - -L20: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__; - i__4 = i__; - q__2.r = ca->r * cx[i__4].r - ca->i * cx[i__4].i, q__2.i = ca->r * cx[ - i__4].i + ca->i * cx[i__4].r; - q__1.r = cy[i__3].r + q__2.r, q__1.i = cy[i__3].i + q__2.i; - cy[i__2].r = q__1.r, cy[i__2].i = q__1.i; -/* L30: */ - } - return 0; -} /* caxpy_ */ - -/* Subroutine */ int ccopy_(integer *n, complex *cx, integer *incx, complex * - cy, integer *incy) -{ - /* System generated locals */ - integer i__1, i__2, i__3; - - /* Local variables */ - static integer i__, ix, iy; - - -/* - copies a vector, x, to a vector, y. - jack dongarra, linpack, 3/11/78. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --cy; - --cx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments - not equal to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - i__3 = ix; - cy[i__2].r = cx[i__3].r, cy[i__2].i = cx[i__3].i; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* code for both increments equal to 1 */ - -L20: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__; - cy[i__2].r = cx[i__3].r, cy[i__2].i = cx[i__3].i; -/* L30: */ - } - return 0; -} /* ccopy_ */ - -/* Complex */ VOID cdotc_(complex * ret_val, integer *n, complex *cx, integer - *incx, complex *cy, integer *incy) -{ - /* System generated locals */ - integer i__1, i__2; - complex q__1, q__2, q__3; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, ix, iy; - static complex ctemp; - - -/* - forms the dot product of two vectors, conjugating the first - vector. - jack dongarra, linpack, 3/11/78. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --cy; - --cx; - - /* Function Body */ - ctemp.r = 0.f, ctemp.i = 0.f; - ret_val->r = 0.f, ret_val->i = 0.f; - if (*n <= 0) { - return ; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments - not equal to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - r_cnjg(&q__3, &cx[ix]); - i__2 = iy; - q__2.r = q__3.r * cy[i__2].r - q__3.i * cy[i__2].i, q__2.i = q__3.r * - cy[i__2].i + q__3.i * cy[i__2].r; - q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i; - ctemp.r = q__1.r, ctemp.i = q__1.i; - ix += *incx; - iy += *incy; -/* L10: */ - } - ret_val->r = ctemp.r, ret_val->i = ctemp.i; - return ; - -/* code for both increments equal to 1 */ - -L20: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - r_cnjg(&q__3, &cx[i__]); - i__2 = i__; - q__2.r = q__3.r * cy[i__2].r - q__3.i * cy[i__2].i, q__2.i = q__3.r * - cy[i__2].i + q__3.i * cy[i__2].r; - q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i; - ctemp.r = q__1.r, ctemp.i = q__1.i; -/* L30: */ - } - ret_val->r = ctemp.r, ret_val->i = ctemp.i; - return ; -} /* cdotc_ */ - -/* Complex */ VOID cdotu_(complex * ret_val, integer *n, complex *cx, integer - *incx, complex *cy, integer *incy) -{ - /* System generated locals */ - integer i__1, i__2, i__3; - complex q__1, q__2; - - /* Local variables */ - static integer i__, ix, iy; - static complex ctemp; - - -/* - forms the dot product of two vectors. - jack dongarra, linpack, 3/11/78. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --cy; - --cx; - - /* Function Body */ - ctemp.r = 0.f, ctemp.i = 0.f; - ret_val->r = 0.f, ret_val->i = 0.f; - if (*n <= 0) { - return ; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments - not equal to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = ix; - i__3 = iy; - q__2.r = cx[i__2].r * cy[i__3].r - cx[i__2].i * cy[i__3].i, q__2.i = - cx[i__2].r * cy[i__3].i + cx[i__2].i * cy[i__3].r; - q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i; - ctemp.r = q__1.r, ctemp.i = q__1.i; - ix += *incx; - iy += *incy; -/* L10: */ - } - ret_val->r = ctemp.r, ret_val->i = ctemp.i; - return ; - -/* code for both increments equal to 1 */ - -L20: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__; - q__2.r = cx[i__2].r * cy[i__3].r - cx[i__2].i * cy[i__3].i, q__2.i = - cx[i__2].r * cy[i__3].i + cx[i__2].i * cy[i__3].r; - q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i; - ctemp.r = q__1.r, ctemp.i = q__1.i; -/* L30: */ - } - ret_val->r = ctemp.r, ret_val->i = ctemp.i; - return ; -} /* cdotu_ */ - -/* Subroutine */ int cgemm_(char *transa, char *transb, integer *m, integer * - n, integer *k, complex *alpha, complex *a, integer *lda, complex *b, - integer *ldb, complex *beta, complex *c__, integer *ldc) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, - i__3, i__4, i__5, i__6; - complex q__1, q__2, q__3, q__4; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, j, l, info; - static logical nota, notb; - static complex temp; - static logical conja, conjb; - static integer ncola; - extern logical lsame_(char *, char *); - static integer nrowa, nrowb; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - CGEMM performs one of the matrix-matrix operations - - C := alpha*op( A )*op( B ) + beta*C, - - where op( X ) is one of - - op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ), - - alpha and beta are scalars, and A, B and C are matrices, with op( A ) - an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. - - Parameters - ========== - - TRANSA - CHARACTER*1. - On entry, TRANSA specifies the form of op( A ) to be used in - the matrix multiplication as follows: - - TRANSA = 'N' or 'n', op( A ) = A. - - TRANSA = 'T' or 't', op( A ) = A'. - - TRANSA = 'C' or 'c', op( A ) = conjg( A' ). - - Unchanged on exit. - - TRANSB - CHARACTER*1. - On entry, TRANSB specifies the form of op( B ) to be used in - the matrix multiplication as follows: - - TRANSB = 'N' or 'n', op( B ) = B. - - TRANSB = 'T' or 't', op( B ) = B'. - - TRANSB = 'C' or 'c', op( B ) = conjg( B' ). - - Unchanged on exit. - - M - INTEGER. - On entry, M specifies the number of rows of the matrix - op( A ) and of the matrix C. M must be at least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of the matrix - op( B ) and the number of columns of the matrix C. N must be - at least zero. - Unchanged on exit. - - K - INTEGER. - On entry, K specifies the number of columns of the matrix - op( A ) and the number of rows of the matrix op( B ). K must - be at least zero. - Unchanged on exit. - - ALPHA - COMPLEX . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is - k when TRANSA = 'N' or 'n', and is m otherwise. - Before entry with TRANSA = 'N' or 'n', the leading m by k - part of the array A must contain the matrix A, otherwise - the leading k by m part of the array A must contain the - matrix A. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When TRANSA = 'N' or 'n' then - LDA must be at least max( 1, m ), otherwise LDA must be at - least max( 1, k ). - Unchanged on exit. - - B - COMPLEX array of DIMENSION ( LDB, kb ), where kb is - n when TRANSB = 'N' or 'n', and is k otherwise. - Before entry with TRANSB = 'N' or 'n', the leading k by n - part of the array B must contain the matrix B, otherwise - the leading n by k part of the array B must contain the - matrix B. - Unchanged on exit. - - LDB - INTEGER. - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. When TRANSB = 'N' or 'n' then - LDB must be at least max( 1, k ), otherwise LDB must be at - least max( 1, n ). - Unchanged on exit. - - BETA - COMPLEX . - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then C need not be set on input. - Unchanged on exit. - - C - COMPLEX array of DIMENSION ( LDC, n ). - Before entry, the leading m by n part of the array C must - contain the matrix C, except when beta is zero, in which - case C need not be set on entry. - On exit, the array C is overwritten by the m by n matrix - ( alpha*op( A )*op( B ) + beta*C ). - - LDC - INTEGER. - On entry, LDC specifies the first dimension of C as declared - in the calling (sub) program. LDC must be at least - max( 1, m ). - Unchanged on exit. - - - Level 3 Blas routine. - - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - - Set NOTA and NOTB as true if A and B respectively are not - conjugated or transposed, set CONJA and CONJB as true if A and - B respectively are to be transposed but not conjugated and set - NROWA, NCOLA and NROWB as the number of rows and columns of A - and the number of rows of B respectively. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - - /* Function Body */ - nota = lsame_(transa, "N"); - notb = lsame_(transb, "N"); - conja = lsame_(transa, "C"); - conjb = lsame_(transb, "C"); - if (nota) { - nrowa = *m; - ncola = *k; - } else { - nrowa = *k; - ncola = *m; - } - if (notb) { - nrowb = *k; - } else { - nrowb = *n; - } - -/* Test the input parameters. */ - - info = 0; - if (! nota && ! conja && ! lsame_(transa, "T")) { - info = 1; - } else if (! notb && ! conjb && ! lsame_(transb, "T")) { - info = 2; - } else if (*m < 0) { - info = 3; - } else if (*n < 0) { - info = 4; - } else if (*k < 0) { - info = 5; - } else if (*lda < max(1,nrowa)) { - info = 8; - } else if (*ldb < max(1,nrowb)) { - info = 10; - } else if (*ldc < max(1,*m)) { - info = 13; - } - if (info != 0) { - xerbla_("CGEMM ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*m == 0 || *n == 0 || (alpha->r == 0.f && alpha->i == 0.f || *k == 0) - && (beta->r == 1.f && beta->i == 0.f)) { - return 0; - } - -/* And when alpha.eq.zero. */ - - if (alpha->r == 0.f && alpha->i == 0.f) { - if (beta->r == 0.f && beta->i == 0.f) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0.f, c__[i__3].i = 0.f; -/* L10: */ - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, - q__1.i = beta->r * c__[i__4].i + beta->i * c__[ - i__4].r; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L30: */ - } -/* L40: */ - } - } - return 0; - } - -/* Start the operations. */ - - if (notb) { - if (nota) { - -/* Form C := alpha*A*B + beta*C. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (beta->r == 0.f && beta->i == 0.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0.f, c__[i__3].i = 0.f; -/* L50: */ - } - } else if (beta->r != 1.f || beta->i != 0.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4] - .i, q__1.i = beta->r * c__[i__4].i + beta->i * - c__[i__4].r; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L60: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - i__3 = l + j * b_dim1; - if (b[i__3].r != 0.f || b[i__3].i != 0.f) { - i__3 = l + j * b_dim1; - q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, - q__1.i = alpha->r * b[i__3].i + alpha->i * b[ - i__3].r; - temp.r = q__1.r, temp.i = q__1.i; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * c_dim1; - i__5 = i__ + j * c_dim1; - i__6 = i__ + l * a_dim1; - q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, - q__2.i = temp.r * a[i__6].i + temp.i * a[ - i__6].r; - q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5] - .i + q__2.i; - c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; -/* L70: */ - } - } -/* L80: */ - } -/* L90: */ - } - } else if (conja) { - -/* Form C := alpha*conjg( A' )*B + beta*C. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp.r = 0.f, temp.i = 0.f; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - r_cnjg(&q__3, &a[l + i__ * a_dim1]); - i__4 = l + j * b_dim1; - q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, - q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4] - .r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L100: */ - } - if (beta->r == 0.f && beta->i == 0.f) { - i__3 = i__ + j * c_dim1; - q__1.r = alpha->r * temp.r - alpha->i * temp.i, - q__1.i = alpha->r * temp.i + alpha->i * - temp.r; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } else { - i__3 = i__ + j * c_dim1; - q__2.r = alpha->r * temp.r - alpha->i * temp.i, - q__2.i = alpha->r * temp.i + alpha->i * - temp.r; - i__4 = i__ + j * c_dim1; - q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] - .i, q__3.i = beta->r * c__[i__4].i + beta->i * - c__[i__4].r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } -/* L110: */ - } -/* L120: */ - } - } else { - -/* Form C := alpha*A'*B + beta*C */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp.r = 0.f, temp.i = 0.f; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - i__4 = l + i__ * a_dim1; - i__5 = l + j * b_dim1; - q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5] - .i, q__2.i = a[i__4].r * b[i__5].i + a[i__4] - .i * b[i__5].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L130: */ - } - if (beta->r == 0.f && beta->i == 0.f) { - i__3 = i__ + j * c_dim1; - q__1.r = alpha->r * temp.r - alpha->i * temp.i, - q__1.i = alpha->r * temp.i + alpha->i * - temp.r; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } else { - i__3 = i__ + j * c_dim1; - q__2.r = alpha->r * temp.r - alpha->i * temp.i, - q__2.i = alpha->r * temp.i + alpha->i * - temp.r; - i__4 = i__ + j * c_dim1; - q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] - .i, q__3.i = beta->r * c__[i__4].i + beta->i * - c__[i__4].r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } -/* L140: */ - } -/* L150: */ - } - } - } else if (nota) { - if (conjb) { - -/* Form C := alpha*A*conjg( B' ) + beta*C. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (beta->r == 0.f && beta->i == 0.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0.f, c__[i__3].i = 0.f; -/* L160: */ - } - } else if (beta->r != 1.f || beta->i != 0.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4] - .i, q__1.i = beta->r * c__[i__4].i + beta->i * - c__[i__4].r; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L170: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - i__3 = j + l * b_dim1; - if (b[i__3].r != 0.f || b[i__3].i != 0.f) { - r_cnjg(&q__2, &b[j + l * b_dim1]); - q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, - q__1.i = alpha->r * q__2.i + alpha->i * - q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * c_dim1; - i__5 = i__ + j * c_dim1; - i__6 = i__ + l * a_dim1; - q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, - q__2.i = temp.r * a[i__6].i + temp.i * a[ - i__6].r; - q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5] - .i + q__2.i; - c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; -/* L180: */ - } - } -/* L190: */ - } -/* L200: */ - } - } else { - -/* Form C := alpha*A*B' + beta*C */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (beta->r == 0.f && beta->i == 0.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0.f, c__[i__3].i = 0.f; -/* L210: */ - } - } else if (beta->r != 1.f || beta->i != 0.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4] - .i, q__1.i = beta->r * c__[i__4].i + beta->i * - c__[i__4].r; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L220: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - i__3 = j + l * b_dim1; - if (b[i__3].r != 0.f || b[i__3].i != 0.f) { - i__3 = j + l * b_dim1; - q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, - q__1.i = alpha->r * b[i__3].i + alpha->i * b[ - i__3].r; - temp.r = q__1.r, temp.i = q__1.i; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * c_dim1; - i__5 = i__ + j * c_dim1; - i__6 = i__ + l * a_dim1; - q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, - q__2.i = temp.r * a[i__6].i + temp.i * a[ - i__6].r; - q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5] - .i + q__2.i; - c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; -/* L230: */ - } - } -/* L240: */ - } -/* L250: */ - } - } - } else if (conja) { - if (conjb) { - -/* Form C := alpha*conjg( A' )*conjg( B' ) + beta*C. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp.r = 0.f, temp.i = 0.f; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - r_cnjg(&q__3, &a[l + i__ * a_dim1]); - r_cnjg(&q__4, &b[j + l * b_dim1]); - q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, q__2.i = - q__3.r * q__4.i + q__3.i * q__4.r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L260: */ - } - if (beta->r == 0.f && beta->i == 0.f) { - i__3 = i__ + j * c_dim1; - q__1.r = alpha->r * temp.r - alpha->i * temp.i, - q__1.i = alpha->r * temp.i + alpha->i * - temp.r; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } else { - i__3 = i__ + j * c_dim1; - q__2.r = alpha->r * temp.r - alpha->i * temp.i, - q__2.i = alpha->r * temp.i + alpha->i * - temp.r; - i__4 = i__ + j * c_dim1; - q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] - .i, q__3.i = beta->r * c__[i__4].i + beta->i * - c__[i__4].r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } -/* L270: */ - } -/* L280: */ - } - } else { - -/* Form C := alpha*conjg( A' )*B' + beta*C */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp.r = 0.f, temp.i = 0.f; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - r_cnjg(&q__3, &a[l + i__ * a_dim1]); - i__4 = j + l * b_dim1; - q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, - q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4] - .r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L290: */ - } - if (beta->r == 0.f && beta->i == 0.f) { - i__3 = i__ + j * c_dim1; - q__1.r = alpha->r * temp.r - alpha->i * temp.i, - q__1.i = alpha->r * temp.i + alpha->i * - temp.r; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } else { - i__3 = i__ + j * c_dim1; - q__2.r = alpha->r * temp.r - alpha->i * temp.i, - q__2.i = alpha->r * temp.i + alpha->i * - temp.r; - i__4 = i__ + j * c_dim1; - q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] - .i, q__3.i = beta->r * c__[i__4].i + beta->i * - c__[i__4].r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } -/* L300: */ - } -/* L310: */ - } - } - } else { - if (conjb) { - -/* Form C := alpha*A'*conjg( B' ) + beta*C */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp.r = 0.f, temp.i = 0.f; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - i__4 = l + i__ * a_dim1; - r_cnjg(&q__3, &b[j + l * b_dim1]); - q__2.r = a[i__4].r * q__3.r - a[i__4].i * q__3.i, - q__2.i = a[i__4].r * q__3.i + a[i__4].i * - q__3.r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L320: */ - } - if (beta->r == 0.f && beta->i == 0.f) { - i__3 = i__ + j * c_dim1; - q__1.r = alpha->r * temp.r - alpha->i * temp.i, - q__1.i = alpha->r * temp.i + alpha->i * - temp.r; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } else { - i__3 = i__ + j * c_dim1; - q__2.r = alpha->r * temp.r - alpha->i * temp.i, - q__2.i = alpha->r * temp.i + alpha->i * - temp.r; - i__4 = i__ + j * c_dim1; - q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] - .i, q__3.i = beta->r * c__[i__4].i + beta->i * - c__[i__4].r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } -/* L330: */ - } -/* L340: */ - } - } else { - -/* Form C := alpha*A'*B' + beta*C */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp.r = 0.f, temp.i = 0.f; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - i__4 = l + i__ * a_dim1; - i__5 = j + l * b_dim1; - q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5] - .i, q__2.i = a[i__4].r * b[i__5].i + a[i__4] - .i * b[i__5].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L350: */ - } - if (beta->r == 0.f && beta->i == 0.f) { - i__3 = i__ + j * c_dim1; - q__1.r = alpha->r * temp.r - alpha->i * temp.i, - q__1.i = alpha->r * temp.i + alpha->i * - temp.r; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } else { - i__3 = i__ + j * c_dim1; - q__2.r = alpha->r * temp.r - alpha->i * temp.i, - q__2.i = alpha->r * temp.i + alpha->i * - temp.r; - i__4 = i__ + j * c_dim1; - q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] - .i, q__3.i = beta->r * c__[i__4].i + beta->i * - c__[i__4].r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } -/* L360: */ - } -/* L370: */ - } - } - } - - return 0; - -/* End of CGEMM . */ - -} /* cgemm_ */ - -/* Subroutine */ int cgemv_(char *trans, integer *m, integer *n, complex * - alpha, complex *a, integer *lda, complex *x, integer *incx, complex * - beta, complex *y, integer *incy) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - complex q__1, q__2, q__3; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, j, ix, iy, jx, jy, kx, ky, info; - static complex temp; - static integer lenx, leny; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical noconj; - - -/* - Purpose - ======= - - CGEMV performs one of the matrix-vector operations - - y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, or - - y := alpha*conjg( A' )*x + beta*y, - - where alpha and beta are scalars, x and y are vectors and A is an - m by n matrix. - - Parameters - ========== - - TRANS - CHARACTER*1. - On entry, TRANS specifies the operation to be performed as - follows: - - TRANS = 'N' or 'n' y := alpha*A*x + beta*y. - - TRANS = 'T' or 't' y := alpha*A'*x + beta*y. - - TRANS = 'C' or 'c' y := alpha*conjg( A' )*x + beta*y. - - Unchanged on exit. - - M - INTEGER. - On entry, M specifies the number of rows of the matrix A. - M must be at least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of the matrix A. - N must be at least zero. - Unchanged on exit. - - ALPHA - COMPLEX . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - COMPLEX array of DIMENSION ( LDA, n ). - Before entry, the leading m by n part of the array A must - contain the matrix of coefficients. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, m ). - Unchanged on exit. - - X - COMPLEX array of DIMENSION at least - ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' - and at least - ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. - Before entry, the incremented array X must contain the - vector x. - Unchanged on exit. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - BETA - COMPLEX . - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then Y need not be set on input. - Unchanged on exit. - - Y - COMPLEX array of DIMENSION at least - ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' - and at least - ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. - Before entry with BETA non-zero, the incremented array Y - must contain the vector y. On exit, Y is overwritten by the - updated vector y. - - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - --y; - - /* Function Body */ - info = 0; - if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C") - ) { - info = 1; - } else if (*m < 0) { - info = 2; - } else if (*n < 0) { - info = 3; - } else if (*lda < max(1,*m)) { - info = 6; - } else if (*incx == 0) { - info = 8; - } else if (*incy == 0) { - info = 11; - } - if (info != 0) { - xerbla_("CGEMV ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f && (beta->r - == 1.f && beta->i == 0.f)) { - return 0; - } - - noconj = lsame_(trans, "T"); - -/* - Set LENX and LENY, the lengths of the vectors x and y, and set - up the start points in X and Y. -*/ - - if (lsame_(trans, "N")) { - lenx = *n; - leny = *m; - } else { - lenx = *m; - leny = *n; - } - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (lenx - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (leny - 1) * *incy; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through A. - - First form y := beta*y. -*/ - - if (beta->r != 1.f || beta->i != 0.f) { - if (*incy == 1) { - if (beta->r == 0.f && beta->i == 0.f) { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - y[i__2].r = 0.f, y[i__2].i = 0.f; -/* L10: */ - } - } else { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__; - q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, - q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] - .r; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; -/* L20: */ - } - } - } else { - iy = ky; - if (beta->r == 0.f && beta->i == 0.f) { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - y[i__2].r = 0.f, y[i__2].i = 0.f; - iy += *incy; -/* L30: */ - } - } else { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - i__3 = iy; - q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, - q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] - .r; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - iy += *incy; -/* L40: */ - } - } - } - } - if (alpha->r == 0.f && alpha->i == 0.f) { - return 0; - } - if (lsame_(trans, "N")) { - -/* Form y := alpha*A*x + y. */ - - jx = kx; - if (*incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jx; - if (x[i__2].r != 0.f || x[i__2].i != 0.f) { - i__2 = jx; - q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, - q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2] - .r; - temp.r = q__1.r, temp.i = q__1.i; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__; - i__4 = i__; - i__5 = i__ + j * a_dim1; - q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - q__2.i = temp.r * a[i__5].i + temp.i * a[i__5] - .r; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + - q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; -/* L50: */ - } - } - jx += *incx; -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jx; - if (x[i__2].r != 0.f || x[i__2].i != 0.f) { - i__2 = jx; - q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, - q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2] - .r; - temp.r = q__1.r, temp.i = q__1.i; - iy = ky; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = iy; - i__4 = iy; - i__5 = i__ + j * a_dim1; - q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - q__2.i = temp.r * a[i__5].i + temp.i * a[i__5] - .r; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + - q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; - iy += *incy; -/* L70: */ - } - } - jx += *incx; -/* L80: */ - } - } - } else { - -/* Form y := alpha*A'*x + y or y := alpha*conjg( A' )*x + y. */ - - jy = ky; - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp.r = 0.f, temp.i = 0.f; - if (noconj) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__; - q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4] - .i, q__2.i = a[i__3].r * x[i__4].i + a[i__3] - .i * x[i__4].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L90: */ - } - } else { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__3 = i__; - q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, - q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3] - .r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L100: */ - } - } - i__2 = jy; - i__3 = jy; - q__2.r = alpha->r * temp.r - alpha->i * temp.i, q__2.i = - alpha->r * temp.i + alpha->i * temp.r; - q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - jy += *incy; -/* L110: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp.r = 0.f, temp.i = 0.f; - ix = kx; - if (noconj) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = ix; - q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4] - .i, q__2.i = a[i__3].r * x[i__4].i + a[i__3] - .i * x[i__4].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; - ix += *incx; -/* L120: */ - } - } else { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__3 = ix; - q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, - q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3] - .r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; - ix += *incx; -/* L130: */ - } - } - i__2 = jy; - i__3 = jy; - q__2.r = alpha->r * temp.r - alpha->i * temp.i, q__2.i = - alpha->r * temp.i + alpha->i * temp.r; - q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - jy += *incy; -/* L140: */ - } - } - } - - return 0; - -/* End of CGEMV . */ - -} /* cgemv_ */ - -/* Subroutine */ int cgerc_(integer *m, integer *n, complex *alpha, complex * - x, integer *incx, complex *y, integer *incy, complex *a, integer *lda) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - complex q__1, q__2; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, j, ix, jy, kx, info; - static complex temp; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - CGERC performs the rank 1 operation - - A := alpha*x*conjg( y' ) + A, - - where alpha is a scalar, x is an m element vector, y is an n element - vector and A is an m by n matrix. - - Parameters - ========== - - M - INTEGER. - On entry, M specifies the number of rows of the matrix A. - M must be at least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of the matrix A. - N must be at least zero. - Unchanged on exit. - - ALPHA - COMPLEX . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - X - COMPLEX array of dimension at least - ( 1 + ( m - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the m - element vector x. - Unchanged on exit. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - Y - COMPLEX array of dimension at least - ( 1 + ( n - 1 )*abs( INCY ) ). - Before entry, the incremented array Y must contain the n - element vector y. - Unchanged on exit. - - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. - - A - COMPLEX array of DIMENSION ( LDA, n ). - Before entry, the leading m by n part of the array A must - contain the matrix of coefficients. On exit, A is - overwritten by the updated matrix. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, m ). - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --x; - --y; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - info = 0; - if (*m < 0) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*incx == 0) { - info = 5; - } else if (*incy == 0) { - info = 7; - } else if (*lda < max(1,*m)) { - info = 9; - } - if (info != 0) { - xerbla_("CGERC ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f) { - return 0; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through A. -*/ - - if (*incy > 0) { - jy = 1; - } else { - jy = 1 - (*n - 1) * *incy; - } - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jy; - if (y[i__2].r != 0.f || y[i__2].i != 0.f) { - r_cnjg(&q__2, &y[jy]); - q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = - alpha->r * q__2.i + alpha->i * q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - i__5 = i__; - q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i = - x[i__5].r * temp.i + x[i__5].i * temp.r; - q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L10: */ - } - } - jy += *incy; -/* L20: */ - } - } else { - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*m - 1) * *incx; - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jy; - if (y[i__2].r != 0.f || y[i__2].i != 0.f) { - r_cnjg(&q__2, &y[jy]); - q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = - alpha->r * q__2.i + alpha->i * q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - ix = kx; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - i__5 = ix; - q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i = - x[i__5].r * temp.i + x[i__5].i * temp.r; - q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; - ix += *incx; -/* L30: */ - } - } - jy += *incy; -/* L40: */ - } - } - - return 0; - -/* End of CGERC . */ - -} /* cgerc_ */ - -/* Subroutine */ int cgeru_(integer *m, integer *n, complex *alpha, complex * - x, integer *incx, complex *y, integer *incy, complex *a, integer *lda) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - complex q__1, q__2; - - /* Local variables */ - static integer i__, j, ix, jy, kx, info; - static complex temp; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - CGERU performs the rank 1 operation - - A := alpha*x*y' + A, - - where alpha is a scalar, x is an m element vector, y is an n element - vector and A is an m by n matrix. - - Parameters - ========== - - M - INTEGER. - On entry, M specifies the number of rows of the matrix A. - M must be at least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of the matrix A. - N must be at least zero. - Unchanged on exit. - - ALPHA - COMPLEX . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - X - COMPLEX array of dimension at least - ( 1 + ( m - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the m - element vector x. - Unchanged on exit. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - Y - COMPLEX array of dimension at least - ( 1 + ( n - 1 )*abs( INCY ) ). - Before entry, the incremented array Y must contain the n - element vector y. - Unchanged on exit. - - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. - - A - COMPLEX array of DIMENSION ( LDA, n ). - Before entry, the leading m by n part of the array A must - contain the matrix of coefficients. On exit, A is - overwritten by the updated matrix. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, m ). - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --x; - --y; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - info = 0; - if (*m < 0) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*incx == 0) { - info = 5; - } else if (*incy == 0) { - info = 7; - } else if (*lda < max(1,*m)) { - info = 9; - } - if (info != 0) { - xerbla_("CGERU ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f) { - return 0; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through A. -*/ - - if (*incy > 0) { - jy = 1; - } else { - jy = 1 - (*n - 1) * *incy; - } - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jy; - if (y[i__2].r != 0.f || y[i__2].i != 0.f) { - i__2 = jy; - q__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, q__1.i = - alpha->r * y[i__2].i + alpha->i * y[i__2].r; - temp.r = q__1.r, temp.i = q__1.i; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - i__5 = i__; - q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i = - x[i__5].r * temp.i + x[i__5].i * temp.r; - q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L10: */ - } - } - jy += *incy; -/* L20: */ - } - } else { - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*m - 1) * *incx; - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jy; - if (y[i__2].r != 0.f || y[i__2].i != 0.f) { - i__2 = jy; - q__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, q__1.i = - alpha->r * y[i__2].i + alpha->i * y[i__2].r; - temp.r = q__1.r, temp.i = q__1.i; - ix = kx; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - i__5 = ix; - q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i = - x[i__5].r * temp.i + x[i__5].i * temp.r; - q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; - ix += *incx; -/* L30: */ - } - } - jy += *incy; -/* L40: */ - } - } - - return 0; - -/* End of CGERU . */ - -} /* cgeru_ */ - -/* Subroutine */ int chemv_(char *uplo, integer *n, complex *alpha, complex * - a, integer *lda, complex *x, integer *incx, complex *beta, complex *y, - integer *incy) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - real r__1; - complex q__1, q__2, q__3, q__4; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, j, ix, iy, jx, jy, kx, ky, info; - static complex temp1, temp2; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - CHEMV performs the matrix-vector operation - - y := alpha*A*x + beta*y, - - where alpha and beta are scalars, x and y are n element vectors and - A is an n by n hermitian matrix. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the upper or lower - triangular part of the array A is to be referenced as - follows: - - UPLO = 'U' or 'u' Only the upper triangular part of A - is to be referenced. - - UPLO = 'L' or 'l' Only the lower triangular part of A - is to be referenced. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix A. - N must be at least zero. - Unchanged on exit. - - ALPHA - COMPLEX . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - COMPLEX array of DIMENSION ( LDA, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array A must contain the upper - triangular part of the hermitian matrix and the strictly - lower triangular part of A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array A must contain the lower - triangular part of the hermitian matrix and the strictly - upper triangular part of A is not referenced. - Note that the imaginary parts of the diagonal elements need - not be set and are assumed to be zero. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, n ). - Unchanged on exit. - - X - COMPLEX array of dimension at least - ( 1 + ( n - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the n - element vector x. - Unchanged on exit. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - BETA - COMPLEX . - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then Y need not be set on input. - Unchanged on exit. - - Y - COMPLEX array of dimension at least - ( 1 + ( n - 1 )*abs( INCY ) ). - Before entry, the incremented array Y must contain the n - element vector y. On exit, Y is overwritten by the updated - vector y. - - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - --y; - - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*lda < max(1,*n)) { - info = 5; - } else if (*incx == 0) { - info = 7; - } else if (*incy == 0) { - info = 10; - } - if (info != 0) { - xerbla_("CHEMV ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0 || alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f && - beta->i == 0.f)) { - return 0; - } - -/* Set up the start points in X and Y. */ - - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*n - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (*n - 1) * *incy; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through the triangular part - of A. - - First form y := beta*y. -*/ - - if (beta->r != 1.f || beta->i != 0.f) { - if (*incy == 1) { - if (beta->r == 0.f && beta->i == 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - y[i__2].r = 0.f, y[i__2].i = 0.f; -/* L10: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__; - q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, - q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] - .r; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; -/* L20: */ - } - } - } else { - iy = ky; - if (beta->r == 0.f && beta->i == 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - y[i__2].r = 0.f, y[i__2].i = 0.f; - iy += *incy; -/* L30: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - i__3 = iy; - q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, - q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] - .r; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - iy += *incy; -/* L40: */ - } - } - } - } - if (alpha->r == 0.f && alpha->i == 0.f) { - return 0; - } - if (lsame_(uplo, "U")) { - -/* Form y when A is stored in upper triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = - alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = q__1.r, temp1.i = q__1.i; - temp2.r = 0.f, temp2.i = 0.f; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__; - i__4 = i__; - i__5 = i__ + j * a_dim1; - q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, - q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] - .r; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__3 = i__; - q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = - q__3.r * x[i__3].i + q__3.i * x[i__3].r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; -/* L50: */ - } - i__2 = j; - i__3 = j; - i__4 = j + j * a_dim1; - r__1 = a[i__4].r; - q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i; - q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i; - q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = - alpha->r * temp2.i + alpha->i * temp2.r; - q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; -/* L60: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jx; - q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = - alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = q__1.r, temp1.i = q__1.i; - temp2.r = 0.f, temp2.i = 0.f; - ix = kx; - iy = ky; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = iy; - i__4 = iy; - i__5 = i__ + j * a_dim1; - q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, - q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] - .r; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__3 = ix; - q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = - q__3.r * x[i__3].i + q__3.i * x[i__3].r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; - ix += *incx; - iy += *incy; -/* L70: */ - } - i__2 = jy; - i__3 = jy; - i__4 = j + j * a_dim1; - r__1 = a[i__4].r; - q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i; - q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i; - q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = - alpha->r * temp2.i + alpha->i * temp2.r; - q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - jx += *incx; - jy += *incy; -/* L80: */ - } - } - } else { - -/* Form y when A is stored in lower triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = - alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = q__1.r, temp1.i = q__1.i; - temp2.r = 0.f, temp2.i = 0.f; - i__2 = j; - i__3 = j; - i__4 = j + j * a_dim1; - r__1 = a[i__4].r; - q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i; - q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__; - i__4 = i__; - i__5 = i__ + j * a_dim1; - q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, - q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] - .r; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__3 = i__; - q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = - q__3.r * x[i__3].i + q__3.i * x[i__3].r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; -/* L90: */ - } - i__2 = j; - i__3 = j; - q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = - alpha->r * temp2.i + alpha->i * temp2.r; - q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; -/* L100: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jx; - q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = - alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = q__1.r, temp1.i = q__1.i; - temp2.r = 0.f, temp2.i = 0.f; - i__2 = jy; - i__3 = jy; - i__4 = j + j * a_dim1; - r__1 = a[i__4].r; - q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i; - q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - ix = jx; - iy = jy; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - ix += *incx; - iy += *incy; - i__3 = iy; - i__4 = iy; - i__5 = i__ + j * a_dim1; - q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, - q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] - .r; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__3 = ix; - q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = - q__3.r * x[i__3].i + q__3.i * x[i__3].r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; -/* L110: */ - } - i__2 = jy; - i__3 = jy; - q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = - alpha->r * temp2.i + alpha->i * temp2.r; - q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; - jx += *incx; - jy += *incy; -/* L120: */ - } - } - } - - return 0; - -/* End of CHEMV . */ - -} /* chemv_ */ - -/* Subroutine */ int cher2_(char *uplo, integer *n, complex *alpha, complex * - x, integer *incx, complex *y, integer *incy, complex *a, integer *lda) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; - real r__1; - complex q__1, q__2, q__3, q__4; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, j, ix, iy, jx, jy, kx, ky, info; - static complex temp1, temp2; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - CHER2 performs the hermitian rank 2 operation - - A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, - - where alpha is a scalar, x and y are n element vectors and A is an n - by n hermitian matrix. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the upper or lower - triangular part of the array A is to be referenced as - follows: - - UPLO = 'U' or 'u' Only the upper triangular part of A - is to be referenced. - - UPLO = 'L' or 'l' Only the lower triangular part of A - is to be referenced. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix A. - N must be at least zero. - Unchanged on exit. - - ALPHA - COMPLEX . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - X - COMPLEX array of dimension at least - ( 1 + ( n - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the n - element vector x. - Unchanged on exit. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - Y - COMPLEX array of dimension at least - ( 1 + ( n - 1 )*abs( INCY ) ). - Before entry, the incremented array Y must contain the n - element vector y. - Unchanged on exit. - - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. - - A - COMPLEX array of DIMENSION ( LDA, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array A must contain the upper - triangular part of the hermitian matrix and the strictly - lower triangular part of A is not referenced. On exit, the - upper triangular part of the array A is overwritten by the - upper triangular part of the updated matrix. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array A must contain the lower - triangular part of the hermitian matrix and the strictly - upper triangular part of A is not referenced. On exit, the - lower triangular part of the array A is overwritten by the - lower triangular part of the updated matrix. - Note that the imaginary parts of the diagonal elements need - not be set, they are assumed to be zero, and on exit they - are set to zero. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, n ). - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --x; - --y; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*incx == 0) { - info = 5; - } else if (*incy == 0) { - info = 7; - } else if (*lda < max(1,*n)) { - info = 9; - } - if (info != 0) { - xerbla_("CHER2 ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0 || alpha->r == 0.f && alpha->i == 0.f) { - return 0; - } - -/* - Set up the start points in X and Y if the increments are not both - unity. -*/ - - if (*incx != 1 || *incy != 1) { - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*n - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (*n - 1) * *incy; - } - jx = kx; - jy = ky; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through the triangular part - of A. -*/ - - if (lsame_(uplo, "U")) { - -/* Form A when A is stored in the upper triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - i__3 = j; - if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f - || y[i__3].i != 0.f)) { - r_cnjg(&q__2, &y[j]); - q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = - alpha->r * q__2.i + alpha->i * q__2.r; - temp1.r = q__1.r, temp1.i = q__1.i; - i__2 = j; - q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, - q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] - .r; - r_cnjg(&q__1, &q__2); - temp2.r = q__1.r, temp2.i = q__1.i; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - i__5 = i__; - q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, - q__3.i = x[i__5].r * temp1.i + x[i__5].i * - temp1.r; - q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + - q__3.i; - i__6 = i__; - q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, - q__4.i = y[i__6].r * temp2.i + y[i__6].i * - temp2.r; - q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L10: */ - } - i__2 = j + j * a_dim1; - i__3 = j + j * a_dim1; - i__4 = j; - q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, - q__2.i = x[i__4].r * temp1.i + x[i__4].i * - temp1.r; - i__5 = j; - q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, - q__3.i = y[i__5].r * temp2.i + y[i__5].i * - temp2.r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - r__1 = a[i__3].r + q__1.r; - a[i__2].r = r__1, a[i__2].i = 0.f; - } else { - i__2 = j + j * a_dim1; - i__3 = j + j * a_dim1; - r__1 = a[i__3].r; - a[i__2].r = r__1, a[i__2].i = 0.f; - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jx; - i__3 = jy; - if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f - || y[i__3].i != 0.f)) { - r_cnjg(&q__2, &y[jy]); - q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = - alpha->r * q__2.i + alpha->i * q__2.r; - temp1.r = q__1.r, temp1.i = q__1.i; - i__2 = jx; - q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, - q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] - .r; - r_cnjg(&q__1, &q__2); - temp2.r = q__1.r, temp2.i = q__1.i; - ix = kx; - iy = ky; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - i__5 = ix; - q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, - q__3.i = x[i__5].r * temp1.i + x[i__5].i * - temp1.r; - q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + - q__3.i; - i__6 = iy; - q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, - q__4.i = y[i__6].r * temp2.i + y[i__6].i * - temp2.r; - q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; - ix += *incx; - iy += *incy; -/* L30: */ - } - i__2 = j + j * a_dim1; - i__3 = j + j * a_dim1; - i__4 = jx; - q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, - q__2.i = x[i__4].r * temp1.i + x[i__4].i * - temp1.r; - i__5 = jy; - q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, - q__3.i = y[i__5].r * temp2.i + y[i__5].i * - temp2.r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - r__1 = a[i__3].r + q__1.r; - a[i__2].r = r__1, a[i__2].i = 0.f; - } else { - i__2 = j + j * a_dim1; - i__3 = j + j * a_dim1; - r__1 = a[i__3].r; - a[i__2].r = r__1, a[i__2].i = 0.f; - } - jx += *incx; - jy += *incy; -/* L40: */ - } - } - } else { - -/* Form A when A is stored in the lower triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - i__3 = j; - if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f - || y[i__3].i != 0.f)) { - r_cnjg(&q__2, &y[j]); - q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = - alpha->r * q__2.i + alpha->i * q__2.r; - temp1.r = q__1.r, temp1.i = q__1.i; - i__2 = j; - q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, - q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] - .r; - r_cnjg(&q__1, &q__2); - temp2.r = q__1.r, temp2.i = q__1.i; - i__2 = j + j * a_dim1; - i__3 = j + j * a_dim1; - i__4 = j; - q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, - q__2.i = x[i__4].r * temp1.i + x[i__4].i * - temp1.r; - i__5 = j; - q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, - q__3.i = y[i__5].r * temp2.i + y[i__5].i * - temp2.r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - r__1 = a[i__3].r + q__1.r; - a[i__2].r = r__1, a[i__2].i = 0.f; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - i__5 = i__; - q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, - q__3.i = x[i__5].r * temp1.i + x[i__5].i * - temp1.r; - q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + - q__3.i; - i__6 = i__; - q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, - q__4.i = y[i__6].r * temp2.i + y[i__6].i * - temp2.r; - q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L50: */ - } - } else { - i__2 = j + j * a_dim1; - i__3 = j + j * a_dim1; - r__1 = a[i__3].r; - a[i__2].r = r__1, a[i__2].i = 0.f; - } -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jx; - i__3 = jy; - if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f - || y[i__3].i != 0.f)) { - r_cnjg(&q__2, &y[jy]); - q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = - alpha->r * q__2.i + alpha->i * q__2.r; - temp1.r = q__1.r, temp1.i = q__1.i; - i__2 = jx; - q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, - q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] - .r; - r_cnjg(&q__1, &q__2); - temp2.r = q__1.r, temp2.i = q__1.i; - i__2 = j + j * a_dim1; - i__3 = j + j * a_dim1; - i__4 = jx; - q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, - q__2.i = x[i__4].r * temp1.i + x[i__4].i * - temp1.r; - i__5 = jy; - q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, - q__3.i = y[i__5].r * temp2.i + y[i__5].i * - temp2.r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - r__1 = a[i__3].r + q__1.r; - a[i__2].r = r__1, a[i__2].i = 0.f; - ix = jx; - iy = jy; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - ix += *incx; - iy += *incy; - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - i__5 = ix; - q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, - q__3.i = x[i__5].r * temp1.i + x[i__5].i * - temp1.r; - q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + - q__3.i; - i__6 = iy; - q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, - q__4.i = y[i__6].r * temp2.i + y[i__6].i * - temp2.r; - q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L70: */ - } - } else { - i__2 = j + j * a_dim1; - i__3 = j + j * a_dim1; - r__1 = a[i__3].r; - a[i__2].r = r__1, a[i__2].i = 0.f; - } - jx += *incx; - jy += *incy; -/* L80: */ - } - } - } - - return 0; - -/* End of CHER2 . */ - -} /* cher2_ */ - -/* Subroutine */ int cher2k_(char *uplo, char *trans, integer *n, integer *k, - complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, - real *beta, complex *c__, integer *ldc) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, - i__3, i__4, i__5, i__6, i__7; - real r__1; - complex q__1, q__2, q__3, q__4, q__5, q__6; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, j, l, info; - static complex temp1, temp2; - extern logical lsame_(char *, char *); - static integer nrowa; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - CHER2K performs one of the hermitian rank 2k operations - - C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + beta*C, - - or - - C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + beta*C, - - where alpha and beta are scalars with beta real, C is an n by n - hermitian matrix and A and B are n by k matrices in the first case - and k by n matrices in the second case. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the upper or lower - triangular part of the array C is to be referenced as - follows: - - UPLO = 'U' or 'u' Only the upper triangular part of C - is to be referenced. - - UPLO = 'L' or 'l' Only the lower triangular part of C - is to be referenced. - - Unchanged on exit. - - TRANS - CHARACTER*1. - On entry, TRANS specifies the operation to be performed as - follows: - - TRANS = 'N' or 'n' C := alpha*A*conjg( B' ) + - conjg( alpha )*B*conjg( A' ) + - beta*C. - - TRANS = 'C' or 'c' C := alpha*conjg( A' )*B + - conjg( alpha )*conjg( B' )*A + - beta*C. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix C. N must be - at least zero. - Unchanged on exit. - - K - INTEGER. - On entry with TRANS = 'N' or 'n', K specifies the number - of columns of the matrices A and B, and on entry with - TRANS = 'C' or 'c', K specifies the number of rows of the - matrices A and B. K must be at least zero. - Unchanged on exit. - - ALPHA - COMPLEX . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is - k when TRANS = 'N' or 'n', and is n otherwise. - Before entry with TRANS = 'N' or 'n', the leading n by k - part of the array A must contain the matrix A, otherwise - the leading k by n part of the array A must contain the - matrix A. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When TRANS = 'N' or 'n' - then LDA must be at least max( 1, n ), otherwise LDA must - be at least max( 1, k ). - Unchanged on exit. - - B - COMPLEX array of DIMENSION ( LDB, kb ), where kb is - k when TRANS = 'N' or 'n', and is n otherwise. - Before entry with TRANS = 'N' or 'n', the leading n by k - part of the array B must contain the matrix B, otherwise - the leading k by n part of the array B must contain the - matrix B. - Unchanged on exit. - - LDB - INTEGER. - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. When TRANS = 'N' or 'n' - then LDB must be at least max( 1, n ), otherwise LDB must - be at least max( 1, k ). - Unchanged on exit. - - BETA - REAL . - On entry, BETA specifies the scalar beta. - Unchanged on exit. - - C - COMPLEX array of DIMENSION ( LDC, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array C must contain the upper - triangular part of the hermitian matrix and the strictly - lower triangular part of C is not referenced. On exit, the - upper triangular part of the array C is overwritten by the - upper triangular part of the updated matrix. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array C must contain the lower - triangular part of the hermitian matrix and the strictly - upper triangular part of C is not referenced. On exit, the - lower triangular part of the array C is overwritten by the - lower triangular part of the updated matrix. - Note that the imaginary parts of the diagonal elements need - not be set, they are assumed to be zero, and on exit they - are set to zero. - - LDC - INTEGER. - On entry, LDC specifies the first dimension of C as declared - in the calling (sub) program. LDC must be at least - max( 1, n ). - Unchanged on exit. - - - Level 3 Blas routine. - - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1. - Ed Anderson, Cray Research Inc. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - - /* Function Body */ - if (lsame_(trans, "N")) { - nrowa = *n; - } else { - nrowa = *k; - } - upper = lsame_(uplo, "U"); - - info = 0; - if (! upper && ! lsame_(uplo, "L")) { - info = 1; - } else if (! lsame_(trans, "N") && ! lsame_(trans, - "C")) { - info = 2; - } else if (*n < 0) { - info = 3; - } else if (*k < 0) { - info = 4; - } else if (*lda < max(1,nrowa)) { - info = 7; - } else if (*ldb < max(1,nrowa)) { - info = 9; - } else if (*ldc < max(1,*n)) { - info = 12; - } - if (info != 0) { - xerbla_("CHER2K", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0 || (alpha->r == 0.f && alpha->i == 0.f || *k == 0) && *beta == - 1.f) { - return 0; - } - -/* And when alpha.eq.zero. */ - - if (alpha->r == 0.f && alpha->i == 0.f) { - if (upper) { - if (*beta == 0.f) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0.f, c__[i__3].i = 0.f; -/* L10: */ - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ - i__4].i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L30: */ - } - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = *beta * c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; -/* L40: */ - } - } - } else { - if (*beta == 0.f) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0.f, c__[i__3].i = 0.f; -/* L50: */ - } -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = *beta * c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ - i__4].i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L70: */ - } -/* L80: */ - } - } - } - return 0; - } - -/* Start the operations. */ - - if (lsame_(trans, "N")) { - -/* - Form C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + - C. -*/ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.f) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0.f, c__[i__3].i = 0.f; -/* L90: */ - } - } else if (*beta != 1.f) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ - i__4].i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L100: */ - } - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = *beta * c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; - } else { - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - i__3 = j + l * a_dim1; - i__4 = j + l * b_dim1; - if (a[i__3].r != 0.f || a[i__3].i != 0.f || (b[i__4].r != - 0.f || b[i__4].i != 0.f)) { - r_cnjg(&q__2, &b[j + l * b_dim1]); - q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, - q__1.i = alpha->r * q__2.i + alpha->i * - q__2.r; - temp1.r = q__1.r, temp1.i = q__1.i; - i__3 = j + l * a_dim1; - q__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, - q__2.i = alpha->r * a[i__3].i + alpha->i * a[ - i__3].r; - r_cnjg(&q__1, &q__2); - temp2.r = q__1.r, temp2.i = q__1.i; - i__3 = j - 1; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * c_dim1; - i__5 = i__ + j * c_dim1; - i__6 = i__ + l * a_dim1; - q__3.r = a[i__6].r * temp1.r - a[i__6].i * - temp1.i, q__3.i = a[i__6].r * temp1.i + a[ - i__6].i * temp1.r; - q__2.r = c__[i__5].r + q__3.r, q__2.i = c__[i__5] - .i + q__3.i; - i__7 = i__ + l * b_dim1; - q__4.r = b[i__7].r * temp2.r - b[i__7].i * - temp2.i, q__4.i = b[i__7].r * temp2.i + b[ - i__7].i * temp2.r; - q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + - q__4.i; - c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; -/* L110: */ - } - i__3 = j + j * c_dim1; - i__4 = j + j * c_dim1; - i__5 = j + l * a_dim1; - q__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i, - q__2.i = a[i__5].r * temp1.i + a[i__5].i * - temp1.r; - i__6 = j + l * b_dim1; - q__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i, - q__3.i = b[i__6].r * temp2.i + b[i__6].i * - temp2.r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - r__1 = c__[i__4].r + q__1.r; - c__[i__3].r = r__1, c__[i__3].i = 0.f; - } -/* L120: */ - } -/* L130: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.f) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0.f, c__[i__3].i = 0.f; -/* L140: */ - } - } else if (*beta != 1.f) { - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ - i__4].i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L150: */ - } - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = *beta * c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; - } else { - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - i__3 = j + l * a_dim1; - i__4 = j + l * b_dim1; - if (a[i__3].r != 0.f || a[i__3].i != 0.f || (b[i__4].r != - 0.f || b[i__4].i != 0.f)) { - r_cnjg(&q__2, &b[j + l * b_dim1]); - q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, - q__1.i = alpha->r * q__2.i + alpha->i * - q__2.r; - temp1.r = q__1.r, temp1.i = q__1.i; - i__3 = j + l * a_dim1; - q__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, - q__2.i = alpha->r * a[i__3].i + alpha->i * a[ - i__3].r; - r_cnjg(&q__1, &q__2); - temp2.r = q__1.r, temp2.i = q__1.i; - i__3 = *n; - for (i__ = j + 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * c_dim1; - i__5 = i__ + j * c_dim1; - i__6 = i__ + l * a_dim1; - q__3.r = a[i__6].r * temp1.r - a[i__6].i * - temp1.i, q__3.i = a[i__6].r * temp1.i + a[ - i__6].i * temp1.r; - q__2.r = c__[i__5].r + q__3.r, q__2.i = c__[i__5] - .i + q__3.i; - i__7 = i__ + l * b_dim1; - q__4.r = b[i__7].r * temp2.r - b[i__7].i * - temp2.i, q__4.i = b[i__7].r * temp2.i + b[ - i__7].i * temp2.r; - q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + - q__4.i; - c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; -/* L160: */ - } - i__3 = j + j * c_dim1; - i__4 = j + j * c_dim1; - i__5 = j + l * a_dim1; - q__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i, - q__2.i = a[i__5].r * temp1.i + a[i__5].i * - temp1.r; - i__6 = j + l * b_dim1; - q__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i, - q__3.i = b[i__6].r * temp2.i + b[i__6].i * - temp2.r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - r__1 = c__[i__4].r + q__1.r; - c__[i__3].r = r__1, c__[i__3].i = 0.f; - } -/* L170: */ - } -/* L180: */ - } - } - } else { - -/* - Form C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + - C. -*/ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - temp1.r = 0.f, temp1.i = 0.f; - temp2.r = 0.f, temp2.i = 0.f; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - r_cnjg(&q__3, &a[l + i__ * a_dim1]); - i__4 = l + j * b_dim1; - q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, - q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4] - .r; - q__1.r = temp1.r + q__2.r, q__1.i = temp1.i + q__2.i; - temp1.r = q__1.r, temp1.i = q__1.i; - r_cnjg(&q__3, &b[l + i__ * b_dim1]); - i__4 = l + j * a_dim1; - q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, - q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4] - .r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; -/* L190: */ - } - if (i__ == j) { - if (*beta == 0.f) { - i__3 = j + j * c_dim1; - q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, - q__2.i = alpha->r * temp1.i + alpha->i * - temp1.r; - r_cnjg(&q__4, alpha); - q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, - q__3.i = q__4.r * temp2.i + q__4.i * - temp2.r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - r__1 = q__1.r; - c__[i__3].r = r__1, c__[i__3].i = 0.f; - } else { - i__3 = j + j * c_dim1; - i__4 = j + j * c_dim1; - q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, - q__2.i = alpha->r * temp1.i + alpha->i * - temp1.r; - r_cnjg(&q__4, alpha); - q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, - q__3.i = q__4.r * temp2.i + q__4.i * - temp2.r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - r__1 = *beta * c__[i__4].r + q__1.r; - c__[i__3].r = r__1, c__[i__3].i = 0.f; - } - } else { - if (*beta == 0.f) { - i__3 = i__ + j * c_dim1; - q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, - q__2.i = alpha->r * temp1.i + alpha->i * - temp1.r; - r_cnjg(&q__4, alpha); - q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, - q__3.i = q__4.r * temp2.i + q__4.i * - temp2.r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } else { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__3.r = *beta * c__[i__4].r, q__3.i = *beta * - c__[i__4].i; - q__4.r = alpha->r * temp1.r - alpha->i * temp1.i, - q__4.i = alpha->r * temp1.i + alpha->i * - temp1.r; - q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + - q__4.i; - r_cnjg(&q__6, alpha); - q__5.r = q__6.r * temp2.r - q__6.i * temp2.i, - q__5.i = q__6.r * temp2.i + q__6.i * - temp2.r; - q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + - q__5.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } - } -/* L200: */ - } -/* L210: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - temp1.r = 0.f, temp1.i = 0.f; - temp2.r = 0.f, temp2.i = 0.f; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - r_cnjg(&q__3, &a[l + i__ * a_dim1]); - i__4 = l + j * b_dim1; - q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, - q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4] - .r; - q__1.r = temp1.r + q__2.r, q__1.i = temp1.i + q__2.i; - temp1.r = q__1.r, temp1.i = q__1.i; - r_cnjg(&q__3, &b[l + i__ * b_dim1]); - i__4 = l + j * a_dim1; - q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, - q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4] - .r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; -/* L220: */ - } - if (i__ == j) { - if (*beta == 0.f) { - i__3 = j + j * c_dim1; - q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, - q__2.i = alpha->r * temp1.i + alpha->i * - temp1.r; - r_cnjg(&q__4, alpha); - q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, - q__3.i = q__4.r * temp2.i + q__4.i * - temp2.r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - r__1 = q__1.r; - c__[i__3].r = r__1, c__[i__3].i = 0.f; - } else { - i__3 = j + j * c_dim1; - i__4 = j + j * c_dim1; - q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, - q__2.i = alpha->r * temp1.i + alpha->i * - temp1.r; - r_cnjg(&q__4, alpha); - q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, - q__3.i = q__4.r * temp2.i + q__4.i * - temp2.r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - r__1 = *beta * c__[i__4].r + q__1.r; - c__[i__3].r = r__1, c__[i__3].i = 0.f; - } - } else { - if (*beta == 0.f) { - i__3 = i__ + j * c_dim1; - q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, - q__2.i = alpha->r * temp1.i + alpha->i * - temp1.r; - r_cnjg(&q__4, alpha); - q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, - q__3.i = q__4.r * temp2.i + q__4.i * - temp2.r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } else { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__3.r = *beta * c__[i__4].r, q__3.i = *beta * - c__[i__4].i; - q__4.r = alpha->r * temp1.r - alpha->i * temp1.i, - q__4.i = alpha->r * temp1.i + alpha->i * - temp1.r; - q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + - q__4.i; - r_cnjg(&q__6, alpha); - q__5.r = q__6.r * temp2.r - q__6.i * temp2.i, - q__5.i = q__6.r * temp2.i + q__6.i * - temp2.r; - q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + - q__5.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } - } -/* L230: */ - } -/* L240: */ - } - } - } - - return 0; - -/* End of CHER2K. */ - -} /* cher2k_ */ - -/* Subroutine */ int cherk_(char *uplo, char *trans, integer *n, integer *k, - real *alpha, complex *a, integer *lda, real *beta, complex *c__, - integer *ldc) -{ - /* System generated locals */ - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, - i__6; - real r__1; - complex q__1, q__2, q__3; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, j, l, info; - static complex temp; - extern logical lsame_(char *, char *); - static integer nrowa; - static real rtemp; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - CHERK performs one of the hermitian rank k operations - - C := alpha*A*conjg( A' ) + beta*C, - - or - - C := alpha*conjg( A' )*A + beta*C, - - where alpha and beta are real scalars, C is an n by n hermitian - matrix and A is an n by k matrix in the first case and a k by n - matrix in the second case. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the upper or lower - triangular part of the array C is to be referenced as - follows: - - UPLO = 'U' or 'u' Only the upper triangular part of C - is to be referenced. - - UPLO = 'L' or 'l' Only the lower triangular part of C - is to be referenced. - - Unchanged on exit. - - TRANS - CHARACTER*1. - On entry, TRANS specifies the operation to be performed as - follows: - - TRANS = 'N' or 'n' C := alpha*A*conjg( A' ) + beta*C. - - TRANS = 'C' or 'c' C := alpha*conjg( A' )*A + beta*C. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix C. N must be - at least zero. - Unchanged on exit. - - K - INTEGER. - On entry with TRANS = 'N' or 'n', K specifies the number - of columns of the matrix A, and on entry with - TRANS = 'C' or 'c', K specifies the number of rows of the - matrix A. K must be at least zero. - Unchanged on exit. - - ALPHA - REAL . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is - k when TRANS = 'N' or 'n', and is n otherwise. - Before entry with TRANS = 'N' or 'n', the leading n by k - part of the array A must contain the matrix A, otherwise - the leading k by n part of the array A must contain the - matrix A. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When TRANS = 'N' or 'n' - then LDA must be at least max( 1, n ), otherwise LDA must - be at least max( 1, k ). - Unchanged on exit. - - BETA - REAL . - On entry, BETA specifies the scalar beta. - Unchanged on exit. - - C - COMPLEX array of DIMENSION ( LDC, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array C must contain the upper - triangular part of the hermitian matrix and the strictly - lower triangular part of C is not referenced. On exit, the - upper triangular part of the array C is overwritten by the - upper triangular part of the updated matrix. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array C must contain the lower - triangular part of the hermitian matrix and the strictly - upper triangular part of C is not referenced. On exit, the - lower triangular part of the array C is overwritten by the - lower triangular part of the updated matrix. - Note that the imaginary parts of the diagonal elements need - not be set, they are assumed to be zero, and on exit they - are set to zero. - - LDC - INTEGER. - On entry, LDC specifies the first dimension of C as declared - in the calling (sub) program. LDC must be at least - max( 1, n ). - Unchanged on exit. - - - Level 3 Blas routine. - - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1. - Ed Anderson, Cray Research Inc. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - - /* Function Body */ - if (lsame_(trans, "N")) { - nrowa = *n; - } else { - nrowa = *k; - } - upper = lsame_(uplo, "U"); - - info = 0; - if (! upper && ! lsame_(uplo, "L")) { - info = 1; - } else if (! lsame_(trans, "N") && ! lsame_(trans, - "C")) { - info = 2; - } else if (*n < 0) { - info = 3; - } else if (*k < 0) { - info = 4; - } else if (*lda < max(1,nrowa)) { - info = 7; - } else if (*ldc < max(1,*n)) { - info = 10; - } - if (info != 0) { - xerbla_("CHERK ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) { - return 0; - } - -/* And when alpha.eq.zero. */ - - if (*alpha == 0.f) { - if (upper) { - if (*beta == 0.f) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0.f, c__[i__3].i = 0.f; -/* L10: */ - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ - i__4].i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L30: */ - } - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = *beta * c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; -/* L40: */ - } - } - } else { - if (*beta == 0.f) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0.f, c__[i__3].i = 0.f; -/* L50: */ - } -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = *beta * c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ - i__4].i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L70: */ - } -/* L80: */ - } - } - } - return 0; - } - -/* Start the operations. */ - - if (lsame_(trans, "N")) { - -/* Form C := alpha*A*conjg( A' ) + beta*C. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.f) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0.f, c__[i__3].i = 0.f; -/* L90: */ - } - } else if (*beta != 1.f) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ - i__4].i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L100: */ - } - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = *beta * c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; - } else { - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - i__3 = j + l * a_dim1; - if (a[i__3].r != 0.f || a[i__3].i != 0.f) { - r_cnjg(&q__2, &a[j + l * a_dim1]); - q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i; - temp.r = q__1.r, temp.i = q__1.i; - i__3 = j - 1; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * c_dim1; - i__5 = i__ + j * c_dim1; - i__6 = i__ + l * a_dim1; - q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, - q__2.i = temp.r * a[i__6].i + temp.i * a[ - i__6].r; - q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5] - .i + q__2.i; - c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; -/* L110: */ - } - i__3 = j + j * c_dim1; - i__4 = j + j * c_dim1; - i__5 = i__ + l * a_dim1; - q__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - q__1.i = temp.r * a[i__5].i + temp.i * a[i__5] - .r; - r__1 = c__[i__4].r + q__1.r; - c__[i__3].r = r__1, c__[i__3].i = 0.f; - } -/* L120: */ - } -/* L130: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.f) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0.f, c__[i__3].i = 0.f; -/* L140: */ - } - } else if (*beta != 1.f) { - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = *beta * c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ - i__4].i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L150: */ - } - } else { - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - i__3 = j + l * a_dim1; - if (a[i__3].r != 0.f || a[i__3].i != 0.f) { - r_cnjg(&q__2, &a[j + l * a_dim1]); - q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i; - temp.r = q__1.r, temp.i = q__1.i; - i__3 = j + j * c_dim1; - i__4 = j + j * c_dim1; - i__5 = j + l * a_dim1; - q__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - q__1.i = temp.r * a[i__5].i + temp.i * a[i__5] - .r; - r__1 = c__[i__4].r + q__1.r; - c__[i__3].r = r__1, c__[i__3].i = 0.f; - i__3 = *n; - for (i__ = j + 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * c_dim1; - i__5 = i__ + j * c_dim1; - i__6 = i__ + l * a_dim1; - q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, - q__2.i = temp.r * a[i__6].i + temp.i * a[ - i__6].r; - q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5] - .i + q__2.i; - c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; -/* L160: */ - } - } -/* L170: */ - } -/* L180: */ - } - } - } else { - -/* Form C := alpha*conjg( A' )*A + beta*C. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - temp.r = 0.f, temp.i = 0.f; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - r_cnjg(&q__3, &a[l + i__ * a_dim1]); - i__4 = l + j * a_dim1; - q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, - q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4] - .r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L190: */ - } - if (*beta == 0.f) { - i__3 = i__ + j * c_dim1; - q__1.r = *alpha * temp.r, q__1.i = *alpha * temp.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } else { - i__3 = i__ + j * c_dim1; - q__2.r = *alpha * temp.r, q__2.i = *alpha * temp.i; - i__4 = i__ + j * c_dim1; - q__3.r = *beta * c__[i__4].r, q__3.i = *beta * c__[ - i__4].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } -/* L200: */ - } - rtemp = 0.f; - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - r_cnjg(&q__3, &a[l + j * a_dim1]); - i__3 = l + j * a_dim1; - q__2.r = q__3.r * a[i__3].r - q__3.i * a[i__3].i, q__2.i = - q__3.r * a[i__3].i + q__3.i * a[i__3].r; - q__1.r = rtemp + q__2.r, q__1.i = q__2.i; - rtemp = q__1.r; -/* L210: */ - } - if (*beta == 0.f) { - i__2 = j + j * c_dim1; - r__1 = *alpha * rtemp; - c__[i__2].r = r__1, c__[i__2].i = 0.f; - } else { - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = *alpha * rtemp + *beta * c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; - } -/* L220: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - rtemp = 0.f; - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - r_cnjg(&q__3, &a[l + j * a_dim1]); - i__3 = l + j * a_dim1; - q__2.r = q__3.r * a[i__3].r - q__3.i * a[i__3].i, q__2.i = - q__3.r * a[i__3].i + q__3.i * a[i__3].r; - q__1.r = rtemp + q__2.r, q__1.i = q__2.i; - rtemp = q__1.r; -/* L230: */ - } - if (*beta == 0.f) { - i__2 = j + j * c_dim1; - r__1 = *alpha * rtemp; - c__[i__2].r = r__1, c__[i__2].i = 0.f; - } else { - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = *alpha * rtemp + *beta * c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; - } - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - temp.r = 0.f, temp.i = 0.f; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - r_cnjg(&q__3, &a[l + i__ * a_dim1]); - i__4 = l + j * a_dim1; - q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, - q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4] - .r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L240: */ - } - if (*beta == 0.f) { - i__3 = i__ + j * c_dim1; - q__1.r = *alpha * temp.r, q__1.i = *alpha * temp.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } else { - i__3 = i__ + j * c_dim1; - q__2.r = *alpha * temp.r, q__2.i = *alpha * temp.i; - i__4 = i__ + j * c_dim1; - q__3.r = *beta * c__[i__4].r, q__3.i = *beta * c__[ - i__4].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } -/* L250: */ - } -/* L260: */ - } - } - } - - return 0; - -/* End of CHERK . */ - -} /* cherk_ */ - -/* Subroutine */ int cscal_(integer *n, complex *ca, complex *cx, integer * - incx) -{ - /* System generated locals */ - integer i__1, i__2, i__3, i__4; - complex q__1; - - /* Local variables */ - static integer i__, nincx; - - -/* - scales a vector by a constant. - jack dongarra, linpack, 3/11/78. - modified 3/93 to return if incx .le. 0. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --cx; - - /* Function Body */ - if (*n <= 0 || *incx <= 0) { - return 0; - } - if (*incx == 1) { - goto L20; - } - -/* code for increment not equal to 1 */ - - nincx = *n * *incx; - i__1 = nincx; - i__2 = *incx; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - i__3 = i__; - i__4 = i__; - q__1.r = ca->r * cx[i__4].r - ca->i * cx[i__4].i, q__1.i = ca->r * cx[ - i__4].i + ca->i * cx[i__4].r; - cx[i__3].r = q__1.r, cx[i__3].i = q__1.i; -/* L10: */ - } - return 0; - -/* code for increment equal to 1 */ - -L20: - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__1 = i__; - i__3 = i__; - q__1.r = ca->r * cx[i__3].r - ca->i * cx[i__3].i, q__1.i = ca->r * cx[ - i__3].i + ca->i * cx[i__3].r; - cx[i__1].r = q__1.r, cx[i__1].i = q__1.i; -/* L30: */ - } - return 0; -} /* cscal_ */ - -/* Subroutine */ int csscal_(integer *n, real *sa, complex *cx, integer *incx) -{ - /* System generated locals */ - integer i__1, i__2, i__3, i__4; - real r__1, r__2; - complex q__1; - - /* Builtin functions */ - double r_imag(complex *); - - /* Local variables */ - static integer i__, nincx; - - -/* - scales a complex vector by a real constant. - jack dongarra, linpack, 3/11/78. - modified 3/93 to return if incx .le. 0. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --cx; - - /* Function Body */ - if (*n <= 0 || *incx <= 0) { - return 0; - } - if (*incx == 1) { - goto L20; - } - -/* code for increment not equal to 1 */ - - nincx = *n * *incx; - i__1 = nincx; - i__2 = *incx; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - i__3 = i__; - i__4 = i__; - r__1 = *sa * cx[i__4].r; - r__2 = *sa * r_imag(&cx[i__]); - q__1.r = r__1, q__1.i = r__2; - cx[i__3].r = q__1.r, cx[i__3].i = q__1.i; -/* L10: */ - } - return 0; - -/* code for increment equal to 1 */ - -L20: - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__1 = i__; - i__3 = i__; - r__1 = *sa * cx[i__3].r; - r__2 = *sa * r_imag(&cx[i__]); - q__1.r = r__1, q__1.i = r__2; - cx[i__1].r = q__1.r, cx[i__1].i = q__1.i; -/* L30: */ - } - return 0; -} /* csscal_ */ - -/* Subroutine */ int cswap_(integer *n, complex *cx, integer *incx, complex * - cy, integer *incy) -{ - /* System generated locals */ - integer i__1, i__2, i__3; - - /* Local variables */ - static integer i__, ix, iy; - static complex ctemp; - - -/* - interchanges two vectors. - jack dongarra, linpack, 3/11/78. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --cy; - --cx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments not equal - to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = ix; - ctemp.r = cx[i__2].r, ctemp.i = cx[i__2].i; - i__2 = ix; - i__3 = iy; - cx[i__2].r = cy[i__3].r, cx[i__2].i = cy[i__3].i; - i__2 = iy; - cy[i__2].r = ctemp.r, cy[i__2].i = ctemp.i; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* code for both increments equal to 1 */ -L20: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - ctemp.r = cx[i__2].r, ctemp.i = cx[i__2].i; - i__2 = i__; - i__3 = i__; - cx[i__2].r = cy[i__3].r, cx[i__2].i = cy[i__3].i; - i__2 = i__; - cy[i__2].r = ctemp.r, cy[i__2].i = ctemp.i; -/* L30: */ - } - return 0; -} /* cswap_ */ - -/* Subroutine */ int ctrmm_(char *side, char *uplo, char *transa, char *diag, - integer *m, integer *n, complex *alpha, complex *a, integer *lda, - complex *b, integer *ldb) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, - i__6; - complex q__1, q__2, q__3; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, j, k, info; - static complex temp; - extern logical lsame_(char *, char *); - static logical lside; - static integer nrowa; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical noconj, nounit; - - -/* - Purpose - ======= - - CTRMM performs one of the matrix-matrix operations - - B := alpha*op( A )*B, or B := alpha*B*op( A ) - - where alpha is a scalar, B is an m by n matrix, A is a unit, or - non-unit, upper or lower triangular matrix and op( A ) is one of - - op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). - - Parameters - ========== - - SIDE - CHARACTER*1. - On entry, SIDE specifies whether op( A ) multiplies B from - the left or right as follows: - - SIDE = 'L' or 'l' B := alpha*op( A )*B. - - SIDE = 'R' or 'r' B := alpha*B*op( A ). - - Unchanged on exit. - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the matrix A is an upper or - lower triangular matrix as follows: - - UPLO = 'U' or 'u' A is an upper triangular matrix. - - UPLO = 'L' or 'l' A is a lower triangular matrix. - - Unchanged on exit. - - TRANSA - CHARACTER*1. - On entry, TRANSA specifies the form of op( A ) to be used in - the matrix multiplication as follows: - - TRANSA = 'N' or 'n' op( A ) = A. - - TRANSA = 'T' or 't' op( A ) = A'. - - TRANSA = 'C' or 'c' op( A ) = conjg( A' ). - - Unchanged on exit. - - DIAG - CHARACTER*1. - On entry, DIAG specifies whether or not A is unit triangular - as follows: - - DIAG = 'U' or 'u' A is assumed to be unit triangular. - - DIAG = 'N' or 'n' A is not assumed to be unit - triangular. - - Unchanged on exit. - - M - INTEGER. - On entry, M specifies the number of rows of B. M must be at - least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of B. N must be - at least zero. - Unchanged on exit. - - ALPHA - COMPLEX . - On entry, ALPHA specifies the scalar alpha. When alpha is - zero then A is not referenced and B need not be set before - entry. - Unchanged on exit. - - A - COMPLEX array of DIMENSION ( LDA, k ), where k is m - when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. - Before entry with UPLO = 'U' or 'u', the leading k by k - upper triangular part of the array A must contain the upper - triangular matrix and the strictly lower triangular part of - A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading k by k - lower triangular part of the array A must contain the lower - triangular matrix and the strictly upper triangular part of - A is not referenced. - Note that when DIAG = 'U' or 'u', the diagonal elements of - A are not referenced either, but are assumed to be unity. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When SIDE = 'L' or 'l' then - LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' - then LDA must be at least max( 1, n ). - Unchanged on exit. - - B - COMPLEX array of DIMENSION ( LDB, n ). - Before entry, the leading m by n part of the array B must - contain the matrix B, and on exit is overwritten by the - transformed matrix. - - LDB - INTEGER. - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. LDB must be at least - max( 1, m ). - Unchanged on exit. - - - Level 3 Blas routine. - - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - lside = lsame_(side, "L"); - if (lside) { - nrowa = *m; - } else { - nrowa = *n; - } - noconj = lsame_(transa, "T"); - nounit = lsame_(diag, "N"); - upper = lsame_(uplo, "U"); - - info = 0; - if (! lside && ! lsame_(side, "R")) { - info = 1; - } else if (! upper && ! lsame_(uplo, "L")) { - info = 2; - } else if (! lsame_(transa, "N") && ! lsame_(transa, - "T") && ! lsame_(transa, "C")) { - info = 3; - } else if (! lsame_(diag, "U") && ! lsame_(diag, - "N")) { - info = 4; - } else if (*m < 0) { - info = 5; - } else if (*n < 0) { - info = 6; - } else if (*lda < max(1,nrowa)) { - info = 9; - } else if (*ldb < max(1,*m)) { - info = 11; - } - if (info != 0) { - xerbla_("CTRMM ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - -/* And when alpha.eq.zero. */ - - if (alpha->r == 0.f && alpha->i == 0.f) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - b[i__3].r = 0.f, b[i__3].i = 0.f; -/* L10: */ - } -/* L20: */ - } - return 0; - } - -/* Start the operations. */ - - if (lside) { - if (lsame_(transa, "N")) { - -/* Form B := alpha*A*B. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (k = 1; k <= i__2; ++k) { - i__3 = k + j * b_dim1; - if (b[i__3].r != 0.f || b[i__3].i != 0.f) { - i__3 = k + j * b_dim1; - q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3] - .i, q__1.i = alpha->r * b[i__3].i + - alpha->i * b[i__3].r; - temp.r = q__1.r, temp.i = q__1.i; - i__3 = k - 1; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * b_dim1; - i__5 = i__ + j * b_dim1; - i__6 = i__ + k * a_dim1; - q__2.r = temp.r * a[i__6].r - temp.i * a[i__6] - .i, q__2.i = temp.r * a[i__6].i + - temp.i * a[i__6].r; - q__1.r = b[i__5].r + q__2.r, q__1.i = b[i__5] - .i + q__2.i; - b[i__4].r = q__1.r, b[i__4].i = q__1.i; -/* L30: */ - } - if (nounit) { - i__3 = k + k * a_dim1; - q__1.r = temp.r * a[i__3].r - temp.i * a[i__3] - .i, q__1.i = temp.r * a[i__3].i + - temp.i * a[i__3].r; - temp.r = q__1.r, temp.i = q__1.i; - } - i__3 = k + j * b_dim1; - b[i__3].r = temp.r, b[i__3].i = temp.i; - } -/* L40: */ - } -/* L50: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - for (k = *m; k >= 1; --k) { - i__2 = k + j * b_dim1; - if (b[i__2].r != 0.f || b[i__2].i != 0.f) { - i__2 = k + j * b_dim1; - q__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2] - .i, q__1.i = alpha->r * b[i__2].i + - alpha->i * b[i__2].r; - temp.r = q__1.r, temp.i = q__1.i; - i__2 = k + j * b_dim1; - b[i__2].r = temp.r, b[i__2].i = temp.i; - if (nounit) { - i__2 = k + j * b_dim1; - i__3 = k + j * b_dim1; - i__4 = k + k * a_dim1; - q__1.r = b[i__3].r * a[i__4].r - b[i__3].i * - a[i__4].i, q__1.i = b[i__3].r * a[ - i__4].i + b[i__3].i * a[i__4].r; - b[i__2].r = q__1.r, b[i__2].i = q__1.i; - } - i__2 = *m; - for (i__ = k + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - i__5 = i__ + k * a_dim1; - q__2.r = temp.r * a[i__5].r - temp.i * a[i__5] - .i, q__2.i = temp.r * a[i__5].i + - temp.i * a[i__5].r; - q__1.r = b[i__4].r + q__2.r, q__1.i = b[i__4] - .i + q__2.i; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L60: */ - } - } -/* L70: */ - } -/* L80: */ - } - } - } else { - -/* Form B := alpha*A'*B or B := alpha*conjg( A' )*B. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - for (i__ = *m; i__ >= 1; --i__) { - i__2 = i__ + j * b_dim1; - temp.r = b[i__2].r, temp.i = b[i__2].i; - if (noconj) { - if (nounit) { - i__2 = i__ + i__ * a_dim1; - q__1.r = temp.r * a[i__2].r - temp.i * a[i__2] - .i, q__1.i = temp.r * a[i__2].i + - temp.i * a[i__2].r; - temp.r = q__1.r, temp.i = q__1.i; - } - i__2 = i__ - 1; - for (k = 1; k <= i__2; ++k) { - i__3 = k + i__ * a_dim1; - i__4 = k + j * b_dim1; - q__2.r = a[i__3].r * b[i__4].r - a[i__3].i * - b[i__4].i, q__2.i = a[i__3].r * b[ - i__4].i + a[i__3].i * b[i__4].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L90: */ - } - } else { - if (nounit) { - r_cnjg(&q__2, &a[i__ + i__ * a_dim1]); - q__1.r = temp.r * q__2.r - temp.i * q__2.i, - q__1.i = temp.r * q__2.i + temp.i * - q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } - i__2 = i__ - 1; - for (k = 1; k <= i__2; ++k) { - r_cnjg(&q__3, &a[k + i__ * a_dim1]); - i__3 = k + j * b_dim1; - q__2.r = q__3.r * b[i__3].r - q__3.i * b[i__3] - .i, q__2.i = q__3.r * b[i__3].i + - q__3.i * b[i__3].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L100: */ - } - } - i__2 = i__ + j * b_dim1; - q__1.r = alpha->r * temp.r - alpha->i * temp.i, - q__1.i = alpha->r * temp.i + alpha->i * - temp.r; - b[i__2].r = q__1.r, b[i__2].i = q__1.i; -/* L110: */ - } -/* L120: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - temp.r = b[i__3].r, temp.i = b[i__3].i; - if (noconj) { - if (nounit) { - i__3 = i__ + i__ * a_dim1; - q__1.r = temp.r * a[i__3].r - temp.i * a[i__3] - .i, q__1.i = temp.r * a[i__3].i + - temp.i * a[i__3].r; - temp.r = q__1.r, temp.i = q__1.i; - } - i__3 = *m; - for (k = i__ + 1; k <= i__3; ++k) { - i__4 = k + i__ * a_dim1; - i__5 = k + j * b_dim1; - q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * - b[i__5].i, q__2.i = a[i__4].r * b[ - i__5].i + a[i__4].i * b[i__5].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L130: */ - } - } else { - if (nounit) { - r_cnjg(&q__2, &a[i__ + i__ * a_dim1]); - q__1.r = temp.r * q__2.r - temp.i * q__2.i, - q__1.i = temp.r * q__2.i + temp.i * - q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } - i__3 = *m; - for (k = i__ + 1; k <= i__3; ++k) { - r_cnjg(&q__3, &a[k + i__ * a_dim1]); - i__4 = k + j * b_dim1; - q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4] - .i, q__2.i = q__3.r * b[i__4].i + - q__3.i * b[i__4].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L140: */ - } - } - i__3 = i__ + j * b_dim1; - q__1.r = alpha->r * temp.r - alpha->i * temp.i, - q__1.i = alpha->r * temp.i + alpha->i * - temp.r; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L150: */ - } -/* L160: */ - } - } - } - } else { - if (lsame_(transa, "N")) { - -/* Form B := alpha*B*A. */ - - if (upper) { - for (j = *n; j >= 1; --j) { - temp.r = alpha->r, temp.i = alpha->i; - if (nounit) { - i__1 = j + j * a_dim1; - q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, - q__1.i = temp.r * a[i__1].i + temp.i * a[i__1] - .r; - temp.r = q__1.r, temp.i = q__1.i; - } - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + j * b_dim1; - i__3 = i__ + j * b_dim1; - q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, - q__1.i = temp.r * b[i__3].i + temp.i * b[i__3] - .r; - b[i__2].r = q__1.r, b[i__2].i = q__1.i; -/* L170: */ - } - i__1 = j - 1; - for (k = 1; k <= i__1; ++k) { - i__2 = k + j * a_dim1; - if (a[i__2].r != 0.f || a[i__2].i != 0.f) { - i__2 = k + j * a_dim1; - q__1.r = alpha->r * a[i__2].r - alpha->i * a[i__2] - .i, q__1.i = alpha->r * a[i__2].i + - alpha->i * a[i__2].r; - temp.r = q__1.r, temp.i = q__1.i; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - i__5 = i__ + k * b_dim1; - q__2.r = temp.r * b[i__5].r - temp.i * b[i__5] - .i, q__2.i = temp.r * b[i__5].i + - temp.i * b[i__5].r; - q__1.r = b[i__4].r + q__2.r, q__1.i = b[i__4] - .i + q__2.i; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L180: */ - } - } -/* L190: */ - } -/* L200: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp.r = alpha->r, temp.i = alpha->i; - if (nounit) { - i__2 = j + j * a_dim1; - q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, - q__1.i = temp.r * a[i__2].i + temp.i * a[i__2] - .r; - temp.r = q__1.r, temp.i = q__1.i; - } - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, - q__1.i = temp.r * b[i__4].i + temp.i * b[i__4] - .r; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L210: */ - } - i__2 = *n; - for (k = j + 1; k <= i__2; ++k) { - i__3 = k + j * a_dim1; - if (a[i__3].r != 0.f || a[i__3].i != 0.f) { - i__3 = k + j * a_dim1; - q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3] - .i, q__1.i = alpha->r * a[i__3].i + - alpha->i * a[i__3].r; - temp.r = q__1.r, temp.i = q__1.i; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * b_dim1; - i__5 = i__ + j * b_dim1; - i__6 = i__ + k * b_dim1; - q__2.r = temp.r * b[i__6].r - temp.i * b[i__6] - .i, q__2.i = temp.r * b[i__6].i + - temp.i * b[i__6].r; - q__1.r = b[i__5].r + q__2.r, q__1.i = b[i__5] - .i + q__2.i; - b[i__4].r = q__1.r, b[i__4].i = q__1.i; -/* L220: */ - } - } -/* L230: */ - } -/* L240: */ - } - } - } else { - -/* Form B := alpha*B*A' or B := alpha*B*conjg( A' ). */ - - if (upper) { - i__1 = *n; - for (k = 1; k <= i__1; ++k) { - i__2 = k - 1; - for (j = 1; j <= i__2; ++j) { - i__3 = j + k * a_dim1; - if (a[i__3].r != 0.f || a[i__3].i != 0.f) { - if (noconj) { - i__3 = j + k * a_dim1; - q__1.r = alpha->r * a[i__3].r - alpha->i * a[ - i__3].i, q__1.i = alpha->r * a[i__3] - .i + alpha->i * a[i__3].r; - temp.r = q__1.r, temp.i = q__1.i; - } else { - r_cnjg(&q__2, &a[j + k * a_dim1]); - q__1.r = alpha->r * q__2.r - alpha->i * - q__2.i, q__1.i = alpha->r * q__2.i + - alpha->i * q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * b_dim1; - i__5 = i__ + j * b_dim1; - i__6 = i__ + k * b_dim1; - q__2.r = temp.r * b[i__6].r - temp.i * b[i__6] - .i, q__2.i = temp.r * b[i__6].i + - temp.i * b[i__6].r; - q__1.r = b[i__5].r + q__2.r, q__1.i = b[i__5] - .i + q__2.i; - b[i__4].r = q__1.r, b[i__4].i = q__1.i; -/* L250: */ - } - } -/* L260: */ - } - temp.r = alpha->r, temp.i = alpha->i; - if (nounit) { - if (noconj) { - i__2 = k + k * a_dim1; - q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, - q__1.i = temp.r * a[i__2].i + temp.i * a[ - i__2].r; - temp.r = q__1.r, temp.i = q__1.i; - } else { - r_cnjg(&q__2, &a[k + k * a_dim1]); - q__1.r = temp.r * q__2.r - temp.i * q__2.i, - q__1.i = temp.r * q__2.i + temp.i * - q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } - } - if (temp.r != 1.f || temp.i != 0.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + k * b_dim1; - i__4 = i__ + k * b_dim1; - q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, - q__1.i = temp.r * b[i__4].i + temp.i * b[ - i__4].r; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L270: */ - } - } -/* L280: */ - } - } else { - for (k = *n; k >= 1; --k) { - i__1 = *n; - for (j = k + 1; j <= i__1; ++j) { - i__2 = j + k * a_dim1; - if (a[i__2].r != 0.f || a[i__2].i != 0.f) { - if (noconj) { - i__2 = j + k * a_dim1; - q__1.r = alpha->r * a[i__2].r - alpha->i * a[ - i__2].i, q__1.i = alpha->r * a[i__2] - .i + alpha->i * a[i__2].r; - temp.r = q__1.r, temp.i = q__1.i; - } else { - r_cnjg(&q__2, &a[j + k * a_dim1]); - q__1.r = alpha->r * q__2.r - alpha->i * - q__2.i, q__1.i = alpha->r * q__2.i + - alpha->i * q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - i__5 = i__ + k * b_dim1; - q__2.r = temp.r * b[i__5].r - temp.i * b[i__5] - .i, q__2.i = temp.r * b[i__5].i + - temp.i * b[i__5].r; - q__1.r = b[i__4].r + q__2.r, q__1.i = b[i__4] - .i + q__2.i; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L290: */ - } - } -/* L300: */ - } - temp.r = alpha->r, temp.i = alpha->i; - if (nounit) { - if (noconj) { - i__1 = k + k * a_dim1; - q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, - q__1.i = temp.r * a[i__1].i + temp.i * a[ - i__1].r; - temp.r = q__1.r, temp.i = q__1.i; - } else { - r_cnjg(&q__2, &a[k + k * a_dim1]); - q__1.r = temp.r * q__2.r - temp.i * q__2.i, - q__1.i = temp.r * q__2.i + temp.i * - q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } - } - if (temp.r != 1.f || temp.i != 0.f) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + k * b_dim1; - i__3 = i__ + k * b_dim1; - q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, - q__1.i = temp.r * b[i__3].i + temp.i * b[ - i__3].r; - b[i__2].r = q__1.r, b[i__2].i = q__1.i; -/* L310: */ - } - } -/* L320: */ - } - } - } - } - - return 0; - -/* End of CTRMM . */ - -} /* ctrmm_ */ - -/* Subroutine */ int ctrmv_(char *uplo, char *trans, char *diag, integer *n, - complex *a, integer *lda, complex *x, integer *incx) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - complex q__1, q__2, q__3; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, j, ix, jx, kx, info; - static complex temp; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical noconj, nounit; - - -/* - Purpose - ======= - - CTRMV performs one of the matrix-vector operations - - x := A*x, or x := A'*x, or x := conjg( A' )*x, - - where x is an n element vector and A is an n by n unit, or non-unit, - upper or lower triangular matrix. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the matrix is an upper or - lower triangular matrix as follows: - - UPLO = 'U' or 'u' A is an upper triangular matrix. - - UPLO = 'L' or 'l' A is a lower triangular matrix. - - Unchanged on exit. - - TRANS - CHARACTER*1. - On entry, TRANS specifies the operation to be performed as - follows: - - TRANS = 'N' or 'n' x := A*x. - - TRANS = 'T' or 't' x := A'*x. - - TRANS = 'C' or 'c' x := conjg( A' )*x. - - Unchanged on exit. - - DIAG - CHARACTER*1. - On entry, DIAG specifies whether or not A is unit - triangular as follows: - - DIAG = 'U' or 'u' A is assumed to be unit triangular. - - DIAG = 'N' or 'n' A is not assumed to be unit - triangular. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix A. - N must be at least zero. - Unchanged on exit. - - A - COMPLEX array of DIMENSION ( LDA, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array A must contain the upper - triangular matrix and the strictly lower triangular part of - A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array A must contain the lower - triangular matrix and the strictly upper triangular part of - A is not referenced. - Note that when DIAG = 'U' or 'u', the diagonal elements of - A are not referenced either, but are assumed to be unity. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, n ). - Unchanged on exit. - - X - COMPLEX array of dimension at least - ( 1 + ( n - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the n - element vector x. On exit, X is overwritten with the - tranformed vector x. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - info = 1; - } else if (! lsame_(trans, "N") && ! lsame_(trans, - "T") && ! lsame_(trans, "C")) { - info = 2; - } else if (! lsame_(diag, "U") && ! lsame_(diag, - "N")) { - info = 3; - } else if (*n < 0) { - info = 4; - } else if (*lda < max(1,*n)) { - info = 6; - } else if (*incx == 0) { - info = 8; - } - if (info != 0) { - xerbla_("CTRMV ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - - noconj = lsame_(trans, "T"); - nounit = lsame_(diag, "N"); - -/* - Set up the start point in X if the increment is not unity. This - will be ( N - 1 )*INCX too small for descending loops. -*/ - - if (*incx <= 0) { - kx = 1 - (*n - 1) * *incx; - } else if (*incx != 1) { - kx = 1; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through A. -*/ - - if (lsame_(trans, "N")) { - -/* Form x := A*x. */ - - if (lsame_(uplo, "U")) { - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - if (x[i__2].r != 0.f || x[i__2].i != 0.f) { - i__2 = j; - temp.r = x[i__2].r, temp.i = x[i__2].i; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__; - i__4 = i__; - i__5 = i__ + j * a_dim1; - q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - q__2.i = temp.r * a[i__5].i + temp.i * a[ - i__5].r; - q__1.r = x[i__4].r + q__2.r, q__1.i = x[i__4].i + - q__2.i; - x[i__3].r = q__1.r, x[i__3].i = q__1.i; -/* L10: */ - } - if (nounit) { - i__2 = j; - i__3 = j; - i__4 = j + j * a_dim1; - q__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[ - i__4].i, q__1.i = x[i__3].r * a[i__4].i + - x[i__3].i * a[i__4].r; - x[i__2].r = q__1.r, x[i__2].i = q__1.i; - } - } -/* L20: */ - } - } else { - jx = kx; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jx; - if (x[i__2].r != 0.f || x[i__2].i != 0.f) { - i__2 = jx; - temp.r = x[i__2].r, temp.i = x[i__2].i; - ix = kx; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = ix; - i__4 = ix; - i__5 = i__ + j * a_dim1; - q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - q__2.i = temp.r * a[i__5].i + temp.i * a[ - i__5].r; - q__1.r = x[i__4].r + q__2.r, q__1.i = x[i__4].i + - q__2.i; - x[i__3].r = q__1.r, x[i__3].i = q__1.i; - ix += *incx; -/* L30: */ - } - if (nounit) { - i__2 = jx; - i__3 = jx; - i__4 = j + j * a_dim1; - q__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[ - i__4].i, q__1.i = x[i__3].r * a[i__4].i + - x[i__3].i * a[i__4].r; - x[i__2].r = q__1.r, x[i__2].i = q__1.i; - } - } - jx += *incx; -/* L40: */ - } - } - } else { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - i__1 = j; - if (x[i__1].r != 0.f || x[i__1].i != 0.f) { - i__1 = j; - temp.r = x[i__1].r, temp.i = x[i__1].i; - i__1 = j + 1; - for (i__ = *n; i__ >= i__1; --i__) { - i__2 = i__; - i__3 = i__; - i__4 = i__ + j * a_dim1; - q__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, - q__2.i = temp.r * a[i__4].i + temp.i * a[ - i__4].r; - q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + - q__2.i; - x[i__2].r = q__1.r, x[i__2].i = q__1.i; -/* L50: */ - } - if (nounit) { - i__1 = j; - i__2 = j; - i__3 = j + j * a_dim1; - q__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[ - i__3].i, q__1.i = x[i__2].r * a[i__3].i + - x[i__2].i * a[i__3].r; - x[i__1].r = q__1.r, x[i__1].i = q__1.i; - } - } -/* L60: */ - } - } else { - kx += (*n - 1) * *incx; - jx = kx; - for (j = *n; j >= 1; --j) { - i__1 = jx; - if (x[i__1].r != 0.f || x[i__1].i != 0.f) { - i__1 = jx; - temp.r = x[i__1].r, temp.i = x[i__1].i; - ix = kx; - i__1 = j + 1; - for (i__ = *n; i__ >= i__1; --i__) { - i__2 = ix; - i__3 = ix; - i__4 = i__ + j * a_dim1; - q__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, - q__2.i = temp.r * a[i__4].i + temp.i * a[ - i__4].r; - q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + - q__2.i; - x[i__2].r = q__1.r, x[i__2].i = q__1.i; - ix -= *incx; -/* L70: */ - } - if (nounit) { - i__1 = jx; - i__2 = jx; - i__3 = j + j * a_dim1; - q__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[ - i__3].i, q__1.i = x[i__2].r * a[i__3].i + - x[i__2].i * a[i__3].r; - x[i__1].r = q__1.r, x[i__1].i = q__1.i; - } - } - jx -= *incx; -/* L80: */ - } - } - } - } else { - -/* Form x := A'*x or x := conjg( A' )*x. */ - - if (lsame_(uplo, "U")) { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - i__1 = j; - temp.r = x[i__1].r, temp.i = x[i__1].i; - if (noconj) { - if (nounit) { - i__1 = j + j * a_dim1; - q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, - q__1.i = temp.r * a[i__1].i + temp.i * a[ - i__1].r; - temp.r = q__1.r, temp.i = q__1.i; - } - for (i__ = j - 1; i__ >= 1; --i__) { - i__1 = i__ + j * a_dim1; - i__2 = i__; - q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[ - i__2].i, q__2.i = a[i__1].r * x[i__2].i + - a[i__1].i * x[i__2].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L90: */ - } - } else { - if (nounit) { - r_cnjg(&q__2, &a[j + j * a_dim1]); - q__1.r = temp.r * q__2.r - temp.i * q__2.i, - q__1.i = temp.r * q__2.i + temp.i * - q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } - for (i__ = j - 1; i__ >= 1; --i__) { - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__1 = i__; - q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, - q__2.i = q__3.r * x[i__1].i + q__3.i * x[ - i__1].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L100: */ - } - } - i__1 = j; - x[i__1].r = temp.r, x[i__1].i = temp.i; -/* L110: */ - } - } else { - jx = kx + (*n - 1) * *incx; - for (j = *n; j >= 1; --j) { - i__1 = jx; - temp.r = x[i__1].r, temp.i = x[i__1].i; - ix = jx; - if (noconj) { - if (nounit) { - i__1 = j + j * a_dim1; - q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, - q__1.i = temp.r * a[i__1].i + temp.i * a[ - i__1].r; - temp.r = q__1.r, temp.i = q__1.i; - } - for (i__ = j - 1; i__ >= 1; --i__) { - ix -= *incx; - i__1 = i__ + j * a_dim1; - i__2 = ix; - q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[ - i__2].i, q__2.i = a[i__1].r * x[i__2].i + - a[i__1].i * x[i__2].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L120: */ - } - } else { - if (nounit) { - r_cnjg(&q__2, &a[j + j * a_dim1]); - q__1.r = temp.r * q__2.r - temp.i * q__2.i, - q__1.i = temp.r * q__2.i + temp.i * - q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } - for (i__ = j - 1; i__ >= 1; --i__) { - ix -= *incx; - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__1 = ix; - q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, - q__2.i = q__3.r * x[i__1].i + q__3.i * x[ - i__1].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L130: */ - } - } - i__1 = jx; - x[i__1].r = temp.r, x[i__1].i = temp.i; - jx -= *incx; -/* L140: */ - } - } - } else { - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - temp.r = x[i__2].r, temp.i = x[i__2].i; - if (noconj) { - if (nounit) { - i__2 = j + j * a_dim1; - q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, - q__1.i = temp.r * a[i__2].i + temp.i * a[ - i__2].r; - temp.r = q__1.r, temp.i = q__1.i; - } - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__; - q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ - i__4].i, q__2.i = a[i__3].r * x[i__4].i + - a[i__3].i * x[i__4].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L150: */ - } - } else { - if (nounit) { - r_cnjg(&q__2, &a[j + j * a_dim1]); - q__1.r = temp.r * q__2.r - temp.i * q__2.i, - q__1.i = temp.r * q__2.i + temp.i * - q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__3 = i__; - q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, - q__2.i = q__3.r * x[i__3].i + q__3.i * x[ - i__3].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L160: */ - } - } - i__2 = j; - x[i__2].r = temp.r, x[i__2].i = temp.i; -/* L170: */ - } - } else { - jx = kx; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jx; - temp.r = x[i__2].r, temp.i = x[i__2].i; - ix = jx; - if (noconj) { - if (nounit) { - i__2 = j + j * a_dim1; - q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, - q__1.i = temp.r * a[i__2].i + temp.i * a[ - i__2].r; - temp.r = q__1.r, temp.i = q__1.i; - } - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - ix += *incx; - i__3 = i__ + j * a_dim1; - i__4 = ix; - q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ - i__4].i, q__2.i = a[i__3].r * x[i__4].i + - a[i__3].i * x[i__4].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L180: */ - } - } else { - if (nounit) { - r_cnjg(&q__2, &a[j + j * a_dim1]); - q__1.r = temp.r * q__2.r - temp.i * q__2.i, - q__1.i = temp.r * q__2.i + temp.i * - q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - ix += *incx; - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__3 = ix; - q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, - q__2.i = q__3.r * x[i__3].i + q__3.i * x[ - i__3].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L190: */ - } - } - i__2 = jx; - x[i__2].r = temp.r, x[i__2].i = temp.i; - jx += *incx; -/* L200: */ - } - } - } - } - - return 0; - -/* End of CTRMV . */ - -} /* ctrmv_ */ - -/* Subroutine */ int ctrsm_(char *side, char *uplo, char *transa, char *diag, - integer *m, integer *n, complex *alpha, complex *a, integer *lda, - complex *b, integer *ldb) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, - i__6, i__7; - complex q__1, q__2, q__3; - - /* Builtin functions */ - void c_div(complex *, complex *, complex *), r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, j, k, info; - static complex temp; - extern logical lsame_(char *, char *); - static logical lside; - static integer nrowa; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical noconj, nounit; - - -/* - Purpose - ======= - - CTRSM solves one of the matrix equations - - op( A )*X = alpha*B, or X*op( A ) = alpha*B, - - where alpha is a scalar, X and B are m by n matrices, A is a unit, or - non-unit, upper or lower triangular matrix and op( A ) is one of - - op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). - - The matrix X is overwritten on B. - - Parameters - ========== - - SIDE - CHARACTER*1. - On entry, SIDE specifies whether op( A ) appears on the left - or right of X as follows: - - SIDE = 'L' or 'l' op( A )*X = alpha*B. - - SIDE = 'R' or 'r' X*op( A ) = alpha*B. - - Unchanged on exit. - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the matrix A is an upper or - lower triangular matrix as follows: - - UPLO = 'U' or 'u' A is an upper triangular matrix. - - UPLO = 'L' or 'l' A is a lower triangular matrix. - - Unchanged on exit. - - TRANSA - CHARACTER*1. - On entry, TRANSA specifies the form of op( A ) to be used in - the matrix multiplication as follows: - - TRANSA = 'N' or 'n' op( A ) = A. - - TRANSA = 'T' or 't' op( A ) = A'. - - TRANSA = 'C' or 'c' op( A ) = conjg( A' ). - - Unchanged on exit. - - DIAG - CHARACTER*1. - On entry, DIAG specifies whether or not A is unit triangular - as follows: - - DIAG = 'U' or 'u' A is assumed to be unit triangular. - - DIAG = 'N' or 'n' A is not assumed to be unit - triangular. - - Unchanged on exit. - - M - INTEGER. - On entry, M specifies the number of rows of B. M must be at - least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of B. N must be - at least zero. - Unchanged on exit. - - ALPHA - COMPLEX . - On entry, ALPHA specifies the scalar alpha. When alpha is - zero then A is not referenced and B need not be set before - entry. - Unchanged on exit. - - A - COMPLEX array of DIMENSION ( LDA, k ), where k is m - when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. - Before entry with UPLO = 'U' or 'u', the leading k by k - upper triangular part of the array A must contain the upper - triangular matrix and the strictly lower triangular part of - A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading k by k - lower triangular part of the array A must contain the lower - triangular matrix and the strictly upper triangular part of - A is not referenced. - Note that when DIAG = 'U' or 'u', the diagonal elements of - A are not referenced either, but are assumed to be unity. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When SIDE = 'L' or 'l' then - LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' - then LDA must be at least max( 1, n ). - Unchanged on exit. - - B - COMPLEX array of DIMENSION ( LDB, n ). - Before entry, the leading m by n part of the array B must - contain the right-hand side matrix B, and on exit is - overwritten by the solution matrix X. - - LDB - INTEGER. - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. LDB must be at least - max( 1, m ). - Unchanged on exit. - - - Level 3 Blas routine. - - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - lside = lsame_(side, "L"); - if (lside) { - nrowa = *m; - } else { - nrowa = *n; - } - noconj = lsame_(transa, "T"); - nounit = lsame_(diag, "N"); - upper = lsame_(uplo, "U"); - - info = 0; - if (! lside && ! lsame_(side, "R")) { - info = 1; - } else if (! upper && ! lsame_(uplo, "L")) { - info = 2; - } else if (! lsame_(transa, "N") && ! lsame_(transa, - "T") && ! lsame_(transa, "C")) { - info = 3; - } else if (! lsame_(diag, "U") && ! lsame_(diag, - "N")) { - info = 4; - } else if (*m < 0) { - info = 5; - } else if (*n < 0) { - info = 6; - } else if (*lda < max(1,nrowa)) { - info = 9; - } else if (*ldb < max(1,*m)) { - info = 11; - } - if (info != 0) { - xerbla_("CTRSM ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - -/* And when alpha.eq.zero. */ - - if (alpha->r == 0.f && alpha->i == 0.f) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - b[i__3].r = 0.f, b[i__3].i = 0.f; -/* L10: */ - } -/* L20: */ - } - return 0; - } - -/* Start the operations. */ - - if (lside) { - if (lsame_(transa, "N")) { - -/* Form B := alpha*inv( A )*B. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (alpha->r != 1.f || alpha->i != 0.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] - .i, q__1.i = alpha->r * b[i__4].i + - alpha->i * b[i__4].r; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L30: */ - } - } - for (k = *m; k >= 1; --k) { - i__2 = k + j * b_dim1; - if (b[i__2].r != 0.f || b[i__2].i != 0.f) { - if (nounit) { - i__2 = k + j * b_dim1; - c_div(&q__1, &b[k + j * b_dim1], &a[k + k * - a_dim1]); - b[i__2].r = q__1.r, b[i__2].i = q__1.i; - } - i__2 = k - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - i__5 = k + j * b_dim1; - i__6 = i__ + k * a_dim1; - q__2.r = b[i__5].r * a[i__6].r - b[i__5].i * - a[i__6].i, q__2.i = b[i__5].r * a[ - i__6].i + b[i__5].i * a[i__6].r; - q__1.r = b[i__4].r - q__2.r, q__1.i = b[i__4] - .i - q__2.i; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L40: */ - } - } -/* L50: */ - } -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (alpha->r != 1.f || alpha->i != 0.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] - .i, q__1.i = alpha->r * b[i__4].i + - alpha->i * b[i__4].r; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L70: */ - } - } - i__2 = *m; - for (k = 1; k <= i__2; ++k) { - i__3 = k + j * b_dim1; - if (b[i__3].r != 0.f || b[i__3].i != 0.f) { - if (nounit) { - i__3 = k + j * b_dim1; - c_div(&q__1, &b[k + j * b_dim1], &a[k + k * - a_dim1]); - b[i__3].r = q__1.r, b[i__3].i = q__1.i; - } - i__3 = *m; - for (i__ = k + 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * b_dim1; - i__5 = i__ + j * b_dim1; - i__6 = k + j * b_dim1; - i__7 = i__ + k * a_dim1; - q__2.r = b[i__6].r * a[i__7].r - b[i__6].i * - a[i__7].i, q__2.i = b[i__6].r * a[ - i__7].i + b[i__6].i * a[i__7].r; - q__1.r = b[i__5].r - q__2.r, q__1.i = b[i__5] - .i - q__2.i; - b[i__4].r = q__1.r, b[i__4].i = q__1.i; -/* L80: */ - } - } -/* L90: */ - } -/* L100: */ - } - } - } else { - -/* - Form B := alpha*inv( A' )*B - or B := alpha*inv( conjg( A' ) )*B. -*/ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, - q__1.i = alpha->r * b[i__3].i + alpha->i * b[ - i__3].r; - temp.r = q__1.r, temp.i = q__1.i; - if (noconj) { - i__3 = i__ - 1; - for (k = 1; k <= i__3; ++k) { - i__4 = k + i__ * a_dim1; - i__5 = k + j * b_dim1; - q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * - b[i__5].i, q__2.i = a[i__4].r * b[ - i__5].i + a[i__4].i * b[i__5].r; - q__1.r = temp.r - q__2.r, q__1.i = temp.i - - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L110: */ - } - if (nounit) { - c_div(&q__1, &temp, &a[i__ + i__ * a_dim1]); - temp.r = q__1.r, temp.i = q__1.i; - } - } else { - i__3 = i__ - 1; - for (k = 1; k <= i__3; ++k) { - r_cnjg(&q__3, &a[k + i__ * a_dim1]); - i__4 = k + j * b_dim1; - q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4] - .i, q__2.i = q__3.r * b[i__4].i + - q__3.i * b[i__4].r; - q__1.r = temp.r - q__2.r, q__1.i = temp.i - - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L120: */ - } - if (nounit) { - r_cnjg(&q__2, &a[i__ + i__ * a_dim1]); - c_div(&q__1, &temp, &q__2); - temp.r = q__1.r, temp.i = q__1.i; - } - } - i__3 = i__ + j * b_dim1; - b[i__3].r = temp.r, b[i__3].i = temp.i; -/* L130: */ - } -/* L140: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - for (i__ = *m; i__ >= 1; --i__) { - i__2 = i__ + j * b_dim1; - q__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i, - q__1.i = alpha->r * b[i__2].i + alpha->i * b[ - i__2].r; - temp.r = q__1.r, temp.i = q__1.i; - if (noconj) { - i__2 = *m; - for (k = i__ + 1; k <= i__2; ++k) { - i__3 = k + i__ * a_dim1; - i__4 = k + j * b_dim1; - q__2.r = a[i__3].r * b[i__4].r - a[i__3].i * - b[i__4].i, q__2.i = a[i__3].r * b[ - i__4].i + a[i__3].i * b[i__4].r; - q__1.r = temp.r - q__2.r, q__1.i = temp.i - - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L150: */ - } - if (nounit) { - c_div(&q__1, &temp, &a[i__ + i__ * a_dim1]); - temp.r = q__1.r, temp.i = q__1.i; - } - } else { - i__2 = *m; - for (k = i__ + 1; k <= i__2; ++k) { - r_cnjg(&q__3, &a[k + i__ * a_dim1]); - i__3 = k + j * b_dim1; - q__2.r = q__3.r * b[i__3].r - q__3.i * b[i__3] - .i, q__2.i = q__3.r * b[i__3].i + - q__3.i * b[i__3].r; - q__1.r = temp.r - q__2.r, q__1.i = temp.i - - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L160: */ - } - if (nounit) { - r_cnjg(&q__2, &a[i__ + i__ * a_dim1]); - c_div(&q__1, &temp, &q__2); - temp.r = q__1.r, temp.i = q__1.i; - } - } - i__2 = i__ + j * b_dim1; - b[i__2].r = temp.r, b[i__2].i = temp.i; -/* L170: */ - } -/* L180: */ - } - } - } - } else { - if (lsame_(transa, "N")) { - -/* Form B := alpha*B*inv( A ). */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (alpha->r != 1.f || alpha->i != 0.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] - .i, q__1.i = alpha->r * b[i__4].i + - alpha->i * b[i__4].r; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L190: */ - } - } - i__2 = j - 1; - for (k = 1; k <= i__2; ++k) { - i__3 = k + j * a_dim1; - if (a[i__3].r != 0.f || a[i__3].i != 0.f) { - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * b_dim1; - i__5 = i__ + j * b_dim1; - i__6 = k + j * a_dim1; - i__7 = i__ + k * b_dim1; - q__2.r = a[i__6].r * b[i__7].r - a[i__6].i * - b[i__7].i, q__2.i = a[i__6].r * b[ - i__7].i + a[i__6].i * b[i__7].r; - q__1.r = b[i__5].r - q__2.r, q__1.i = b[i__5] - .i - q__2.i; - b[i__4].r = q__1.r, b[i__4].i = q__1.i; -/* L200: */ - } - } -/* L210: */ - } - if (nounit) { - c_div(&q__1, &c_b21, &a[j + j * a_dim1]); - temp.r = q__1.r, temp.i = q__1.i; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, - q__1.i = temp.r * b[i__4].i + temp.i * b[ - i__4].r; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L220: */ - } - } -/* L230: */ - } - } else { - for (j = *n; j >= 1; --j) { - if (alpha->r != 1.f || alpha->i != 0.f) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + j * b_dim1; - i__3 = i__ + j * b_dim1; - q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3] - .i, q__1.i = alpha->r * b[i__3].i + - alpha->i * b[i__3].r; - b[i__2].r = q__1.r, b[i__2].i = q__1.i; -/* L240: */ - } - } - i__1 = *n; - for (k = j + 1; k <= i__1; ++k) { - i__2 = k + j * a_dim1; - if (a[i__2].r != 0.f || a[i__2].i != 0.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - i__5 = k + j * a_dim1; - i__6 = i__ + k * b_dim1; - q__2.r = a[i__5].r * b[i__6].r - a[i__5].i * - b[i__6].i, q__2.i = a[i__5].r * b[ - i__6].i + a[i__5].i * b[i__6].r; - q__1.r = b[i__4].r - q__2.r, q__1.i = b[i__4] - .i - q__2.i; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L250: */ - } - } -/* L260: */ - } - if (nounit) { - c_div(&q__1, &c_b21, &a[j + j * a_dim1]); - temp.r = q__1.r, temp.i = q__1.i; - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + j * b_dim1; - i__3 = i__ + j * b_dim1; - q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, - q__1.i = temp.r * b[i__3].i + temp.i * b[ - i__3].r; - b[i__2].r = q__1.r, b[i__2].i = q__1.i; -/* L270: */ - } - } -/* L280: */ - } - } - } else { - -/* - Form B := alpha*B*inv( A' ) - or B := alpha*B*inv( conjg( A' ) ). -*/ - - if (upper) { - for (k = *n; k >= 1; --k) { - if (nounit) { - if (noconj) { - c_div(&q__1, &c_b21, &a[k + k * a_dim1]); - temp.r = q__1.r, temp.i = q__1.i; - } else { - r_cnjg(&q__2, &a[k + k * a_dim1]); - c_div(&q__1, &c_b21, &q__2); - temp.r = q__1.r, temp.i = q__1.i; - } - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + k * b_dim1; - i__3 = i__ + k * b_dim1; - q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, - q__1.i = temp.r * b[i__3].i + temp.i * b[ - i__3].r; - b[i__2].r = q__1.r, b[i__2].i = q__1.i; -/* L290: */ - } - } - i__1 = k - 1; - for (j = 1; j <= i__1; ++j) { - i__2 = j + k * a_dim1; - if (a[i__2].r != 0.f || a[i__2].i != 0.f) { - if (noconj) { - i__2 = j + k * a_dim1; - temp.r = a[i__2].r, temp.i = a[i__2].i; - } else { - r_cnjg(&q__1, &a[j + k * a_dim1]); - temp.r = q__1.r, temp.i = q__1.i; - } - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - i__5 = i__ + k * b_dim1; - q__2.r = temp.r * b[i__5].r - temp.i * b[i__5] - .i, q__2.i = temp.r * b[i__5].i + - temp.i * b[i__5].r; - q__1.r = b[i__4].r - q__2.r, q__1.i = b[i__4] - .i - q__2.i; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L300: */ - } - } -/* L310: */ - } - if (alpha->r != 1.f || alpha->i != 0.f) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + k * b_dim1; - i__3 = i__ + k * b_dim1; - q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3] - .i, q__1.i = alpha->r * b[i__3].i + - alpha->i * b[i__3].r; - b[i__2].r = q__1.r, b[i__2].i = q__1.i; -/* L320: */ - } - } -/* L330: */ - } - } else { - i__1 = *n; - for (k = 1; k <= i__1; ++k) { - if (nounit) { - if (noconj) { - c_div(&q__1, &c_b21, &a[k + k * a_dim1]); - temp.r = q__1.r, temp.i = q__1.i; - } else { - r_cnjg(&q__2, &a[k + k * a_dim1]); - c_div(&q__1, &c_b21, &q__2); - temp.r = q__1.r, temp.i = q__1.i; - } - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + k * b_dim1; - i__4 = i__ + k * b_dim1; - q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, - q__1.i = temp.r * b[i__4].i + temp.i * b[ - i__4].r; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L340: */ - } - } - i__2 = *n; - for (j = k + 1; j <= i__2; ++j) { - i__3 = j + k * a_dim1; - if (a[i__3].r != 0.f || a[i__3].i != 0.f) { - if (noconj) { - i__3 = j + k * a_dim1; - temp.r = a[i__3].r, temp.i = a[i__3].i; - } else { - r_cnjg(&q__1, &a[j + k * a_dim1]); - temp.r = q__1.r, temp.i = q__1.i; - } - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * b_dim1; - i__5 = i__ + j * b_dim1; - i__6 = i__ + k * b_dim1; - q__2.r = temp.r * b[i__6].r - temp.i * b[i__6] - .i, q__2.i = temp.r * b[i__6].i + - temp.i * b[i__6].r; - q__1.r = b[i__5].r - q__2.r, q__1.i = b[i__5] - .i - q__2.i; - b[i__4].r = q__1.r, b[i__4].i = q__1.i; -/* L350: */ - } - } -/* L360: */ - } - if (alpha->r != 1.f || alpha->i != 0.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + k * b_dim1; - i__4 = i__ + k * b_dim1; - q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] - .i, q__1.i = alpha->r * b[i__4].i + - alpha->i * b[i__4].r; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L370: */ - } - } -/* L380: */ - } - } - } - } - - return 0; - -/* End of CTRSM . */ - -} /* ctrsm_ */ - -/* Subroutine */ int ctrsv_(char *uplo, char *trans, char *diag, integer *n, - complex *a, integer *lda, complex *x, integer *incx) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - complex q__1, q__2, q__3; - - /* Builtin functions */ - void c_div(complex *, complex *, complex *), r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, j, ix, jx, kx, info; - static complex temp; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical noconj, nounit; - - -/* - Purpose - ======= - - CTRSV solves one of the systems of equations - - A*x = b, or A'*x = b, or conjg( A' )*x = b, - - where b and x are n element vectors and A is an n by n unit, or - non-unit, upper or lower triangular matrix. - - No test for singularity or near-singularity is included in this - routine. Such tests must be performed before calling this routine. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the matrix is an upper or - lower triangular matrix as follows: - - UPLO = 'U' or 'u' A is an upper triangular matrix. - - UPLO = 'L' or 'l' A is a lower triangular matrix. - - Unchanged on exit. - - TRANS - CHARACTER*1. - On entry, TRANS specifies the equations to be solved as - follows: - - TRANS = 'N' or 'n' A*x = b. - - TRANS = 'T' or 't' A'*x = b. - - TRANS = 'C' or 'c' conjg( A' )*x = b. - - Unchanged on exit. - - DIAG - CHARACTER*1. - On entry, DIAG specifies whether or not A is unit - triangular as follows: - - DIAG = 'U' or 'u' A is assumed to be unit triangular. - - DIAG = 'N' or 'n' A is not assumed to be unit - triangular. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix A. - N must be at least zero. - Unchanged on exit. - - A - COMPLEX array of DIMENSION ( LDA, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array A must contain the upper - triangular matrix and the strictly lower triangular part of - A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array A must contain the lower - triangular matrix and the strictly upper triangular part of - A is not referenced. - Note that when DIAG = 'U' or 'u', the diagonal elements of - A are not referenced either, but are assumed to be unity. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, n ). - Unchanged on exit. - - X - COMPLEX array of dimension at least - ( 1 + ( n - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the n - element right-hand side vector b. On exit, X is overwritten - with the solution vector x. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - info = 1; - } else if (! lsame_(trans, "N") && ! lsame_(trans, - "T") && ! lsame_(trans, "C")) { - info = 2; - } else if (! lsame_(diag, "U") && ! lsame_(diag, - "N")) { - info = 3; - } else if (*n < 0) { - info = 4; - } else if (*lda < max(1,*n)) { - info = 6; - } else if (*incx == 0) { - info = 8; - } - if (info != 0) { - xerbla_("CTRSV ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - - noconj = lsame_(trans, "T"); - nounit = lsame_(diag, "N"); - -/* - Set up the start point in X if the increment is not unity. This - will be ( N - 1 )*INCX too small for descending loops. -*/ - - if (*incx <= 0) { - kx = 1 - (*n - 1) * *incx; - } else if (*incx != 1) { - kx = 1; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through A. -*/ - - if (lsame_(trans, "N")) { - -/* Form x := inv( A )*x. */ - - if (lsame_(uplo, "U")) { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - i__1 = j; - if (x[i__1].r != 0.f || x[i__1].i != 0.f) { - if (nounit) { - i__1 = j; - c_div(&q__1, &x[j], &a[j + j * a_dim1]); - x[i__1].r = q__1.r, x[i__1].i = q__1.i; - } - i__1 = j; - temp.r = x[i__1].r, temp.i = x[i__1].i; - for (i__ = j - 1; i__ >= 1; --i__) { - i__1 = i__; - i__2 = i__; - i__3 = i__ + j * a_dim1; - q__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, - q__2.i = temp.r * a[i__3].i + temp.i * a[ - i__3].r; - q__1.r = x[i__2].r - q__2.r, q__1.i = x[i__2].i - - q__2.i; - x[i__1].r = q__1.r, x[i__1].i = q__1.i; -/* L10: */ - } - } -/* L20: */ - } - } else { - jx = kx + (*n - 1) * *incx; - for (j = *n; j >= 1; --j) { - i__1 = jx; - if (x[i__1].r != 0.f || x[i__1].i != 0.f) { - if (nounit) { - i__1 = jx; - c_div(&q__1, &x[jx], &a[j + j * a_dim1]); - x[i__1].r = q__1.r, x[i__1].i = q__1.i; - } - i__1 = jx; - temp.r = x[i__1].r, temp.i = x[i__1].i; - ix = jx; - for (i__ = j - 1; i__ >= 1; --i__) { - ix -= *incx; - i__1 = ix; - i__2 = ix; - i__3 = i__ + j * a_dim1; - q__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, - q__2.i = temp.r * a[i__3].i + temp.i * a[ - i__3].r; - q__1.r = x[i__2].r - q__2.r, q__1.i = x[i__2].i - - q__2.i; - x[i__1].r = q__1.r, x[i__1].i = q__1.i; -/* L30: */ - } - } - jx -= *incx; -/* L40: */ - } - } - } else { - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - if (x[i__2].r != 0.f || x[i__2].i != 0.f) { - if (nounit) { - i__2 = j; - c_div(&q__1, &x[j], &a[j + j * a_dim1]); - x[i__2].r = q__1.r, x[i__2].i = q__1.i; - } - i__2 = j; - temp.r = x[i__2].r, temp.i = x[i__2].i; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__; - i__4 = i__; - i__5 = i__ + j * a_dim1; - q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - q__2.i = temp.r * a[i__5].i + temp.i * a[ - i__5].r; - q__1.r = x[i__4].r - q__2.r, q__1.i = x[i__4].i - - q__2.i; - x[i__3].r = q__1.r, x[i__3].i = q__1.i; -/* L50: */ - } - } -/* L60: */ - } - } else { - jx = kx; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jx; - if (x[i__2].r != 0.f || x[i__2].i != 0.f) { - if (nounit) { - i__2 = jx; - c_div(&q__1, &x[jx], &a[j + j * a_dim1]); - x[i__2].r = q__1.r, x[i__2].i = q__1.i; - } - i__2 = jx; - temp.r = x[i__2].r, temp.i = x[i__2].i; - ix = jx; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - ix += *incx; - i__3 = ix; - i__4 = ix; - i__5 = i__ + j * a_dim1; - q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - q__2.i = temp.r * a[i__5].i + temp.i * a[ - i__5].r; - q__1.r = x[i__4].r - q__2.r, q__1.i = x[i__4].i - - q__2.i; - x[i__3].r = q__1.r, x[i__3].i = q__1.i; -/* L70: */ - } - } - jx += *incx; -/* L80: */ - } - } - } - } else { - -/* Form x := inv( A' )*x or x := inv( conjg( A' ) )*x. */ - - if (lsame_(uplo, "U")) { - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - temp.r = x[i__2].r, temp.i = x[i__2].i; - if (noconj) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__; - q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ - i__4].i, q__2.i = a[i__3].r * x[i__4].i + - a[i__3].i * x[i__4].r; - q__1.r = temp.r - q__2.r, q__1.i = temp.i - - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L90: */ - } - if (nounit) { - c_div(&q__1, &temp, &a[j + j * a_dim1]); - temp.r = q__1.r, temp.i = q__1.i; - } - } else { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__3 = i__; - q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, - q__2.i = q__3.r * x[i__3].i + q__3.i * x[ - i__3].r; - q__1.r = temp.r - q__2.r, q__1.i = temp.i - - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L100: */ - } - if (nounit) { - r_cnjg(&q__2, &a[j + j * a_dim1]); - c_div(&q__1, &temp, &q__2); - temp.r = q__1.r, temp.i = q__1.i; - } - } - i__2 = j; - x[i__2].r = temp.r, x[i__2].i = temp.i; -/* L110: */ - } - } else { - jx = kx; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - ix = kx; - i__2 = jx; - temp.r = x[i__2].r, temp.i = x[i__2].i; - if (noconj) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = ix; - q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ - i__4].i, q__2.i = a[i__3].r * x[i__4].i + - a[i__3].i * x[i__4].r; - q__1.r = temp.r - q__2.r, q__1.i = temp.i - - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; - ix += *incx; -/* L120: */ - } - if (nounit) { - c_div(&q__1, &temp, &a[j + j * a_dim1]); - temp.r = q__1.r, temp.i = q__1.i; - } - } else { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__3 = ix; - q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, - q__2.i = q__3.r * x[i__3].i + q__3.i * x[ - i__3].r; - q__1.r = temp.r - q__2.r, q__1.i = temp.i - - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; - ix += *incx; -/* L130: */ - } - if (nounit) { - r_cnjg(&q__2, &a[j + j * a_dim1]); - c_div(&q__1, &temp, &q__2); - temp.r = q__1.r, temp.i = q__1.i; - } - } - i__2 = jx; - x[i__2].r = temp.r, x[i__2].i = temp.i; - jx += *incx; -/* L140: */ - } - } - } else { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - i__1 = j; - temp.r = x[i__1].r, temp.i = x[i__1].i; - if (noconj) { - i__1 = j + 1; - for (i__ = *n; i__ >= i__1; --i__) { - i__2 = i__ + j * a_dim1; - i__3 = i__; - q__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[ - i__3].i, q__2.i = a[i__2].r * x[i__3].i + - a[i__2].i * x[i__3].r; - q__1.r = temp.r - q__2.r, q__1.i = temp.i - - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L150: */ - } - if (nounit) { - c_div(&q__1, &temp, &a[j + j * a_dim1]); - temp.r = q__1.r, temp.i = q__1.i; - } - } else { - i__1 = j + 1; - for (i__ = *n; i__ >= i__1; --i__) { - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__2 = i__; - q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, - q__2.i = q__3.r * x[i__2].i + q__3.i * x[ - i__2].r; - q__1.r = temp.r - q__2.r, q__1.i = temp.i - - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L160: */ - } - if (nounit) { - r_cnjg(&q__2, &a[j + j * a_dim1]); - c_div(&q__1, &temp, &q__2); - temp.r = q__1.r, temp.i = q__1.i; - } - } - i__1 = j; - x[i__1].r = temp.r, x[i__1].i = temp.i; -/* L170: */ - } - } else { - kx += (*n - 1) * *incx; - jx = kx; - for (j = *n; j >= 1; --j) { - ix = kx; - i__1 = jx; - temp.r = x[i__1].r, temp.i = x[i__1].i; - if (noconj) { - i__1 = j + 1; - for (i__ = *n; i__ >= i__1; --i__) { - i__2 = i__ + j * a_dim1; - i__3 = ix; - q__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[ - i__3].i, q__2.i = a[i__2].r * x[i__3].i + - a[i__2].i * x[i__3].r; - q__1.r = temp.r - q__2.r, q__1.i = temp.i - - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; - ix -= *incx; -/* L180: */ - } - if (nounit) { - c_div(&q__1, &temp, &a[j + j * a_dim1]); - temp.r = q__1.r, temp.i = q__1.i; - } - } else { - i__1 = j + 1; - for (i__ = *n; i__ >= i__1; --i__) { - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__2 = ix; - q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, - q__2.i = q__3.r * x[i__2].i + q__3.i * x[ - i__2].r; - q__1.r = temp.r - q__2.r, q__1.i = temp.i - - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; - ix -= *incx; -/* L190: */ - } - if (nounit) { - r_cnjg(&q__2, &a[j + j * a_dim1]); - c_div(&q__1, &temp, &q__2); - temp.r = q__1.r, temp.i = q__1.i; - } - } - i__1 = jx; - x[i__1].r = temp.r, x[i__1].i = temp.i; - jx -= *incx; -/* L200: */ - } - } - } - } - - return 0; - -/* End of CTRSV . */ - -} /* ctrsv_ */ - -/* Subroutine */ int daxpy_(integer *n, doublereal *da, doublereal *dx, - integer *incx, doublereal *dy, integer *incy) -{ - /* System generated locals */ - integer i__1; - - /* Local variables */ - static integer i__, m, ix, iy, mp1; - - -/* - constant times a vector plus a vector. - uses unrolled loops for increments equal to one. - jack dongarra, linpack, 3/11/78. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --dy; - --dx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*da == 0.) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments - not equal to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dy[iy] += *da * dx[ix]; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* - code for both increments equal to 1 - - - clean-up loop -*/ - -L20: - m = *n % 4; - if (m == 0) { - goto L40; - } - i__1 = m; - for (i__ = 1; i__ <= i__1; ++i__) { - dy[i__] += *da * dx[i__]; -/* L30: */ - } - if (*n < 4) { - return 0; - } -L40: - mp1 = m + 1; - i__1 = *n; - for (i__ = mp1; i__ <= i__1; i__ += 4) { - dy[i__] += *da * dx[i__]; - dy[i__ + 1] += *da * dx[i__ + 1]; - dy[i__ + 2] += *da * dx[i__ + 2]; - dy[i__ + 3] += *da * dx[i__ + 3]; -/* L50: */ - } - return 0; -} /* daxpy_ */ - -doublereal dcabs1_(doublecomplex *z__) -{ - /* System generated locals */ - doublereal ret_val; - static doublecomplex equiv_0[1]; - - /* Local variables */ -#define t ((doublereal *)equiv_0) -#define zz (equiv_0) - - zz->r = z__->r, zz->i = z__->i; - ret_val = abs(t[0]) + abs(t[1]); - return ret_val; -} /* dcabs1_ */ - -#undef zz -#undef t - - -/* Subroutine */ int dcopy_(integer *n, doublereal *dx, integer *incx, - doublereal *dy, integer *incy) -{ - /* System generated locals */ - integer i__1; - - /* Local variables */ - static integer i__, m, ix, iy, mp1; - - -/* - copies a vector, x, to a vector, y. - uses unrolled loops for increments equal to one. - jack dongarra, linpack, 3/11/78. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --dy; - --dx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments - not equal to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dy[iy] = dx[ix]; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* - code for both increments equal to 1 - - - clean-up loop -*/ - -L20: - m = *n % 7; - if (m == 0) { - goto L40; - } - i__1 = m; - for (i__ = 1; i__ <= i__1; ++i__) { - dy[i__] = dx[i__]; -/* L30: */ - } - if (*n < 7) { - return 0; - } -L40: - mp1 = m + 1; - i__1 = *n; - for (i__ = mp1; i__ <= i__1; i__ += 7) { - dy[i__] = dx[i__]; - dy[i__ + 1] = dx[i__ + 1]; - dy[i__ + 2] = dx[i__ + 2]; - dy[i__ + 3] = dx[i__ + 3]; - dy[i__ + 4] = dx[i__ + 4]; - dy[i__ + 5] = dx[i__ + 5]; - dy[i__ + 6] = dx[i__ + 6]; -/* L50: */ - } - return 0; -} /* dcopy_ */ - -doublereal ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, - integer *incy) -{ - /* System generated locals */ - integer i__1; - doublereal ret_val; - - /* Local variables */ - static integer i__, m, ix, iy, mp1; - static doublereal dtemp; - - -/* - forms the dot product of two vectors. - uses unrolled loops for increments equal to one. - jack dongarra, linpack, 3/11/78. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --dy; - --dx; - - /* Function Body */ - ret_val = 0.; - dtemp = 0.; - if (*n <= 0) { - return ret_val; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments - not equal to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dtemp += dx[ix] * dy[iy]; - ix += *incx; - iy += *incy; -/* L10: */ - } - ret_val = dtemp; - return ret_val; - -/* - code for both increments equal to 1 - - - clean-up loop -*/ - -L20: - m = *n % 5; - if (m == 0) { - goto L40; - } - i__1 = m; - for (i__ = 1; i__ <= i__1; ++i__) { - dtemp += dx[i__] * dy[i__]; -/* L30: */ - } - if (*n < 5) { - goto L60; - } -L40: - mp1 = m + 1; - i__1 = *n; - for (i__ = mp1; i__ <= i__1; i__ += 5) { - dtemp = dtemp + dx[i__] * dy[i__] + dx[i__ + 1] * dy[i__ + 1] + dx[ - i__ + 2] * dy[i__ + 2] + dx[i__ + 3] * dy[i__ + 3] + dx[i__ + - 4] * dy[i__ + 4]; -/* L50: */ - } -L60: - ret_val = dtemp; - return ret_val; -} /* ddot_ */ - -/* Subroutine */ int dgemm_(char *transa, char *transb, integer *m, integer * - n, integer *k, doublereal *alpha, doublereal *a, integer *lda, - doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, - integer *ldc) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, - i__3; - - /* Local variables */ - static integer i__, j, l, info; - static logical nota, notb; - static doublereal temp; - static integer ncola; - extern logical lsame_(char *, char *); - static integer nrowa, nrowb; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - DGEMM performs one of the matrix-matrix operations - - C := alpha*op( A )*op( B ) + beta*C, - - where op( X ) is one of - - op( X ) = X or op( X ) = X', - - alpha and beta are scalars, and A, B and C are matrices, with op( A ) - an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. - - Parameters - ========== - - TRANSA - CHARACTER*1. - On entry, TRANSA specifies the form of op( A ) to be used in - the matrix multiplication as follows: - - TRANSA = 'N' or 'n', op( A ) = A. - - TRANSA = 'T' or 't', op( A ) = A'. - - TRANSA = 'C' or 'c', op( A ) = A'. - - Unchanged on exit. - - TRANSB - CHARACTER*1. - On entry, TRANSB specifies the form of op( B ) to be used in - the matrix multiplication as follows: - - TRANSB = 'N' or 'n', op( B ) = B. - - TRANSB = 'T' or 't', op( B ) = B'. - - TRANSB = 'C' or 'c', op( B ) = B'. - - Unchanged on exit. - - M - INTEGER. - On entry, M specifies the number of rows of the matrix - op( A ) and of the matrix C. M must be at least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of the matrix - op( B ) and the number of columns of the matrix C. N must be - at least zero. - Unchanged on exit. - - K - INTEGER. - On entry, K specifies the number of columns of the matrix - op( A ) and the number of rows of the matrix op( B ). K must - be at least zero. - Unchanged on exit. - - ALPHA - DOUBLE PRECISION. - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is - k when TRANSA = 'N' or 'n', and is m otherwise. - Before entry with TRANSA = 'N' or 'n', the leading m by k - part of the array A must contain the matrix A, otherwise - the leading k by m part of the array A must contain the - matrix A. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When TRANSA = 'N' or 'n' then - LDA must be at least max( 1, m ), otherwise LDA must be at - least max( 1, k ). - Unchanged on exit. - - B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is - n when TRANSB = 'N' or 'n', and is k otherwise. - Before entry with TRANSB = 'N' or 'n', the leading k by n - part of the array B must contain the matrix B, otherwise - the leading n by k part of the array B must contain the - matrix B. - Unchanged on exit. - - LDB - INTEGER. - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. When TRANSB = 'N' or 'n' then - LDB must be at least max( 1, k ), otherwise LDB must be at - least max( 1, n ). - Unchanged on exit. - - BETA - DOUBLE PRECISION. - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then C need not be set on input. - Unchanged on exit. - - C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). - Before entry, the leading m by n part of the array C must - contain the matrix C, except when beta is zero, in which - case C need not be set on entry. - On exit, the array C is overwritten by the m by n matrix - ( alpha*op( A )*op( B ) + beta*C ). - - LDC - INTEGER. - On entry, LDC specifies the first dimension of C as declared - in the calling (sub) program. LDC must be at least - max( 1, m ). - Unchanged on exit. - - - Level 3 Blas routine. - - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - - Set NOTA and NOTB as true if A and B respectively are not - transposed and set NROWA, NCOLA and NROWB as the number of rows - and columns of A and the number of rows of B respectively. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - - /* Function Body */ - nota = lsame_(transa, "N"); - notb = lsame_(transb, "N"); - if (nota) { - nrowa = *m; - ncola = *k; - } else { - nrowa = *k; - ncola = *m; - } - if (notb) { - nrowb = *k; - } else { - nrowb = *n; - } - -/* Test the input parameters. */ - - info = 0; - if (! nota && ! lsame_(transa, "C") && ! lsame_( - transa, "T")) { - info = 1; - } else if (! notb && ! lsame_(transb, "C") && ! - lsame_(transb, "T")) { - info = 2; - } else if (*m < 0) { - info = 3; - } else if (*n < 0) { - info = 4; - } else if (*k < 0) { - info = 5; - } else if (*lda < max(1,nrowa)) { - info = 8; - } else if (*ldb < max(1,nrowb)) { - info = 10; - } else if (*ldc < max(1,*m)) { - info = 13; - } - if (info != 0) { - xerbla_("DGEMM ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*m == 0 || *n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { - return 0; - } - -/* And if alpha.eq.zero. */ - - if (*alpha == 0.) { - if (*beta == 0.) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; -/* L10: */ - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L30: */ - } -/* L40: */ - } - } - return 0; - } - -/* Start the operations. */ - - if (notb) { - if (nota) { - -/* Form C := alpha*A*B + beta*C. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; -/* L50: */ - } - } else if (*beta != 1.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L60: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - if (b[l + j * b_dim1] != 0.) { - temp = *alpha * b[l + j * b_dim1]; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] += temp * a[i__ + l * - a_dim1]; -/* L70: */ - } - } -/* L80: */ - } -/* L90: */ - } - } else { - -/* Form C := alpha*A'*B + beta*C */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = 0.; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - temp += a[l + i__ * a_dim1] * b[l + j * b_dim1]; -/* L100: */ - } - if (*beta == 0.) { - c__[i__ + j * c_dim1] = *alpha * temp; - } else { - c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ - i__ + j * c_dim1]; - } -/* L110: */ - } -/* L120: */ - } - } - } else { - if (nota) { - -/* Form C := alpha*A*B' + beta*C */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; -/* L130: */ - } - } else if (*beta != 1.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L140: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - if (b[j + l * b_dim1] != 0.) { - temp = *alpha * b[j + l * b_dim1]; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] += temp * a[i__ + l * - a_dim1]; -/* L150: */ - } - } -/* L160: */ - } -/* L170: */ - } - } else { - -/* Form C := alpha*A'*B' + beta*C */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = 0.; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - temp += a[l + i__ * a_dim1] * b[j + l * b_dim1]; -/* L180: */ - } - if (*beta == 0.) { - c__[i__ + j * c_dim1] = *alpha * temp; - } else { - c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ - i__ + j * c_dim1]; - } -/* L190: */ - } -/* L200: */ - } - } - } - - return 0; - -/* End of DGEMM . */ - -} /* dgemm_ */ - -/* Subroutine */ int dgemv_(char *trans, integer *m, integer *n, doublereal * - alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, - doublereal *beta, doublereal *y, integer *incy) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j, ix, iy, jx, jy, kx, ky, info; - static doublereal temp; - static integer lenx, leny; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - DGEMV performs one of the matrix-vector operations - - y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, - - where alpha and beta are scalars, x and y are vectors and A is an - m by n matrix. - - Parameters - ========== - - TRANS - CHARACTER*1. - On entry, TRANS specifies the operation to be performed as - follows: - - TRANS = 'N' or 'n' y := alpha*A*x + beta*y. - - TRANS = 'T' or 't' y := alpha*A'*x + beta*y. - - TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. - - Unchanged on exit. - - M - INTEGER. - On entry, M specifies the number of rows of the matrix A. - M must be at least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of the matrix A. - N must be at least zero. - Unchanged on exit. - - ALPHA - DOUBLE PRECISION. - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). - Before entry, the leading m by n part of the array A must - contain the matrix of coefficients. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, m ). - Unchanged on exit. - - X - DOUBLE PRECISION array of DIMENSION at least - ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' - and at least - ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. - Before entry, the incremented array X must contain the - vector x. - Unchanged on exit. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - BETA - DOUBLE PRECISION. - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then Y need not be set on input. - Unchanged on exit. - - Y - DOUBLE PRECISION array of DIMENSION at least - ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' - and at least - ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. - Before entry with BETA non-zero, the incremented array Y - must contain the vector y. On exit, Y is overwritten by the - updated vector y. - - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - --y; - - /* Function Body */ - info = 0; - if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C") - ) { - info = 1; - } else if (*m < 0) { - info = 2; - } else if (*n < 0) { - info = 3; - } else if (*lda < max(1,*m)) { - info = 6; - } else if (*incx == 0) { - info = 8; - } else if (*incy == 0) { - info = 11; - } - if (info != 0) { - xerbla_("DGEMV ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) { - return 0; - } - -/* - Set LENX and LENY, the lengths of the vectors x and y, and set - up the start points in X and Y. -*/ - - if (lsame_(trans, "N")) { - lenx = *n; - leny = *m; - } else { - lenx = *m; - leny = *n; - } - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (lenx - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (leny - 1) * *incy; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through A. - - First form y := beta*y. -*/ - - if (*beta != 1.) { - if (*incy == 1) { - if (*beta == 0.) { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = 0.; -/* L10: */ - } - } else { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = *beta * y[i__]; -/* L20: */ - } - } - } else { - iy = ky; - if (*beta == 0.) { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = 0.; - iy += *incy; -/* L30: */ - } - } else { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = *beta * y[iy]; - iy += *incy; -/* L40: */ - } - } - } - } - if (*alpha == 0.) { - return 0; - } - if (lsame_(trans, "N")) { - -/* Form y := alpha*A*x + y. */ - - jx = kx; - if (*incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0.) { - temp = *alpha * x[jx]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - y[i__] += temp * a[i__ + j * a_dim1]; -/* L50: */ - } - } - jx += *incx; -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0.) { - temp = *alpha * x[jx]; - iy = ky; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - y[iy] += temp * a[i__ + j * a_dim1]; - iy += *incy; -/* L70: */ - } - } - jx += *incx; -/* L80: */ - } - } - } else { - -/* Form y := alpha*A'*x + y. */ - - jy = ky; - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp = 0.; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp += a[i__ + j * a_dim1] * x[i__]; -/* L90: */ - } - y[jy] += *alpha * temp; - jy += *incy; -/* L100: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp = 0.; - ix = kx; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp += a[i__ + j * a_dim1] * x[ix]; - ix += *incx; -/* L110: */ - } - y[jy] += *alpha * temp; - jy += *incy; -/* L120: */ - } - } - } - - return 0; - -/* End of DGEMV . */ - -} /* dgemv_ */ - -/* Subroutine */ int dger_(integer *m, integer *n, doublereal *alpha, - doublereal *x, integer *incx, doublereal *y, integer *incy, - doublereal *a, integer *lda) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j, ix, jy, kx, info; - static doublereal temp; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - DGER performs the rank 1 operation - - A := alpha*x*y' + A, - - where alpha is a scalar, x is an m element vector, y is an n element - vector and A is an m by n matrix. - - Parameters - ========== - - M - INTEGER. - On entry, M specifies the number of rows of the matrix A. - M must be at least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of the matrix A. - N must be at least zero. - Unchanged on exit. - - ALPHA - DOUBLE PRECISION. - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - X - DOUBLE PRECISION array of dimension at least - ( 1 + ( m - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the m - element vector x. - Unchanged on exit. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - Y - DOUBLE PRECISION array of dimension at least - ( 1 + ( n - 1 )*abs( INCY ) ). - Before entry, the incremented array Y must contain the n - element vector y. - Unchanged on exit. - - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. - - A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). - Before entry, the leading m by n part of the array A must - contain the matrix of coefficients. On exit, A is - overwritten by the updated matrix. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, m ). - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --x; - --y; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - info = 0; - if (*m < 0) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*incx == 0) { - info = 5; - } else if (*incy == 0) { - info = 7; - } else if (*lda < max(1,*m)) { - info = 9; - } - if (info != 0) { - xerbla_("DGER ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*m == 0 || *n == 0 || *alpha == 0.) { - return 0; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through A. -*/ - - if (*incy > 0) { - jy = 1; - } else { - jy = 1 - (*n - 1) * *incy; - } - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (y[jy] != 0.) { - temp = *alpha * y[jy]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] += x[i__] * temp; -/* L10: */ - } - } - jy += *incy; -/* L20: */ - } - } else { - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*m - 1) * *incx; - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (y[jy] != 0.) { - temp = *alpha * y[jy]; - ix = kx; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] += x[ix] * temp; - ix += *incx; -/* L30: */ - } - } - jy += *incy; -/* L40: */ - } - } - - return 0; - -/* End of DGER . */ - -} /* dger_ */ - -doublereal dnrm2_(integer *n, doublereal *x, integer *incx) -{ - /* System generated locals */ - integer i__1, i__2; - doublereal ret_val, d__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer ix; - static doublereal ssq, norm, scale, absxi; - - -/* - DNRM2 returns the euclidean norm of a vector via the function - name, so that - - DNRM2 := sqrt( x'*x ) - - - -- This version written on 25-October-1982. - Modified on 14-October-1993 to inline the call to DLASSQ. - Sven Hammarling, Nag Ltd. -*/ - - - /* Parameter adjustments */ - --x; - - /* Function Body */ - if (*n < 1 || *incx < 1) { - norm = 0.; - } else if (*n == 1) { - norm = abs(x[1]); - } else { - scale = 0.; - ssq = 1.; -/* - The following loop is equivalent to this call to the LAPACK - auxiliary routine: - CALL DLASSQ( N, X, INCX, SCALE, SSQ ) -*/ - - i__1 = (*n - 1) * *incx + 1; - i__2 = *incx; - for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { - if (x[ix] != 0.) { - absxi = (d__1 = x[ix], abs(d__1)); - if (scale < absxi) { -/* Computing 2nd power */ - d__1 = scale / absxi; - ssq = ssq * (d__1 * d__1) + 1.; - scale = absxi; - } else { -/* Computing 2nd power */ - d__1 = absxi / scale; - ssq += d__1 * d__1; - } - } -/* L10: */ - } - norm = scale * sqrt(ssq); - } - - ret_val = norm; - return ret_val; - -/* End of DNRM2. */ - -} /* dnrm2_ */ - -/* Subroutine */ int drot_(integer *n, doublereal *dx, integer *incx, - doublereal *dy, integer *incy, doublereal *c__, doublereal *s) -{ - /* System generated locals */ - integer i__1; - - /* Local variables */ - static integer i__, ix, iy; - static doublereal dtemp; - - -/* - applies a plane rotation. - jack dongarra, linpack, 3/11/78. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --dy; - --dx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments not equal - to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dtemp = *c__ * dx[ix] + *s * dy[iy]; - dy[iy] = *c__ * dy[iy] - *s * dx[ix]; - dx[ix] = dtemp; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* code for both increments equal to 1 */ - -L20: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dtemp = *c__ * dx[i__] + *s * dy[i__]; - dy[i__] = *c__ * dy[i__] - *s * dx[i__]; - dx[i__] = dtemp; -/* L30: */ - } - return 0; -} /* drot_ */ - -/* Subroutine */ int dscal_(integer *n, doublereal *da, doublereal *dx, - integer *incx) -{ - /* System generated locals */ - integer i__1, i__2; - - /* Local variables */ - static integer i__, m, mp1, nincx; - - -/* - scales a vector by a constant. - uses unrolled loops for increment equal to one. - jack dongarra, linpack, 3/11/78. - modified 3/93 to return if incx .le. 0. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --dx; - - /* Function Body */ - if (*n <= 0 || *incx <= 0) { - return 0; - } - if (*incx == 1) { - goto L20; - } - -/* code for increment not equal to 1 */ - - nincx = *n * *incx; - i__1 = nincx; - i__2 = *incx; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - dx[i__] = *da * dx[i__]; -/* L10: */ - } - return 0; - -/* - code for increment equal to 1 - - - clean-up loop -*/ - -L20: - m = *n % 5; - if (m == 0) { - goto L40; - } - i__2 = m; - for (i__ = 1; i__ <= i__2; ++i__) { - dx[i__] = *da * dx[i__]; -/* L30: */ - } - if (*n < 5) { - return 0; - } -L40: - mp1 = m + 1; - i__2 = *n; - for (i__ = mp1; i__ <= i__2; i__ += 5) { - dx[i__] = *da * dx[i__]; - dx[i__ + 1] = *da * dx[i__ + 1]; - dx[i__ + 2] = *da * dx[i__ + 2]; - dx[i__ + 3] = *da * dx[i__ + 3]; - dx[i__ + 4] = *da * dx[i__ + 4]; -/* L50: */ - } - return 0; -} /* dscal_ */ - -/* Subroutine */ int dswap_(integer *n, doublereal *dx, integer *incx, - doublereal *dy, integer *incy) -{ - /* System generated locals */ - integer i__1; - - /* Local variables */ - static integer i__, m, ix, iy, mp1; - static doublereal dtemp; - - -/* - interchanges two vectors. - uses unrolled loops for increments equal one. - jack dongarra, linpack, 3/11/78. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --dy; - --dx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments not equal - to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dtemp = dx[ix]; - dx[ix] = dy[iy]; - dy[iy] = dtemp; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* - code for both increments equal to 1 - - - clean-up loop -*/ - -L20: - m = *n % 3; - if (m == 0) { - goto L40; - } - i__1 = m; - for (i__ = 1; i__ <= i__1; ++i__) { - dtemp = dx[i__]; - dx[i__] = dy[i__]; - dy[i__] = dtemp; -/* L30: */ - } - if (*n < 3) { - return 0; - } -L40: - mp1 = m + 1; - i__1 = *n; - for (i__ = mp1; i__ <= i__1; i__ += 3) { - dtemp = dx[i__]; - dx[i__] = dy[i__]; - dy[i__] = dtemp; - dtemp = dx[i__ + 1]; - dx[i__ + 1] = dy[i__ + 1]; - dy[i__ + 1] = dtemp; - dtemp = dx[i__ + 2]; - dx[i__ + 2] = dy[i__ + 2]; - dy[i__ + 2] = dtemp; -/* L50: */ - } - return 0; -} /* dswap_ */ - -/* Subroutine */ int dsymv_(char *uplo, integer *n, doublereal *alpha, - doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal - *beta, doublereal *y, integer *incy) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j, ix, iy, jx, jy, kx, ky, info; - static doublereal temp1, temp2; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - DSYMV performs the matrix-vector operation - - y := alpha*A*x + beta*y, - - where alpha and beta are scalars, x and y are n element vectors and - A is an n by n symmetric matrix. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the upper or lower - triangular part of the array A is to be referenced as - follows: - - UPLO = 'U' or 'u' Only the upper triangular part of A - is to be referenced. - - UPLO = 'L' or 'l' Only the lower triangular part of A - is to be referenced. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix A. - N must be at least zero. - Unchanged on exit. - - ALPHA - DOUBLE PRECISION. - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array A must contain the upper - triangular part of the symmetric matrix and the strictly - lower triangular part of A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array A must contain the lower - triangular part of the symmetric matrix and the strictly - upper triangular part of A is not referenced. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, n ). - Unchanged on exit. - - X - DOUBLE PRECISION array of dimension at least - ( 1 + ( n - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the n - element vector x. - Unchanged on exit. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - BETA - DOUBLE PRECISION. - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then Y need not be set on input. - Unchanged on exit. - - Y - DOUBLE PRECISION array of dimension at least - ( 1 + ( n - 1 )*abs( INCY ) ). - Before entry, the incremented array Y must contain the n - element vector y. On exit, Y is overwritten by the updated - vector y. - - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - --y; - - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*lda < max(1,*n)) { - info = 5; - } else if (*incx == 0) { - info = 7; - } else if (*incy == 0) { - info = 10; - } - if (info != 0) { - xerbla_("DSYMV ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0 || *alpha == 0. && *beta == 1.) { - return 0; - } - -/* Set up the start points in X and Y. */ - - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*n - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (*n - 1) * *incy; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through the triangular part - of A. - - First form y := beta*y. -*/ - - if (*beta != 1.) { - if (*incy == 1) { - if (*beta == 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = 0.; -/* L10: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = *beta * y[i__]; -/* L20: */ - } - } - } else { - iy = ky; - if (*beta == 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = 0.; - iy += *incy; -/* L30: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = *beta * y[iy]; - iy += *incy; -/* L40: */ - } - } - } - } - if (*alpha == 0.) { - return 0; - } - if (lsame_(uplo, "U")) { - -/* Form y when A is stored in upper triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[j]; - temp2 = 0.; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - y[i__] += temp1 * a[i__ + j * a_dim1]; - temp2 += a[i__ + j * a_dim1] * x[i__]; -/* L50: */ - } - y[j] = y[j] + temp1 * a[j + j * a_dim1] + *alpha * temp2; -/* L60: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[jx]; - temp2 = 0.; - ix = kx; - iy = ky; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - y[iy] += temp1 * a[i__ + j * a_dim1]; - temp2 += a[i__ + j * a_dim1] * x[ix]; - ix += *incx; - iy += *incy; -/* L70: */ - } - y[jy] = y[jy] + temp1 * a[j + j * a_dim1] + *alpha * temp2; - jx += *incx; - jy += *incy; -/* L80: */ - } - } - } else { - -/* Form y when A is stored in lower triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[j]; - temp2 = 0.; - y[j] += temp1 * a[j + j * a_dim1]; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - y[i__] += temp1 * a[i__ + j * a_dim1]; - temp2 += a[i__ + j * a_dim1] * x[i__]; -/* L90: */ - } - y[j] += *alpha * temp2; -/* L100: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[jx]; - temp2 = 0.; - y[jy] += temp1 * a[j + j * a_dim1]; - ix = jx; - iy = jy; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - ix += *incx; - iy += *incy; - y[iy] += temp1 * a[i__ + j * a_dim1]; - temp2 += a[i__ + j * a_dim1] * x[ix]; -/* L110: */ - } - y[jy] += *alpha * temp2; - jx += *incx; - jy += *incy; -/* L120: */ - } - } - } - - return 0; - -/* End of DSYMV . */ - -} /* dsymv_ */ - -/* Subroutine */ int dsyr2_(char *uplo, integer *n, doublereal *alpha, - doublereal *x, integer *incx, doublereal *y, integer *incy, - doublereal *a, integer *lda) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j, ix, iy, jx, jy, kx, ky, info; - static doublereal temp1, temp2; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - DSYR2 performs the symmetric rank 2 operation - - A := alpha*x*y' + alpha*y*x' + A, - - where alpha is a scalar, x and y are n element vectors and A is an n - by n symmetric matrix. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the upper or lower - triangular part of the array A is to be referenced as - follows: - - UPLO = 'U' or 'u' Only the upper triangular part of A - is to be referenced. - - UPLO = 'L' or 'l' Only the lower triangular part of A - is to be referenced. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix A. - N must be at least zero. - Unchanged on exit. - - ALPHA - DOUBLE PRECISION. - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - X - DOUBLE PRECISION array of dimension at least - ( 1 + ( n - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the n - element vector x. - Unchanged on exit. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - Y - DOUBLE PRECISION array of dimension at least - ( 1 + ( n - 1 )*abs( INCY ) ). - Before entry, the incremented array Y must contain the n - element vector y. - Unchanged on exit. - - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. - - A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array A must contain the upper - triangular part of the symmetric matrix and the strictly - lower triangular part of A is not referenced. On exit, the - upper triangular part of the array A is overwritten by the - upper triangular part of the updated matrix. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array A must contain the lower - triangular part of the symmetric matrix and the strictly - upper triangular part of A is not referenced. On exit, the - lower triangular part of the array A is overwritten by the - lower triangular part of the updated matrix. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, n ). - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --x; - --y; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*incx == 0) { - info = 5; - } else if (*incy == 0) { - info = 7; - } else if (*lda < max(1,*n)) { - info = 9; - } - if (info != 0) { - xerbla_("DSYR2 ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0 || *alpha == 0.) { - return 0; - } - -/* - Set up the start points in X and Y if the increments are not both - unity. -*/ - - if (*incx != 1 || *incy != 1) { - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*n - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (*n - 1) * *incy; - } - jx = kx; - jy = ky; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through the triangular part - of A. -*/ - - if (lsame_(uplo, "U")) { - -/* Form A when A is stored in the upper triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[j] != 0. || y[j] != 0.) { - temp1 = *alpha * y[j]; - temp2 = *alpha * x[j]; - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * - temp1 + y[i__] * temp2; -/* L10: */ - } - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0. || y[jy] != 0.) { - temp1 = *alpha * y[jy]; - temp2 = *alpha * x[jx]; - ix = kx; - iy = ky; - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * - temp1 + y[iy] * temp2; - ix += *incx; - iy += *incy; -/* L30: */ - } - } - jx += *incx; - jy += *incy; -/* L40: */ - } - } - } else { - -/* Form A when A is stored in the lower triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[j] != 0. || y[j] != 0.) { - temp1 = *alpha * y[j]; - temp2 = *alpha * x[j]; - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * - temp1 + y[i__] * temp2; -/* L50: */ - } - } -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0. || y[jy] != 0.) { - temp1 = *alpha * y[jy]; - temp2 = *alpha * x[jx]; - ix = jx; - iy = jy; - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * - temp1 + y[iy] * temp2; - ix += *incx; - iy += *incy; -/* L70: */ - } - } - jx += *incx; - jy += *incy; -/* L80: */ - } - } - } - - return 0; - -/* End of DSYR2 . */ - -} /* dsyr2_ */ - -/* Subroutine */ int dsyr2k_(char *uplo, char *trans, integer *n, integer *k, - doublereal *alpha, doublereal *a, integer *lda, doublereal *b, - integer *ldb, doublereal *beta, doublereal *c__, integer *ldc) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, - i__3; - - /* Local variables */ - static integer i__, j, l, info; - static doublereal temp1, temp2; - extern logical lsame_(char *, char *); - static integer nrowa; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - DSYR2K performs one of the symmetric rank 2k operations - - C := alpha*A*B' + alpha*B*A' + beta*C, - - or - - C := alpha*A'*B + alpha*B'*A + beta*C, - - where alpha and beta are scalars, C is an n by n symmetric matrix - and A and B are n by k matrices in the first case and k by n - matrices in the second case. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the upper or lower - triangular part of the array C is to be referenced as - follows: - - UPLO = 'U' or 'u' Only the upper triangular part of C - is to be referenced. - - UPLO = 'L' or 'l' Only the lower triangular part of C - is to be referenced. - - Unchanged on exit. - - TRANS - CHARACTER*1. - On entry, TRANS specifies the operation to be performed as - follows: - - TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' + - beta*C. - - TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A + - beta*C. - - TRANS = 'C' or 'c' C := alpha*A'*B + alpha*B'*A + - beta*C. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix C. N must be - at least zero. - Unchanged on exit. - - K - INTEGER. - On entry with TRANS = 'N' or 'n', K specifies the number - of columns of the matrices A and B, and on entry with - TRANS = 'T' or 't' or 'C' or 'c', K specifies the number - of rows of the matrices A and B. K must be at least zero. - Unchanged on exit. - - ALPHA - DOUBLE PRECISION. - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is - k when TRANS = 'N' or 'n', and is n otherwise. - Before entry with TRANS = 'N' or 'n', the leading n by k - part of the array A must contain the matrix A, otherwise - the leading k by n part of the array A must contain the - matrix A. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When TRANS = 'N' or 'n' - then LDA must be at least max( 1, n ), otherwise LDA must - be at least max( 1, k ). - Unchanged on exit. - - B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is - k when TRANS = 'N' or 'n', and is n otherwise. - Before entry with TRANS = 'N' or 'n', the leading n by k - part of the array B must contain the matrix B, otherwise - the leading k by n part of the array B must contain the - matrix B. - Unchanged on exit. - - LDB - INTEGER. - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. When TRANS = 'N' or 'n' - then LDB must be at least max( 1, n ), otherwise LDB must - be at least max( 1, k ). - Unchanged on exit. - - BETA - DOUBLE PRECISION. - On entry, BETA specifies the scalar beta. - Unchanged on exit. - - C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array C must contain the upper - triangular part of the symmetric matrix and the strictly - lower triangular part of C is not referenced. On exit, the - upper triangular part of the array C is overwritten by the - upper triangular part of the updated matrix. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array C must contain the lower - triangular part of the symmetric matrix and the strictly - upper triangular part of C is not referenced. On exit, the - lower triangular part of the array C is overwritten by the - lower triangular part of the updated matrix. - - LDC - INTEGER. - On entry, LDC specifies the first dimension of C as declared - in the calling (sub) program. LDC must be at least - max( 1, n ). - Unchanged on exit. - - - Level 3 Blas routine. - - - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - - /* Function Body */ - if (lsame_(trans, "N")) { - nrowa = *n; - } else { - nrowa = *k; - } - upper = lsame_(uplo, "U"); - - info = 0; - if (! upper && ! lsame_(uplo, "L")) { - info = 1; - } else if (! lsame_(trans, "N") && ! lsame_(trans, - "T") && ! lsame_(trans, "C")) { - info = 2; - } else if (*n < 0) { - info = 3; - } else if (*k < 0) { - info = 4; - } else if (*lda < max(1,nrowa)) { - info = 7; - } else if (*ldb < max(1,nrowa)) { - info = 9; - } else if (*ldc < max(1,*n)) { - info = 12; - } - if (info != 0) { - xerbla_("DSYR2K", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { - return 0; - } - -/* And when alpha.eq.zero. */ - - if (*alpha == 0.) { - if (upper) { - if (*beta == 0.) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; -/* L10: */ - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L30: */ - } -/* L40: */ - } - } - } else { - if (*beta == 0.) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; -/* L50: */ - } -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L70: */ - } -/* L80: */ - } - } - } - return 0; - } - -/* Start the operations. */ - - if (lsame_(trans, "N")) { - -/* Form C := alpha*A*B' + alpha*B*A' + C. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; -/* L90: */ - } - } else if (*beta != 1.) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L100: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - if (a[j + l * a_dim1] != 0. || b[j + l * b_dim1] != 0.) { - temp1 = *alpha * b[j + l * b_dim1]; - temp2 = *alpha * a[j + l * a_dim1]; - i__3 = j; - for (i__ = 1; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ - i__ + l * a_dim1] * temp1 + b[i__ + l * - b_dim1] * temp2; -/* L110: */ - } - } -/* L120: */ - } -/* L130: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; -/* L140: */ - } - } else if (*beta != 1.) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L150: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - if (a[j + l * a_dim1] != 0. || b[j + l * b_dim1] != 0.) { - temp1 = *alpha * b[j + l * b_dim1]; - temp2 = *alpha * a[j + l * a_dim1]; - i__3 = *n; - for (i__ = j; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ - i__ + l * a_dim1] * temp1 + b[i__ + l * - b_dim1] * temp2; -/* L160: */ - } - } -/* L170: */ - } -/* L180: */ - } - } - } else { - -/* Form C := alpha*A'*B + alpha*B'*A + C. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - temp1 = 0.; - temp2 = 0.; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; - temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; -/* L190: */ - } - if (*beta == 0.) { - c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * - temp2; - } else { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] - + *alpha * temp1 + *alpha * temp2; - } -/* L200: */ - } -/* L210: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - temp1 = 0.; - temp2 = 0.; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; - temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; -/* L220: */ - } - if (*beta == 0.) { - c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * - temp2; - } else { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] - + *alpha * temp1 + *alpha * temp2; - } -/* L230: */ - } -/* L240: */ - } - } - } - - return 0; - -/* End of DSYR2K. */ - -} /* dsyr2k_ */ - -/* Subroutine */ int dsyrk_(char *uplo, char *trans, integer *n, integer *k, - doublereal *alpha, doublereal *a, integer *lda, doublereal *beta, - doublereal *c__, integer *ldc) -{ - /* System generated locals */ - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j, l, info; - static doublereal temp; - extern logical lsame_(char *, char *); - static integer nrowa; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - DSYRK performs one of the symmetric rank k operations - - C := alpha*A*A' + beta*C, - - or - - C := alpha*A'*A + beta*C, - - where alpha and beta are scalars, C is an n by n symmetric matrix - and A is an n by k matrix in the first case and a k by n matrix - in the second case. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the upper or lower - triangular part of the array C is to be referenced as - follows: - - UPLO = 'U' or 'u' Only the upper triangular part of C - is to be referenced. - - UPLO = 'L' or 'l' Only the lower triangular part of C - is to be referenced. - - Unchanged on exit. - - TRANS - CHARACTER*1. - On entry, TRANS specifies the operation to be performed as - follows: - - TRANS = 'N' or 'n' C := alpha*A*A' + beta*C. - - TRANS = 'T' or 't' C := alpha*A'*A + beta*C. - - TRANS = 'C' or 'c' C := alpha*A'*A + beta*C. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix C. N must be - at least zero. - Unchanged on exit. - - K - INTEGER. - On entry with TRANS = 'N' or 'n', K specifies the number - of columns of the matrix A, and on entry with - TRANS = 'T' or 't' or 'C' or 'c', K specifies the number - of rows of the matrix A. K must be at least zero. - Unchanged on exit. - - ALPHA - DOUBLE PRECISION. - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is - k when TRANS = 'N' or 'n', and is n otherwise. - Before entry with TRANS = 'N' or 'n', the leading n by k - part of the array A must contain the matrix A, otherwise - the leading k by n part of the array A must contain the - matrix A. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When TRANS = 'N' or 'n' - then LDA must be at least max( 1, n ), otherwise LDA must - be at least max( 1, k ). - Unchanged on exit. - - BETA - DOUBLE PRECISION. - On entry, BETA specifies the scalar beta. - Unchanged on exit. - - C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array C must contain the upper - triangular part of the symmetric matrix and the strictly - lower triangular part of C is not referenced. On exit, the - upper triangular part of the array C is overwritten by the - upper triangular part of the updated matrix. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array C must contain the lower - triangular part of the symmetric matrix and the strictly - upper triangular part of C is not referenced. On exit, the - lower triangular part of the array C is overwritten by the - lower triangular part of the updated matrix. - - LDC - INTEGER. - On entry, LDC specifies the first dimension of C as declared - in the calling (sub) program. LDC must be at least - max( 1, n ). - Unchanged on exit. - - - Level 3 Blas routine. - - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - - /* Function Body */ - if (lsame_(trans, "N")) { - nrowa = *n; - } else { - nrowa = *k; - } - upper = lsame_(uplo, "U"); - - info = 0; - if (! upper && ! lsame_(uplo, "L")) { - info = 1; - } else if (! lsame_(trans, "N") && ! lsame_(trans, - "T") && ! lsame_(trans, "C")) { - info = 2; - } else if (*n < 0) { - info = 3; - } else if (*k < 0) { - info = 4; - } else if (*lda < max(1,nrowa)) { - info = 7; - } else if (*ldc < max(1,*n)) { - info = 10; - } - if (info != 0) { - xerbla_("DSYRK ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { - return 0; - } - -/* And when alpha.eq.zero. */ - - if (*alpha == 0.) { - if (upper) { - if (*beta == 0.) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; -/* L10: */ - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L30: */ - } -/* L40: */ - } - } - } else { - if (*beta == 0.) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; -/* L50: */ - } -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L70: */ - } -/* L80: */ - } - } - } - return 0; - } - -/* Start the operations. */ - - if (lsame_(trans, "N")) { - -/* Form C := alpha*A*A' + beta*C. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; -/* L90: */ - } - } else if (*beta != 1.) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L100: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - if (a[j + l * a_dim1] != 0.) { - temp = *alpha * a[j + l * a_dim1]; - i__3 = j; - for (i__ = 1; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] += temp * a[i__ + l * - a_dim1]; -/* L110: */ - } - } -/* L120: */ - } -/* L130: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; -/* L140: */ - } - } else if (*beta != 1.) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L150: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - if (a[j + l * a_dim1] != 0.) { - temp = *alpha * a[j + l * a_dim1]; - i__3 = *n; - for (i__ = j; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] += temp * a[i__ + l * - a_dim1]; -/* L160: */ - } - } -/* L170: */ - } -/* L180: */ - } - } - } else { - -/* Form C := alpha*A'*A + beta*C. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = 0.; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; -/* L190: */ - } - if (*beta == 0.) { - c__[i__ + j * c_dim1] = *alpha * temp; - } else { - c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ - i__ + j * c_dim1]; - } -/* L200: */ - } -/* L210: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - temp = 0.; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; -/* L220: */ - } - if (*beta == 0.) { - c__[i__ + j * c_dim1] = *alpha * temp; - } else { - c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ - i__ + j * c_dim1]; - } -/* L230: */ - } -/* L240: */ - } - } - } - - return 0; - -/* End of DSYRK . */ - -} /* dsyrk_ */ - -/* Subroutine */ int dtrmm_(char *side, char *uplo, char *transa, char *diag, - integer *m, integer *n, doublereal *alpha, doublereal *a, integer * - lda, doublereal *b, integer *ldb) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j, k, info; - static doublereal temp; - static logical lside; - extern logical lsame_(char *, char *); - static integer nrowa; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical nounit; - - -/* - Purpose - ======= - - DTRMM performs one of the matrix-matrix operations - - B := alpha*op( A )*B, or B := alpha*B*op( A ), - - where alpha is a scalar, B is an m by n matrix, A is a unit, or - non-unit, upper or lower triangular matrix and op( A ) is one of - - op( A ) = A or op( A ) = A'. - - Parameters - ========== - - SIDE - CHARACTER*1. - On entry, SIDE specifies whether op( A ) multiplies B from - the left or right as follows: - - SIDE = 'L' or 'l' B := alpha*op( A )*B. - - SIDE = 'R' or 'r' B := alpha*B*op( A ). - - Unchanged on exit. - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the matrix A is an upper or - lower triangular matrix as follows: - - UPLO = 'U' or 'u' A is an upper triangular matrix. - - UPLO = 'L' or 'l' A is a lower triangular matrix. - - Unchanged on exit. - - TRANSA - CHARACTER*1. - On entry, TRANSA specifies the form of op( A ) to be used in - the matrix multiplication as follows: - - TRANSA = 'N' or 'n' op( A ) = A. - - TRANSA = 'T' or 't' op( A ) = A'. - - TRANSA = 'C' or 'c' op( A ) = A'. - - Unchanged on exit. - - DIAG - CHARACTER*1. - On entry, DIAG specifies whether or not A is unit triangular - as follows: - - DIAG = 'U' or 'u' A is assumed to be unit triangular. - - DIAG = 'N' or 'n' A is not assumed to be unit - triangular. - - Unchanged on exit. - - M - INTEGER. - On entry, M specifies the number of rows of B. M must be at - least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of B. N must be - at least zero. - Unchanged on exit. - - ALPHA - DOUBLE PRECISION. - On entry, ALPHA specifies the scalar alpha. When alpha is - zero then A is not referenced and B need not be set before - entry. - Unchanged on exit. - - A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m - when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. - Before entry with UPLO = 'U' or 'u', the leading k by k - upper triangular part of the array A must contain the upper - triangular matrix and the strictly lower triangular part of - A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading k by k - lower triangular part of the array A must contain the lower - triangular matrix and the strictly upper triangular part of - A is not referenced. - Note that when DIAG = 'U' or 'u', the diagonal elements of - A are not referenced either, but are assumed to be unity. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When SIDE = 'L' or 'l' then - LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' - then LDA must be at least max( 1, n ). - Unchanged on exit. - - B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). - Before entry, the leading m by n part of the array B must - contain the matrix B, and on exit is overwritten by the - transformed matrix. - - LDB - INTEGER. - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. LDB must be at least - max( 1, m ). - Unchanged on exit. - - - Level 3 Blas routine. - - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - lside = lsame_(side, "L"); - if (lside) { - nrowa = *m; - } else { - nrowa = *n; - } - nounit = lsame_(diag, "N"); - upper = lsame_(uplo, "U"); - - info = 0; - if (! lside && ! lsame_(side, "R")) { - info = 1; - } else if (! upper && ! lsame_(uplo, "L")) { - info = 2; - } else if (! lsame_(transa, "N") && ! lsame_(transa, - "T") && ! lsame_(transa, "C")) { - info = 3; - } else if (! lsame_(diag, "U") && ! lsame_(diag, - "N")) { - info = 4; - } else if (*m < 0) { - info = 5; - } else if (*n < 0) { - info = 6; - } else if (*lda < max(1,nrowa)) { - info = 9; - } else if (*ldb < max(1,*m)) { - info = 11; - } - if (info != 0) { - xerbla_("DTRMM ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - -/* And when alpha.eq.zero. */ - - if (*alpha == 0.) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = 0.; -/* L10: */ - } -/* L20: */ - } - return 0; - } - -/* Start the operations. */ - - if (lside) { - if (lsame_(transa, "N")) { - -/* Form B := alpha*A*B. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (k = 1; k <= i__2; ++k) { - if (b[k + j * b_dim1] != 0.) { - temp = *alpha * b[k + j * b_dim1]; - i__3 = k - 1; - for (i__ = 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] += temp * a[i__ + k * - a_dim1]; -/* L30: */ - } - if (nounit) { - temp *= a[k + k * a_dim1]; - } - b[k + j * b_dim1] = temp; - } -/* L40: */ - } -/* L50: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - for (k = *m; k >= 1; --k) { - if (b[k + j * b_dim1] != 0.) { - temp = *alpha * b[k + j * b_dim1]; - b[k + j * b_dim1] = temp; - if (nounit) { - b[k + j * b_dim1] *= a[k + k * a_dim1]; - } - i__2 = *m; - for (i__ = k + 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] += temp * a[i__ + k * - a_dim1]; -/* L60: */ - } - } -/* L70: */ - } -/* L80: */ - } - } - } else { - -/* Form B := alpha*A'*B. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - for (i__ = *m; i__ >= 1; --i__) { - temp = b[i__ + j * b_dim1]; - if (nounit) { - temp *= a[i__ + i__ * a_dim1]; - } - i__2 = i__ - 1; - for (k = 1; k <= i__2; ++k) { - temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; -/* L90: */ - } - b[i__ + j * b_dim1] = *alpha * temp; -/* L100: */ - } -/* L110: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = b[i__ + j * b_dim1]; - if (nounit) { - temp *= a[i__ + i__ * a_dim1]; - } - i__3 = *m; - for (k = i__ + 1; k <= i__3; ++k) { - temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; -/* L120: */ - } - b[i__ + j * b_dim1] = *alpha * temp; -/* L130: */ - } -/* L140: */ - } - } - } - } else { - if (lsame_(transa, "N")) { - -/* Form B := alpha*B*A. */ - - if (upper) { - for (j = *n; j >= 1; --j) { - temp = *alpha; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; -/* L150: */ - } - i__1 = j - 1; - for (k = 1; k <= i__1; ++k) { - if (a[k + j * a_dim1] != 0.) { - temp = *alpha * a[k + j * a_dim1]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] += temp * b[i__ + k * - b_dim1]; -/* L160: */ - } - } -/* L170: */ - } -/* L180: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp = *alpha; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; -/* L190: */ - } - i__2 = *n; - for (k = j + 1; k <= i__2; ++k) { - if (a[k + j * a_dim1] != 0.) { - temp = *alpha * a[k + j * a_dim1]; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] += temp * b[i__ + k * - b_dim1]; -/* L200: */ - } - } -/* L210: */ - } -/* L220: */ - } - } - } else { - -/* Form B := alpha*B*A'. */ - - if (upper) { - i__1 = *n; - for (k = 1; k <= i__1; ++k) { - i__2 = k - 1; - for (j = 1; j <= i__2; ++j) { - if (a[j + k * a_dim1] != 0.) { - temp = *alpha * a[j + k * a_dim1]; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] += temp * b[i__ + k * - b_dim1]; -/* L230: */ - } - } -/* L240: */ - } - temp = *alpha; - if (nounit) { - temp *= a[k + k * a_dim1]; - } - if (temp != 1.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; -/* L250: */ - } - } -/* L260: */ - } - } else { - for (k = *n; k >= 1; --k) { - i__1 = *n; - for (j = k + 1; j <= i__1; ++j) { - if (a[j + k * a_dim1] != 0.) { - temp = *alpha * a[j + k * a_dim1]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] += temp * b[i__ + k * - b_dim1]; -/* L270: */ - } - } -/* L280: */ - } - temp = *alpha; - if (nounit) { - temp *= a[k + k * a_dim1]; - } - if (temp != 1.) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; -/* L290: */ - } - } -/* L300: */ - } - } - } - } - - return 0; - -/* End of DTRMM . */ - -} /* dtrmm_ */ - -/* Subroutine */ int dtrmv_(char *uplo, char *trans, char *diag, integer *n, - doublereal *a, integer *lda, doublereal *x, integer *incx) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j, ix, jx, kx, info; - static doublereal temp; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical nounit; - - -/* - Purpose - ======= - - DTRMV performs one of the matrix-vector operations - - x := A*x, or x := A'*x, - - where x is an n element vector and A is an n by n unit, or non-unit, - upper or lower triangular matrix. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the matrix is an upper or - lower triangular matrix as follows: - - UPLO = 'U' or 'u' A is an upper triangular matrix. - - UPLO = 'L' or 'l' A is a lower triangular matrix. - - Unchanged on exit. - - TRANS - CHARACTER*1. - On entry, TRANS specifies the operation to be performed as - follows: - - TRANS = 'N' or 'n' x := A*x. - - TRANS = 'T' or 't' x := A'*x. - - TRANS = 'C' or 'c' x := A'*x. - - Unchanged on exit. - - DIAG - CHARACTER*1. - On entry, DIAG specifies whether or not A is unit - triangular as follows: - - DIAG = 'U' or 'u' A is assumed to be unit triangular. - - DIAG = 'N' or 'n' A is not assumed to be unit - triangular. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix A. - N must be at least zero. - Unchanged on exit. - - A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array A must contain the upper - triangular matrix and the strictly lower triangular part of - A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array A must contain the lower - triangular matrix and the strictly upper triangular part of - A is not referenced. - Note that when DIAG = 'U' or 'u', the diagonal elements of - A are not referenced either, but are assumed to be unity. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, n ). - Unchanged on exit. - - X - DOUBLE PRECISION array of dimension at least - ( 1 + ( n - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the n - element vector x. On exit, X is overwritten with the - tranformed vector x. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - info = 1; - } else if (! lsame_(trans, "N") && ! lsame_(trans, - "T") && ! lsame_(trans, "C")) { - info = 2; - } else if (! lsame_(diag, "U") && ! lsame_(diag, - "N")) { - info = 3; - } else if (*n < 0) { - info = 4; - } else if (*lda < max(1,*n)) { - info = 6; - } else if (*incx == 0) { - info = 8; - } - if (info != 0) { - xerbla_("DTRMV ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - - nounit = lsame_(diag, "N"); - -/* - Set up the start point in X if the increment is not unity. This - will be ( N - 1 )*INCX too small for descending loops. -*/ - - if (*incx <= 0) { - kx = 1 - (*n - 1) * *incx; - } else if (*incx != 1) { - kx = 1; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through A. -*/ - - if (lsame_(trans, "N")) { - -/* Form x := A*x. */ - - if (lsame_(uplo, "U")) { - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[j] != 0.) { - temp = x[j]; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - x[i__] += temp * a[i__ + j * a_dim1]; -/* L10: */ - } - if (nounit) { - x[j] *= a[j + j * a_dim1]; - } - } -/* L20: */ - } - } else { - jx = kx; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0.) { - temp = x[jx]; - ix = kx; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - x[ix] += temp * a[i__ + j * a_dim1]; - ix += *incx; -/* L30: */ - } - if (nounit) { - x[jx] *= a[j + j * a_dim1]; - } - } - jx += *incx; -/* L40: */ - } - } - } else { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - if (x[j] != 0.) { - temp = x[j]; - i__1 = j + 1; - for (i__ = *n; i__ >= i__1; --i__) { - x[i__] += temp * a[i__ + j * a_dim1]; -/* L50: */ - } - if (nounit) { - x[j] *= a[j + j * a_dim1]; - } - } -/* L60: */ - } - } else { - kx += (*n - 1) * *incx; - jx = kx; - for (j = *n; j >= 1; --j) { - if (x[jx] != 0.) { - temp = x[jx]; - ix = kx; - i__1 = j + 1; - for (i__ = *n; i__ >= i__1; --i__) { - x[ix] += temp * a[i__ + j * a_dim1]; - ix -= *incx; -/* L70: */ - } - if (nounit) { - x[jx] *= a[j + j * a_dim1]; - } - } - jx -= *incx; -/* L80: */ - } - } - } - } else { - -/* Form x := A'*x. */ - - if (lsame_(uplo, "U")) { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - temp = x[j]; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - for (i__ = j - 1; i__ >= 1; --i__) { - temp += a[i__ + j * a_dim1] * x[i__]; -/* L90: */ - } - x[j] = temp; -/* L100: */ - } - } else { - jx = kx + (*n - 1) * *incx; - for (j = *n; j >= 1; --j) { - temp = x[jx]; - ix = jx; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - for (i__ = j - 1; i__ >= 1; --i__) { - ix -= *incx; - temp += a[i__ + j * a_dim1] * x[ix]; -/* L110: */ - } - x[jx] = temp; - jx -= *incx; -/* L120: */ - } - } - } else { - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp = x[j]; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - temp += a[i__ + j * a_dim1] * x[i__]; -/* L130: */ - } - x[j] = temp; -/* L140: */ - } - } else { - jx = kx; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp = x[jx]; - ix = jx; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - ix += *incx; - temp += a[i__ + j * a_dim1] * x[ix]; -/* L150: */ - } - x[jx] = temp; - jx += *incx; -/* L160: */ - } - } - } - } - - return 0; - -/* End of DTRMV . */ - -} /* dtrmv_ */ - -/* Subroutine */ int dtrsm_(char *side, char *uplo, char *transa, char *diag, - integer *m, integer *n, doublereal *alpha, doublereal *a, integer * - lda, doublereal *b, integer *ldb) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j, k, info; - static doublereal temp; - static logical lside; - extern logical lsame_(char *, char *); - static integer nrowa; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical nounit; - - -/* - Purpose - ======= - - DTRSM solves one of the matrix equations - - op( A )*X = alpha*B, or X*op( A ) = alpha*B, - - where alpha is a scalar, X and B are m by n matrices, A is a unit, or - non-unit, upper or lower triangular matrix and op( A ) is one of - - op( A ) = A or op( A ) = A'. - - The matrix X is overwritten on B. - - Parameters - ========== - - SIDE - CHARACTER*1. - On entry, SIDE specifies whether op( A ) appears on the left - or right of X as follows: - - SIDE = 'L' or 'l' op( A )*X = alpha*B. - - SIDE = 'R' or 'r' X*op( A ) = alpha*B. - - Unchanged on exit. - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the matrix A is an upper or - lower triangular matrix as follows: - - UPLO = 'U' or 'u' A is an upper triangular matrix. - - UPLO = 'L' or 'l' A is a lower triangular matrix. - - Unchanged on exit. - - TRANSA - CHARACTER*1. - On entry, TRANSA specifies the form of op( A ) to be used in - the matrix multiplication as follows: - - TRANSA = 'N' or 'n' op( A ) = A. - - TRANSA = 'T' or 't' op( A ) = A'. - - TRANSA = 'C' or 'c' op( A ) = A'. - - Unchanged on exit. - - DIAG - CHARACTER*1. - On entry, DIAG specifies whether or not A is unit triangular - as follows: - - DIAG = 'U' or 'u' A is assumed to be unit triangular. - - DIAG = 'N' or 'n' A is not assumed to be unit - triangular. - - Unchanged on exit. - - M - INTEGER. - On entry, M specifies the number of rows of B. M must be at - least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of B. N must be - at least zero. - Unchanged on exit. - - ALPHA - DOUBLE PRECISION. - On entry, ALPHA specifies the scalar alpha. When alpha is - zero then A is not referenced and B need not be set before - entry. - Unchanged on exit. - - A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m - when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. - Before entry with UPLO = 'U' or 'u', the leading k by k - upper triangular part of the array A must contain the upper - triangular matrix and the strictly lower triangular part of - A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading k by k - lower triangular part of the array A must contain the lower - triangular matrix and the strictly upper triangular part of - A is not referenced. - Note that when DIAG = 'U' or 'u', the diagonal elements of - A are not referenced either, but are assumed to be unity. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When SIDE = 'L' or 'l' then - LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' - then LDA must be at least max( 1, n ). - Unchanged on exit. - - B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). - Before entry, the leading m by n part of the array B must - contain the right-hand side matrix B, and on exit is - overwritten by the solution matrix X. - - LDB - INTEGER. - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. LDB must be at least - max( 1, m ). - Unchanged on exit. - - - Level 3 Blas routine. - - - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - lside = lsame_(side, "L"); - if (lside) { - nrowa = *m; - } else { - nrowa = *n; - } - nounit = lsame_(diag, "N"); - upper = lsame_(uplo, "U"); - - info = 0; - if (! lside && ! lsame_(side, "R")) { - info = 1; - } else if (! upper && ! lsame_(uplo, "L")) { - info = 2; - } else if (! lsame_(transa, "N") && ! lsame_(transa, - "T") && ! lsame_(transa, "C")) { - info = 3; - } else if (! lsame_(diag, "U") && ! lsame_(diag, - "N")) { - info = 4; - } else if (*m < 0) { - info = 5; - } else if (*n < 0) { - info = 6; - } else if (*lda < max(1,nrowa)) { - info = 9; - } else if (*ldb < max(1,*m)) { - info = 11; - } - if (info != 0) { - xerbla_("DTRSM ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - -/* And when alpha.eq.zero. */ - - if (*alpha == 0.) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = 0.; -/* L10: */ - } -/* L20: */ - } - return 0; - } - -/* Start the operations. */ - - if (lside) { - if (lsame_(transa, "N")) { - -/* Form B := alpha*inv( A )*B. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*alpha != 1.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] - ; -/* L30: */ - } - } - for (k = *m; k >= 1; --k) { - if (b[k + j * b_dim1] != 0.) { - if (nounit) { - b[k + j * b_dim1] /= a[k + k * a_dim1]; - } - i__2 = k - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ - i__ + k * a_dim1]; -/* L40: */ - } - } -/* L50: */ - } -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*alpha != 1.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] - ; -/* L70: */ - } - } - i__2 = *m; - for (k = 1; k <= i__2; ++k) { - if (b[k + j * b_dim1] != 0.) { - if (nounit) { - b[k + j * b_dim1] /= a[k + k * a_dim1]; - } - i__3 = *m; - for (i__ = k + 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ - i__ + k * a_dim1]; -/* L80: */ - } - } -/* L90: */ - } -/* L100: */ - } - } - } else { - -/* Form B := alpha*inv( A' )*B. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = *alpha * b[i__ + j * b_dim1]; - i__3 = i__ - 1; - for (k = 1; k <= i__3; ++k) { - temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; -/* L110: */ - } - if (nounit) { - temp /= a[i__ + i__ * a_dim1]; - } - b[i__ + j * b_dim1] = temp; -/* L120: */ - } -/* L130: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - for (i__ = *m; i__ >= 1; --i__) { - temp = *alpha * b[i__ + j * b_dim1]; - i__2 = *m; - for (k = i__ + 1; k <= i__2; ++k) { - temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; -/* L140: */ - } - if (nounit) { - temp /= a[i__ + i__ * a_dim1]; - } - b[i__ + j * b_dim1] = temp; -/* L150: */ - } -/* L160: */ - } - } - } - } else { - if (lsame_(transa, "N")) { - -/* Form B := alpha*B*inv( A ). */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*alpha != 1.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] - ; -/* L170: */ - } - } - i__2 = j - 1; - for (k = 1; k <= i__2; ++k) { - if (a[k + j * a_dim1] != 0.) { - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ - i__ + k * b_dim1]; -/* L180: */ - } - } -/* L190: */ - } - if (nounit) { - temp = 1. / a[j + j * a_dim1]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; -/* L200: */ - } - } -/* L210: */ - } - } else { - for (j = *n; j >= 1; --j) { - if (*alpha != 1.) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] - ; -/* L220: */ - } - } - i__1 = *n; - for (k = j + 1; k <= i__1; ++k) { - if (a[k + j * a_dim1] != 0.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ - i__ + k * b_dim1]; -/* L230: */ - } - } -/* L240: */ - } - if (nounit) { - temp = 1. / a[j + j * a_dim1]; - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; -/* L250: */ - } - } -/* L260: */ - } - } - } else { - -/* Form B := alpha*B*inv( A' ). */ - - if (upper) { - for (k = *n; k >= 1; --k) { - if (nounit) { - temp = 1. / a[k + k * a_dim1]; - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; -/* L270: */ - } - } - i__1 = k - 1; - for (j = 1; j <= i__1; ++j) { - if (a[j + k * a_dim1] != 0.) { - temp = a[j + k * a_dim1]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] -= temp * b[i__ + k * - b_dim1]; -/* L280: */ - } - } -/* L290: */ - } - if (*alpha != 1.) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] - ; -/* L300: */ - } - } -/* L310: */ - } - } else { - i__1 = *n; - for (k = 1; k <= i__1; ++k) { - if (nounit) { - temp = 1. / a[k + k * a_dim1]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; -/* L320: */ - } - } - i__2 = *n; - for (j = k + 1; j <= i__2; ++j) { - if (a[j + k * a_dim1] != 0.) { - temp = a[j + k * a_dim1]; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] -= temp * b[i__ + k * - b_dim1]; -/* L330: */ - } - } -/* L340: */ - } - if (*alpha != 1.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] - ; -/* L350: */ - } - } -/* L360: */ - } - } - } - } - - return 0; - -/* End of DTRSM . */ - -} /* dtrsm_ */ - -doublereal dzasum_(integer *n, doublecomplex *zx, integer *incx) -{ - /* System generated locals */ - integer i__1; - doublereal ret_val; - - /* Local variables */ - static integer i__, ix; - static doublereal stemp; - extern doublereal dcabs1_(doublecomplex *); - - -/* - takes the sum of the absolute values. - jack dongarra, 3/11/78. - modified 3/93 to return if incx .le. 0. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --zx; - - /* Function Body */ - ret_val = 0.; - stemp = 0.; - if (*n <= 0 || *incx <= 0) { - return ret_val; - } - if (*incx == 1) { - goto L20; - } - -/* code for increment not equal to 1 */ - - ix = 1; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - stemp += dcabs1_(&zx[ix]); - ix += *incx; -/* L10: */ - } - ret_val = stemp; - return ret_val; - -/* code for increment equal to 1 */ - -L20: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - stemp += dcabs1_(&zx[i__]); -/* L30: */ - } - ret_val = stemp; - return ret_val; -} /* dzasum_ */ - -doublereal dznrm2_(integer *n, doublecomplex *x, integer *incx) -{ - /* System generated locals */ - integer i__1, i__2, i__3; - doublereal ret_val, d__1; - - /* Builtin functions */ - double d_imag(doublecomplex *), sqrt(doublereal); - - /* Local variables */ - static integer ix; - static doublereal ssq, temp, norm, scale; - - -/* - DZNRM2 returns the euclidean norm of a vector via the function - name, so that - - DZNRM2 := sqrt( conjg( x' )*x ) - - - -- This version written on 25-October-1982. - Modified on 14-October-1993 to inline the call to ZLASSQ. - Sven Hammarling, Nag Ltd. -*/ - - - /* Parameter adjustments */ - --x; - - /* Function Body */ - if (*n < 1 || *incx < 1) { - norm = 0.; - } else { - scale = 0.; - ssq = 1.; -/* - The following loop is equivalent to this call to the LAPACK - auxiliary routine: - CALL ZLASSQ( N, X, INCX, SCALE, SSQ ) -*/ - - i__1 = (*n - 1) * *incx + 1; - i__2 = *incx; - for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { - i__3 = ix; - if (x[i__3].r != 0.) { - i__3 = ix; - temp = (d__1 = x[i__3].r, abs(d__1)); - if (scale < temp) { -/* Computing 2nd power */ - d__1 = scale / temp; - ssq = ssq * (d__1 * d__1) + 1.; - scale = temp; - } else { -/* Computing 2nd power */ - d__1 = temp / scale; - ssq += d__1 * d__1; - } - } - if (d_imag(&x[ix]) != 0.) { - temp = (d__1 = d_imag(&x[ix]), abs(d__1)); - if (scale < temp) { -/* Computing 2nd power */ - d__1 = scale / temp; - ssq = ssq * (d__1 * d__1) + 1.; - scale = temp; - } else { -/* Computing 2nd power */ - d__1 = temp / scale; - ssq += d__1 * d__1; - } - } -/* L10: */ - } - norm = scale * sqrt(ssq); - } - - ret_val = norm; - return ret_val; - -/* End of DZNRM2. */ - -} /* dznrm2_ */ - -integer icamax_(integer *n, complex *cx, integer *incx) -{ - /* System generated locals */ - integer ret_val, i__1, i__2; - real r__1, r__2; - - /* Builtin functions */ - double r_imag(complex *); - - /* Local variables */ - static integer i__, ix; - static real smax; - - -/* - finds the index of element having max. absolute value. - jack dongarra, linpack, 3/11/78. - modified 3/93 to return if incx .le. 0. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --cx; - - /* Function Body */ - ret_val = 0; - if (*n < 1 || *incx <= 0) { - return ret_val; - } - ret_val = 1; - if (*n == 1) { - return ret_val; - } - if (*incx == 1) { - goto L20; - } - -/* code for increment not equal to 1 */ - - ix = 1; - smax = (r__1 = cx[1].r, dabs(r__1)) + (r__2 = r_imag(&cx[1]), dabs(r__2)); - ix += *incx; - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - i__2 = ix; - if ((r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[ix]), dabs( - r__2)) <= smax) { - goto L5; - } - ret_val = i__; - i__2 = ix; - smax = (r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[ix]), - dabs(r__2)); -L5: - ix += *incx; -/* L10: */ - } - return ret_val; - -/* code for increment equal to 1 */ - -L20: - smax = (r__1 = cx[1].r, dabs(r__1)) + (r__2 = r_imag(&cx[1]), dabs(r__2)); - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - i__2 = i__; - if ((r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[i__]), dabs( - r__2)) <= smax) { - goto L30; - } - ret_val = i__; - i__2 = i__; - smax = (r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[i__]), - dabs(r__2)); -L30: - ; - } - return ret_val; -} /* icamax_ */ - -integer idamax_(integer *n, doublereal *dx, integer *incx) -{ - /* System generated locals */ - integer ret_val, i__1; - doublereal d__1; - - /* Local variables */ - static integer i__, ix; - static doublereal dmax__; - - -/* - finds the index of element having max. absolute value. - jack dongarra, linpack, 3/11/78. - modified 3/93 to return if incx .le. 0. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --dx; - - /* Function Body */ - ret_val = 0; - if (*n < 1 || *incx <= 0) { - return ret_val; - } - ret_val = 1; - if (*n == 1) { - return ret_val; - } - if (*incx == 1) { - goto L20; - } - -/* code for increment not equal to 1 */ - - ix = 1; - dmax__ = abs(dx[1]); - ix += *incx; - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - if ((d__1 = dx[ix], abs(d__1)) <= dmax__) { - goto L5; - } - ret_val = i__; - dmax__ = (d__1 = dx[ix], abs(d__1)); -L5: - ix += *incx; -/* L10: */ - } - return ret_val; - -/* code for increment equal to 1 */ - -L20: - dmax__ = abs(dx[1]); - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - if ((d__1 = dx[i__], abs(d__1)) <= dmax__) { - goto L30; - } - ret_val = i__; - dmax__ = (d__1 = dx[i__], abs(d__1)); -L30: - ; - } - return ret_val; -} /* idamax_ */ - -integer isamax_(integer *n, real *sx, integer *incx) -{ - /* System generated locals */ - integer ret_val, i__1; - real r__1; - - /* Local variables */ - static integer i__, ix; - static real smax; - - -/* - finds the index of element having max. absolute value. - jack dongarra, linpack, 3/11/78. - modified 3/93 to return if incx .le. 0. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --sx; - - /* Function Body */ - ret_val = 0; - if (*n < 1 || *incx <= 0) { - return ret_val; - } - ret_val = 1; - if (*n == 1) { - return ret_val; - } - if (*incx == 1) { - goto L20; - } - -/* code for increment not equal to 1 */ - - ix = 1; - smax = dabs(sx[1]); - ix += *incx; - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - if ((r__1 = sx[ix], dabs(r__1)) <= smax) { - goto L5; - } - ret_val = i__; - smax = (r__1 = sx[ix], dabs(r__1)); -L5: - ix += *incx; -/* L10: */ - } - return ret_val; - -/* code for increment equal to 1 */ - -L20: - smax = dabs(sx[1]); - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - if ((r__1 = sx[i__], dabs(r__1)) <= smax) { - goto L30; - } - ret_val = i__; - smax = (r__1 = sx[i__], dabs(r__1)); -L30: - ; - } - return ret_val; -} /* isamax_ */ - -integer izamax_(integer *n, doublecomplex *zx, integer *incx) -{ - /* System generated locals */ - integer ret_val, i__1; - - /* Local variables */ - static integer i__, ix; - static doublereal smax; - extern doublereal dcabs1_(doublecomplex *); - - -/* - finds the index of element having max. absolute value. - jack dongarra, 1/15/85. - modified 3/93 to return if incx .le. 0. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --zx; - - /* Function Body */ - ret_val = 0; - if (*n < 1 || *incx <= 0) { - return ret_val; - } - ret_val = 1; - if (*n == 1) { - return ret_val; - } - if (*incx == 1) { - goto L20; - } - -/* code for increment not equal to 1 */ - - ix = 1; - smax = dcabs1_(&zx[1]); - ix += *incx; - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - if (dcabs1_(&zx[ix]) <= smax) { - goto L5; - } - ret_val = i__; - smax = dcabs1_(&zx[ix]); -L5: - ix += *incx; -/* L10: */ - } - return ret_val; - -/* code for increment equal to 1 */ - -L20: - smax = dcabs1_(&zx[1]); - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - if (dcabs1_(&zx[i__]) <= smax) { - goto L30; - } - ret_val = i__; - smax = dcabs1_(&zx[i__]); -L30: - ; - } - return ret_val; -} /* izamax_ */ - -logical lsame_(char *ca, char *cb) -{ - /* System generated locals */ - logical ret_val; - - /* Local variables */ - static integer inta, intb, zcode; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - LSAME returns .TRUE. if CA is the same letter as CB regardless of - case. - - Arguments - ========= - - CA (input) CHARACTER*1 - CB (input) CHARACTER*1 - CA and CB specify the single characters to be compared. - - ===================================================================== - - - Test if the characters are equal -*/ - - ret_val = *(unsigned char *)ca == *(unsigned char *)cb; - if (ret_val) { - return ret_val; - } - -/* Now test for equivalence if both characters are alphabetic. */ - - zcode = 'Z'; - -/* - Use 'Z' rather than 'A' so that ASCII can be detected on Prime - machines, on which ICHAR returns a value with bit 8 set. - ICHAR('A') on Prime machines returns 193 which is the same as - ICHAR('A') on an EBCDIC machine. -*/ - - inta = *(unsigned char *)ca; - intb = *(unsigned char *)cb; - - if (zcode == 90 || zcode == 122) { - -/* - ASCII is assumed - ZCODE is the ASCII code of either lower or - upper case 'Z'. -*/ - - if (inta >= 97 && inta <= 122) { - inta += -32; - } - if (intb >= 97 && intb <= 122) { - intb += -32; - } - - } else if (zcode == 233 || zcode == 169) { - -/* - EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or - upper case 'Z'. -*/ - - if (inta >= 129 && inta <= 137 || inta >= 145 && inta <= 153 || inta - >= 162 && inta <= 169) { - inta += 64; - } - if (intb >= 129 && intb <= 137 || intb >= 145 && intb <= 153 || intb - >= 162 && intb <= 169) { - intb += 64; - } - - } else if (zcode == 218 || zcode == 250) { - -/* - ASCII is assumed, on Prime machines - ZCODE is the ASCII code - plus 128 of either lower or upper case 'Z'. -*/ - - if (inta >= 225 && inta <= 250) { - inta += -32; - } - if (intb >= 225 && intb <= 250) { - intb += -32; - } - } - ret_val = inta == intb; - -/* - RETURN - - End of LSAME -*/ - - return ret_val; -} /* lsame_ */ - -/* Subroutine */ int saxpy_(integer *n, real *sa, real *sx, integer *incx, - real *sy, integer *incy) -{ - /* System generated locals */ - integer i__1; - - /* Local variables */ - static integer i__, m, ix, iy, mp1; - - -/* - constant times a vector plus a vector. - uses unrolled loop for increments equal to one. - jack dongarra, linpack, 3/11/78. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --sy; - --sx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*sa == 0.f) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments - not equal to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - sy[iy] += *sa * sx[ix]; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* - code for both increments equal to 1 - - - clean-up loop -*/ - -L20: - m = *n % 4; - if (m == 0) { - goto L40; - } - i__1 = m; - for (i__ = 1; i__ <= i__1; ++i__) { - sy[i__] += *sa * sx[i__]; -/* L30: */ - } - if (*n < 4) { - return 0; - } -L40: - mp1 = m + 1; - i__1 = *n; - for (i__ = mp1; i__ <= i__1; i__ += 4) { - sy[i__] += *sa * sx[i__]; - sy[i__ + 1] += *sa * sx[i__ + 1]; - sy[i__ + 2] += *sa * sx[i__ + 2]; - sy[i__ + 3] += *sa * sx[i__ + 3]; -/* L50: */ - } - return 0; -} /* saxpy_ */ - -doublereal scasum_(integer *n, complex *cx, integer *incx) -{ - /* System generated locals */ - integer i__1, i__2, i__3; - real ret_val, r__1, r__2; - - /* Builtin functions */ - double r_imag(complex *); - - /* Local variables */ - static integer i__, nincx; - static real stemp; - - -/* - takes the sum of the absolute values of a complex vector and - returns a single precision result. - jack dongarra, linpack, 3/11/78. - modified 3/93 to return if incx .le. 0. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --cx; - - /* Function Body */ - ret_val = 0.f; - stemp = 0.f; - if (*n <= 0 || *incx <= 0) { - return ret_val; - } - if (*incx == 1) { - goto L20; - } - -/* code for increment not equal to 1 */ - - nincx = *n * *incx; - i__1 = nincx; - i__2 = *incx; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - i__3 = i__; - stemp = stemp + (r__1 = cx[i__3].r, dabs(r__1)) + (r__2 = r_imag(&cx[ - i__]), dabs(r__2)); -/* L10: */ - } - ret_val = stemp; - return ret_val; - -/* code for increment equal to 1 */ - -L20: - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__1 = i__; - stemp = stemp + (r__1 = cx[i__1].r, dabs(r__1)) + (r__2 = r_imag(&cx[ - i__]), dabs(r__2)); -/* L30: */ - } - ret_val = stemp; - return ret_val; -} /* scasum_ */ - -doublereal scnrm2_(integer *n, complex *x, integer *incx) -{ - /* System generated locals */ - integer i__1, i__2, i__3; - real ret_val, r__1; - - /* Builtin functions */ - double r_imag(complex *), sqrt(doublereal); - - /* Local variables */ - static integer ix; - static real ssq, temp, norm, scale; - - -/* - SCNRM2 returns the euclidean norm of a vector via the function - name, so that - - SCNRM2 := sqrt( conjg( x' )*x ) - - - -- This version written on 25-October-1982. - Modified on 14-October-1993 to inline the call to CLASSQ. - Sven Hammarling, Nag Ltd. -*/ - - - /* Parameter adjustments */ - --x; - - /* Function Body */ - if (*n < 1 || *incx < 1) { - norm = 0.f; - } else { - scale = 0.f; - ssq = 1.f; -/* - The following loop is equivalent to this call to the LAPACK - auxiliary routine: - CALL CLASSQ( N, X, INCX, SCALE, SSQ ) -*/ - - i__1 = (*n - 1) * *incx + 1; - i__2 = *incx; - for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { - i__3 = ix; - if (x[i__3].r != 0.f) { - i__3 = ix; - temp = (r__1 = x[i__3].r, dabs(r__1)); - if (scale < temp) { -/* Computing 2nd power */ - r__1 = scale / temp; - ssq = ssq * (r__1 * r__1) + 1.f; - scale = temp; - } else { -/* Computing 2nd power */ - r__1 = temp / scale; - ssq += r__1 * r__1; - } - } - if (r_imag(&x[ix]) != 0.f) { - temp = (r__1 = r_imag(&x[ix]), dabs(r__1)); - if (scale < temp) { -/* Computing 2nd power */ - r__1 = scale / temp; - ssq = ssq * (r__1 * r__1) + 1.f; - scale = temp; - } else { -/* Computing 2nd power */ - r__1 = temp / scale; - ssq += r__1 * r__1; - } - } -/* L10: */ - } - norm = scale * sqrt(ssq); - } - - ret_val = norm; - return ret_val; - -/* End of SCNRM2. */ - -} /* scnrm2_ */ - -/* Subroutine */ int scopy_(integer *n, real *sx, integer *incx, real *sy, - integer *incy) -{ - /* System generated locals */ - integer i__1; - - /* Local variables */ - static integer i__, m, ix, iy, mp1; - - -/* - copies a vector, x, to a vector, y. - uses unrolled loops for increments equal to 1. - jack dongarra, linpack, 3/11/78. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --sy; - --sx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments - not equal to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - sy[iy] = sx[ix]; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* - code for both increments equal to 1 - - - clean-up loop -*/ - -L20: - m = *n % 7; - if (m == 0) { - goto L40; - } - i__1 = m; - for (i__ = 1; i__ <= i__1; ++i__) { - sy[i__] = sx[i__]; -/* L30: */ - } - if (*n < 7) { - return 0; - } -L40: - mp1 = m + 1; - i__1 = *n; - for (i__ = mp1; i__ <= i__1; i__ += 7) { - sy[i__] = sx[i__]; - sy[i__ + 1] = sx[i__ + 1]; - sy[i__ + 2] = sx[i__ + 2]; - sy[i__ + 3] = sx[i__ + 3]; - sy[i__ + 4] = sx[i__ + 4]; - sy[i__ + 5] = sx[i__ + 5]; - sy[i__ + 6] = sx[i__ + 6]; -/* L50: */ - } - return 0; -} /* scopy_ */ - -doublereal sdot_(integer *n, real *sx, integer *incx, real *sy, integer *incy) -{ - /* System generated locals */ - integer i__1; - real ret_val; - - /* Local variables */ - static integer i__, m, ix, iy, mp1; - static real stemp; - - -/* - forms the dot product of two vectors. - uses unrolled loops for increments equal to one. - jack dongarra, linpack, 3/11/78. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --sy; - --sx; - - /* Function Body */ - stemp = 0.f; - ret_val = 0.f; - if (*n <= 0) { - return ret_val; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments - not equal to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - stemp += sx[ix] * sy[iy]; - ix += *incx; - iy += *incy; -/* L10: */ - } - ret_val = stemp; - return ret_val; - -/* - code for both increments equal to 1 - - - clean-up loop -*/ - -L20: - m = *n % 5; - if (m == 0) { - goto L40; - } - i__1 = m; - for (i__ = 1; i__ <= i__1; ++i__) { - stemp += sx[i__] * sy[i__]; -/* L30: */ - } - if (*n < 5) { - goto L60; - } -L40: - mp1 = m + 1; - i__1 = *n; - for (i__ = mp1; i__ <= i__1; i__ += 5) { - stemp = stemp + sx[i__] * sy[i__] + sx[i__ + 1] * sy[i__ + 1] + sx[ - i__ + 2] * sy[i__ + 2] + sx[i__ + 3] * sy[i__ + 3] + sx[i__ + - 4] * sy[i__ + 4]; -/* L50: */ - } -L60: - ret_val = stemp; - return ret_val; -} /* sdot_ */ - -/* Subroutine */ int sgemm_(char *transa, char *transb, integer *m, integer * - n, integer *k, real *alpha, real *a, integer *lda, real *b, integer * - ldb, real *beta, real *c__, integer *ldc) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, - i__3; - - /* Local variables */ - static integer i__, j, l, info; - static logical nota, notb; - static real temp; - static integer ncola; - extern logical lsame_(char *, char *); - static integer nrowa, nrowb; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - SGEMM performs one of the matrix-matrix operations - - C := alpha*op( A )*op( B ) + beta*C, - - where op( X ) is one of - - op( X ) = X or op( X ) = X', - - alpha and beta are scalars, and A, B and C are matrices, with op( A ) - an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. - - Parameters - ========== - - TRANSA - CHARACTER*1. - On entry, TRANSA specifies the form of op( A ) to be used in - the matrix multiplication as follows: - - TRANSA = 'N' or 'n', op( A ) = A. - - TRANSA = 'T' or 't', op( A ) = A'. - - TRANSA = 'C' or 'c', op( A ) = A'. - - Unchanged on exit. - - TRANSB - CHARACTER*1. - On entry, TRANSB specifies the form of op( B ) to be used in - the matrix multiplication as follows: - - TRANSB = 'N' or 'n', op( B ) = B. - - TRANSB = 'T' or 't', op( B ) = B'. - - TRANSB = 'C' or 'c', op( B ) = B'. - - Unchanged on exit. - - M - INTEGER. - On entry, M specifies the number of rows of the matrix - op( A ) and of the matrix C. M must be at least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of the matrix - op( B ) and the number of columns of the matrix C. N must be - at least zero. - Unchanged on exit. - - K - INTEGER. - On entry, K specifies the number of columns of the matrix - op( A ) and the number of rows of the matrix op( B ). K must - be at least zero. - Unchanged on exit. - - ALPHA - REAL . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - REAL array of DIMENSION ( LDA, ka ), where ka is - k when TRANSA = 'N' or 'n', and is m otherwise. - Before entry with TRANSA = 'N' or 'n', the leading m by k - part of the array A must contain the matrix A, otherwise - the leading k by m part of the array A must contain the - matrix A. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When TRANSA = 'N' or 'n' then - LDA must be at least max( 1, m ), otherwise LDA must be at - least max( 1, k ). - Unchanged on exit. - - B - REAL array of DIMENSION ( LDB, kb ), where kb is - n when TRANSB = 'N' or 'n', and is k otherwise. - Before entry with TRANSB = 'N' or 'n', the leading k by n - part of the array B must contain the matrix B, otherwise - the leading n by k part of the array B must contain the - matrix B. - Unchanged on exit. - - LDB - INTEGER. - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. When TRANSB = 'N' or 'n' then - LDB must be at least max( 1, k ), otherwise LDB must be at - least max( 1, n ). - Unchanged on exit. - - BETA - REAL . - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then C need not be set on input. - Unchanged on exit. - - C - REAL array of DIMENSION ( LDC, n ). - Before entry, the leading m by n part of the array C must - contain the matrix C, except when beta is zero, in which - case C need not be set on entry. - On exit, the array C is overwritten by the m by n matrix - ( alpha*op( A )*op( B ) + beta*C ). - - LDC - INTEGER. - On entry, LDC specifies the first dimension of C as declared - in the calling (sub) program. LDC must be at least - max( 1, m ). - Unchanged on exit. - - - Level 3 Blas routine. - - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - - Set NOTA and NOTB as true if A and B respectively are not - transposed and set NROWA, NCOLA and NROWB as the number of rows - and columns of A and the number of rows of B respectively. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - - /* Function Body */ - nota = lsame_(transa, "N"); - notb = lsame_(transb, "N"); - if (nota) { - nrowa = *m; - ncola = *k; - } else { - nrowa = *k; - ncola = *m; - } - if (notb) { - nrowb = *k; - } else { - nrowb = *n; - } - -/* Test the input parameters. */ - - info = 0; - if (! nota && ! lsame_(transa, "C") && ! lsame_( - transa, "T")) { - info = 1; - } else if (! notb && ! lsame_(transb, "C") && ! - lsame_(transb, "T")) { - info = 2; - } else if (*m < 0) { - info = 3; - } else if (*n < 0) { - info = 4; - } else if (*k < 0) { - info = 5; - } else if (*lda < max(1,nrowa)) { - info = 8; - } else if (*ldb < max(1,nrowb)) { - info = 10; - } else if (*ldc < max(1,*m)) { - info = 13; - } - if (info != 0) { - xerbla_("SGEMM ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*m == 0 || *n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) { - return 0; - } - -/* And if alpha.eq.zero. */ - - if (*alpha == 0.f) { - if (*beta == 0.f) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.f; -/* L10: */ - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L30: */ - } -/* L40: */ - } - } - return 0; - } - -/* Start the operations. */ - - if (notb) { - if (nota) { - -/* Form C := alpha*A*B + beta*C. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.f; -/* L50: */ - } - } else if (*beta != 1.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L60: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - if (b[l + j * b_dim1] != 0.f) { - temp = *alpha * b[l + j * b_dim1]; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] += temp * a[i__ + l * - a_dim1]; -/* L70: */ - } - } -/* L80: */ - } -/* L90: */ - } - } else { - -/* Form C := alpha*A'*B + beta*C */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = 0.f; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - temp += a[l + i__ * a_dim1] * b[l + j * b_dim1]; -/* L100: */ - } - if (*beta == 0.f) { - c__[i__ + j * c_dim1] = *alpha * temp; - } else { - c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ - i__ + j * c_dim1]; - } -/* L110: */ - } -/* L120: */ - } - } - } else { - if (nota) { - -/* Form C := alpha*A*B' + beta*C */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.f; -/* L130: */ - } - } else if (*beta != 1.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L140: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - if (b[j + l * b_dim1] != 0.f) { - temp = *alpha * b[j + l * b_dim1]; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] += temp * a[i__ + l * - a_dim1]; -/* L150: */ - } - } -/* L160: */ - } -/* L170: */ - } - } else { - -/* Form C := alpha*A'*B' + beta*C */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = 0.f; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - temp += a[l + i__ * a_dim1] * b[j + l * b_dim1]; -/* L180: */ - } - if (*beta == 0.f) { - c__[i__ + j * c_dim1] = *alpha * temp; - } else { - c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ - i__ + j * c_dim1]; - } -/* L190: */ - } -/* L200: */ - } - } - } - - return 0; - -/* End of SGEMM . */ - -} /* sgemm_ */ - -/* Subroutine */ int sgemv_(char *trans, integer *m, integer *n, real *alpha, - real *a, integer *lda, real *x, integer *incx, real *beta, real *y, - integer *incy) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j, ix, iy, jx, jy, kx, ky, info; - static real temp; - static integer lenx, leny; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - SGEMV performs one of the matrix-vector operations - - y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, - - where alpha and beta are scalars, x and y are vectors and A is an - m by n matrix. - - Parameters - ========== - - TRANS - CHARACTER*1. - On entry, TRANS specifies the operation to be performed as - follows: - - TRANS = 'N' or 'n' y := alpha*A*x + beta*y. - - TRANS = 'T' or 't' y := alpha*A'*x + beta*y. - - TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. - - Unchanged on exit. - - M - INTEGER. - On entry, M specifies the number of rows of the matrix A. - M must be at least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of the matrix A. - N must be at least zero. - Unchanged on exit. - - ALPHA - REAL . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - REAL array of DIMENSION ( LDA, n ). - Before entry, the leading m by n part of the array A must - contain the matrix of coefficients. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, m ). - Unchanged on exit. - - X - REAL array of DIMENSION at least - ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' - and at least - ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. - Before entry, the incremented array X must contain the - vector x. - Unchanged on exit. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - BETA - REAL . - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then Y need not be set on input. - Unchanged on exit. - - Y - REAL array of DIMENSION at least - ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' - and at least - ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. - Before entry with BETA non-zero, the incremented array Y - must contain the vector y. On exit, Y is overwritten by the - updated vector y. - - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - --y; - - /* Function Body */ - info = 0; - if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C") - ) { - info = 1; - } else if (*m < 0) { - info = 2; - } else if (*n < 0) { - info = 3; - } else if (*lda < max(1,*m)) { - info = 6; - } else if (*incx == 0) { - info = 8; - } else if (*incy == 0) { - info = 11; - } - if (info != 0) { - xerbla_("SGEMV ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*m == 0 || *n == 0 || *alpha == 0.f && *beta == 1.f) { - return 0; - } - -/* - Set LENX and LENY, the lengths of the vectors x and y, and set - up the start points in X and Y. -*/ - - if (lsame_(trans, "N")) { - lenx = *n; - leny = *m; - } else { - lenx = *m; - leny = *n; - } - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (lenx - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (leny - 1) * *incy; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through A. - - First form y := beta*y. -*/ - - if (*beta != 1.f) { - if (*incy == 1) { - if (*beta == 0.f) { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = 0.f; -/* L10: */ - } - } else { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = *beta * y[i__]; -/* L20: */ - } - } - } else { - iy = ky; - if (*beta == 0.f) { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = 0.f; - iy += *incy; -/* L30: */ - } - } else { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = *beta * y[iy]; - iy += *incy; -/* L40: */ - } - } - } - } - if (*alpha == 0.f) { - return 0; - } - if (lsame_(trans, "N")) { - -/* Form y := alpha*A*x + y. */ - - jx = kx; - if (*incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0.f) { - temp = *alpha * x[jx]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - y[i__] += temp * a[i__ + j * a_dim1]; -/* L50: */ - } - } - jx += *incx; -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0.f) { - temp = *alpha * x[jx]; - iy = ky; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - y[iy] += temp * a[i__ + j * a_dim1]; - iy += *incy; -/* L70: */ - } - } - jx += *incx; -/* L80: */ - } - } - } else { - -/* Form y := alpha*A'*x + y. */ - - jy = ky; - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp = 0.f; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp += a[i__ + j * a_dim1] * x[i__]; -/* L90: */ - } - y[jy] += *alpha * temp; - jy += *incy; -/* L100: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp = 0.f; - ix = kx; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp += a[i__ + j * a_dim1] * x[ix]; - ix += *incx; -/* L110: */ - } - y[jy] += *alpha * temp; - jy += *incy; -/* L120: */ - } - } - } - - return 0; - -/* End of SGEMV . */ - -} /* sgemv_ */ - -/* Subroutine */ int sger_(integer *m, integer *n, real *alpha, real *x, - integer *incx, real *y, integer *incy, real *a, integer *lda) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j, ix, jy, kx, info; - static real temp; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - SGER performs the rank 1 operation - - A := alpha*x*y' + A, - - where alpha is a scalar, x is an m element vector, y is an n element - vector and A is an m by n matrix. - - Parameters - ========== - - M - INTEGER. - On entry, M specifies the number of rows of the matrix A. - M must be at least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of the matrix A. - N must be at least zero. - Unchanged on exit. - - ALPHA - REAL . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - X - REAL array of dimension at least - ( 1 + ( m - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the m - element vector x. - Unchanged on exit. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - Y - REAL array of dimension at least - ( 1 + ( n - 1 )*abs( INCY ) ). - Before entry, the incremented array Y must contain the n - element vector y. - Unchanged on exit. - - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. - - A - REAL array of DIMENSION ( LDA, n ). - Before entry, the leading m by n part of the array A must - contain the matrix of coefficients. On exit, A is - overwritten by the updated matrix. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, m ). - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --x; - --y; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - info = 0; - if (*m < 0) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*incx == 0) { - info = 5; - } else if (*incy == 0) { - info = 7; - } else if (*lda < max(1,*m)) { - info = 9; - } - if (info != 0) { - xerbla_("SGER ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*m == 0 || *n == 0 || *alpha == 0.f) { - return 0; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through A. -*/ - - if (*incy > 0) { - jy = 1; - } else { - jy = 1 - (*n - 1) * *incy; - } - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (y[jy] != 0.f) { - temp = *alpha * y[jy]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] += x[i__] * temp; -/* L10: */ - } - } - jy += *incy; -/* L20: */ - } - } else { - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*m - 1) * *incx; - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (y[jy] != 0.f) { - temp = *alpha * y[jy]; - ix = kx; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] += x[ix] * temp; - ix += *incx; -/* L30: */ - } - } - jy += *incy; -/* L40: */ - } - } - - return 0; - -/* End of SGER . */ - -} /* sger_ */ - -doublereal snrm2_(integer *n, real *x, integer *incx) -{ - /* System generated locals */ - integer i__1, i__2; - real ret_val, r__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer ix; - static real ssq, norm, scale, absxi; - - -/* - SNRM2 returns the euclidean norm of a vector via the function - name, so that - - SNRM2 := sqrt( x'*x ) - - - -- This version written on 25-October-1982. - Modified on 14-October-1993 to inline the call to SLASSQ. - Sven Hammarling, Nag Ltd. -*/ - - - /* Parameter adjustments */ - --x; - - /* Function Body */ - if (*n < 1 || *incx < 1) { - norm = 0.f; - } else if (*n == 1) { - norm = dabs(x[1]); - } else { - scale = 0.f; - ssq = 1.f; -/* - The following loop is equivalent to this call to the LAPACK - auxiliary routine: - CALL SLASSQ( N, X, INCX, SCALE, SSQ ) -*/ - - i__1 = (*n - 1) * *incx + 1; - i__2 = *incx; - for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { - if (x[ix] != 0.f) { - absxi = (r__1 = x[ix], dabs(r__1)); - if (scale < absxi) { -/* Computing 2nd power */ - r__1 = scale / absxi; - ssq = ssq * (r__1 * r__1) + 1.f; - scale = absxi; - } else { -/* Computing 2nd power */ - r__1 = absxi / scale; - ssq += r__1 * r__1; - } - } -/* L10: */ - } - norm = scale * sqrt(ssq); - } - - ret_val = norm; - return ret_val; - -/* End of SNRM2. */ - -} /* snrm2_ */ - -/* Subroutine */ int srot_(integer *n, real *sx, integer *incx, real *sy, - integer *incy, real *c__, real *s) -{ - /* System generated locals */ - integer i__1; - - /* Local variables */ - static integer i__, ix, iy; - static real stemp; - - -/* - applies a plane rotation. - jack dongarra, linpack, 3/11/78. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --sy; - --sx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments not equal - to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - stemp = *c__ * sx[ix] + *s * sy[iy]; - sy[iy] = *c__ * sy[iy] - *s * sx[ix]; - sx[ix] = stemp; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* code for both increments equal to 1 */ - -L20: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - stemp = *c__ * sx[i__] + *s * sy[i__]; - sy[i__] = *c__ * sy[i__] - *s * sx[i__]; - sx[i__] = stemp; -/* L30: */ - } - return 0; -} /* srot_ */ - -/* Subroutine */ int sscal_(integer *n, real *sa, real *sx, integer *incx) -{ - /* System generated locals */ - integer i__1, i__2; - - /* Local variables */ - static integer i__, m, mp1, nincx; - - -/* - scales a vector by a constant. - uses unrolled loops for increment equal to 1. - jack dongarra, linpack, 3/11/78. - modified 3/93 to return if incx .le. 0. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --sx; - - /* Function Body */ - if (*n <= 0 || *incx <= 0) { - return 0; - } - if (*incx == 1) { - goto L20; - } - -/* code for increment not equal to 1 */ - - nincx = *n * *incx; - i__1 = nincx; - i__2 = *incx; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - sx[i__] = *sa * sx[i__]; -/* L10: */ - } - return 0; - -/* - code for increment equal to 1 - - - clean-up loop -*/ - -L20: - m = *n % 5; - if (m == 0) { - goto L40; - } - i__2 = m; - for (i__ = 1; i__ <= i__2; ++i__) { - sx[i__] = *sa * sx[i__]; -/* L30: */ - } - if (*n < 5) { - return 0; - } -L40: - mp1 = m + 1; - i__2 = *n; - for (i__ = mp1; i__ <= i__2; i__ += 5) { - sx[i__] = *sa * sx[i__]; - sx[i__ + 1] = *sa * sx[i__ + 1]; - sx[i__ + 2] = *sa * sx[i__ + 2]; - sx[i__ + 3] = *sa * sx[i__ + 3]; - sx[i__ + 4] = *sa * sx[i__ + 4]; -/* L50: */ - } - return 0; -} /* sscal_ */ - -/* Subroutine */ int sswap_(integer *n, real *sx, integer *incx, real *sy, - integer *incy) -{ - /* System generated locals */ - integer i__1; - - /* Local variables */ - static integer i__, m, ix, iy, mp1; - static real stemp; - - -/* - interchanges two vectors. - uses unrolled loops for increments equal to 1. - jack dongarra, linpack, 3/11/78. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --sy; - --sx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments not equal - to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - stemp = sx[ix]; - sx[ix] = sy[iy]; - sy[iy] = stemp; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* - code for both increments equal to 1 - - - clean-up loop -*/ - -L20: - m = *n % 3; - if (m == 0) { - goto L40; - } - i__1 = m; - for (i__ = 1; i__ <= i__1; ++i__) { - stemp = sx[i__]; - sx[i__] = sy[i__]; - sy[i__] = stemp; -/* L30: */ - } - if (*n < 3) { - return 0; - } -L40: - mp1 = m + 1; - i__1 = *n; - for (i__ = mp1; i__ <= i__1; i__ += 3) { - stemp = sx[i__]; - sx[i__] = sy[i__]; - sy[i__] = stemp; - stemp = sx[i__ + 1]; - sx[i__ + 1] = sy[i__ + 1]; - sy[i__ + 1] = stemp; - stemp = sx[i__ + 2]; - sx[i__ + 2] = sy[i__ + 2]; - sy[i__ + 2] = stemp; -/* L50: */ - } - return 0; -} /* sswap_ */ - -/* Subroutine */ int ssymv_(char *uplo, integer *n, real *alpha, real *a, - integer *lda, real *x, integer *incx, real *beta, real *y, integer * - incy) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j, ix, iy, jx, jy, kx, ky, info; - static real temp1, temp2; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - SSYMV performs the matrix-vector operation - - y := alpha*A*x + beta*y, - - where alpha and beta are scalars, x and y are n element vectors and - A is an n by n symmetric matrix. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the upper or lower - triangular part of the array A is to be referenced as - follows: - - UPLO = 'U' or 'u' Only the upper triangular part of A - is to be referenced. - - UPLO = 'L' or 'l' Only the lower triangular part of A - is to be referenced. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix A. - N must be at least zero. - Unchanged on exit. - - ALPHA - REAL . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - REAL array of DIMENSION ( LDA, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array A must contain the upper - triangular part of the symmetric matrix and the strictly - lower triangular part of A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array A must contain the lower - triangular part of the symmetric matrix and the strictly - upper triangular part of A is not referenced. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, n ). - Unchanged on exit. - - X - REAL array of dimension at least - ( 1 + ( n - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the n - element vector x. - Unchanged on exit. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - BETA - REAL . - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then Y need not be set on input. - Unchanged on exit. - - Y - REAL array of dimension at least - ( 1 + ( n - 1 )*abs( INCY ) ). - Before entry, the incremented array Y must contain the n - element vector y. On exit, Y is overwritten by the updated - vector y. - - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - --y; - - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*lda < max(1,*n)) { - info = 5; - } else if (*incx == 0) { - info = 7; - } else if (*incy == 0) { - info = 10; - } - if (info != 0) { - xerbla_("SSYMV ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0 || *alpha == 0.f && *beta == 1.f) { - return 0; - } - -/* Set up the start points in X and Y. */ - - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*n - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (*n - 1) * *incy; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through the triangular part - of A. - - First form y := beta*y. -*/ - - if (*beta != 1.f) { - if (*incy == 1) { - if (*beta == 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = 0.f; -/* L10: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = *beta * y[i__]; -/* L20: */ - } - } - } else { - iy = ky; - if (*beta == 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = 0.f; - iy += *incy; -/* L30: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = *beta * y[iy]; - iy += *incy; -/* L40: */ - } - } - } - } - if (*alpha == 0.f) { - return 0; - } - if (lsame_(uplo, "U")) { - -/* Form y when A is stored in upper triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[j]; - temp2 = 0.f; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - y[i__] += temp1 * a[i__ + j * a_dim1]; - temp2 += a[i__ + j * a_dim1] * x[i__]; -/* L50: */ - } - y[j] = y[j] + temp1 * a[j + j * a_dim1] + *alpha * temp2; -/* L60: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[jx]; - temp2 = 0.f; - ix = kx; - iy = ky; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - y[iy] += temp1 * a[i__ + j * a_dim1]; - temp2 += a[i__ + j * a_dim1] * x[ix]; - ix += *incx; - iy += *incy; -/* L70: */ - } - y[jy] = y[jy] + temp1 * a[j + j * a_dim1] + *alpha * temp2; - jx += *incx; - jy += *incy; -/* L80: */ - } - } - } else { - -/* Form y when A is stored in lower triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[j]; - temp2 = 0.f; - y[j] += temp1 * a[j + j * a_dim1]; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - y[i__] += temp1 * a[i__ + j * a_dim1]; - temp2 += a[i__ + j * a_dim1] * x[i__]; -/* L90: */ - } - y[j] += *alpha * temp2; -/* L100: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[jx]; - temp2 = 0.f; - y[jy] += temp1 * a[j + j * a_dim1]; - ix = jx; - iy = jy; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - ix += *incx; - iy += *incy; - y[iy] += temp1 * a[i__ + j * a_dim1]; - temp2 += a[i__ + j * a_dim1] * x[ix]; -/* L110: */ - } - y[jy] += *alpha * temp2; - jx += *incx; - jy += *incy; -/* L120: */ - } - } - } - - return 0; - -/* End of SSYMV . */ - -} /* ssymv_ */ - -/* Subroutine */ int ssyr2_(char *uplo, integer *n, real *alpha, real *x, - integer *incx, real *y, integer *incy, real *a, integer *lda) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j, ix, iy, jx, jy, kx, ky, info; - static real temp1, temp2; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - SSYR2 performs the symmetric rank 2 operation - - A := alpha*x*y' + alpha*y*x' + A, - - where alpha is a scalar, x and y are n element vectors and A is an n - by n symmetric matrix. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the upper or lower - triangular part of the array A is to be referenced as - follows: - - UPLO = 'U' or 'u' Only the upper triangular part of A - is to be referenced. - - UPLO = 'L' or 'l' Only the lower triangular part of A - is to be referenced. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix A. - N must be at least zero. - Unchanged on exit. - - ALPHA - REAL . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - X - REAL array of dimension at least - ( 1 + ( n - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the n - element vector x. - Unchanged on exit. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - Y - REAL array of dimension at least - ( 1 + ( n - 1 )*abs( INCY ) ). - Before entry, the incremented array Y must contain the n - element vector y. - Unchanged on exit. - - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. - - A - REAL array of DIMENSION ( LDA, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array A must contain the upper - triangular part of the symmetric matrix and the strictly - lower triangular part of A is not referenced. On exit, the - upper triangular part of the array A is overwritten by the - upper triangular part of the updated matrix. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array A must contain the lower - triangular part of the symmetric matrix and the strictly - upper triangular part of A is not referenced. On exit, the - lower triangular part of the array A is overwritten by the - lower triangular part of the updated matrix. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, n ). - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --x; - --y; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*incx == 0) { - info = 5; - } else if (*incy == 0) { - info = 7; - } else if (*lda < max(1,*n)) { - info = 9; - } - if (info != 0) { - xerbla_("SSYR2 ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0 || *alpha == 0.f) { - return 0; - } - -/* - Set up the start points in X and Y if the increments are not both - unity. -*/ - - if (*incx != 1 || *incy != 1) { - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*n - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (*n - 1) * *incy; - } - jx = kx; - jy = ky; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through the triangular part - of A. -*/ - - if (lsame_(uplo, "U")) { - -/* Form A when A is stored in the upper triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[j] != 0.f || y[j] != 0.f) { - temp1 = *alpha * y[j]; - temp2 = *alpha * x[j]; - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * - temp1 + y[i__] * temp2; -/* L10: */ - } - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0.f || y[jy] != 0.f) { - temp1 = *alpha * y[jy]; - temp2 = *alpha * x[jx]; - ix = kx; - iy = ky; - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * - temp1 + y[iy] * temp2; - ix += *incx; - iy += *incy; -/* L30: */ - } - } - jx += *incx; - jy += *incy; -/* L40: */ - } - } - } else { - -/* Form A when A is stored in the lower triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[j] != 0.f || y[j] != 0.f) { - temp1 = *alpha * y[j]; - temp2 = *alpha * x[j]; - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * - temp1 + y[i__] * temp2; -/* L50: */ - } - } -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0.f || y[jy] != 0.f) { - temp1 = *alpha * y[jy]; - temp2 = *alpha * x[jx]; - ix = jx; - iy = jy; - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * - temp1 + y[iy] * temp2; - ix += *incx; - iy += *incy; -/* L70: */ - } - } - jx += *incx; - jy += *incy; -/* L80: */ - } - } - } - - return 0; - -/* End of SSYR2 . */ - -} /* ssyr2_ */ - -/* Subroutine */ int ssyr2k_(char *uplo, char *trans, integer *n, integer *k, - real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta, - real *c__, integer *ldc) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, - i__3; - - /* Local variables */ - static integer i__, j, l, info; - static real temp1, temp2; - extern logical lsame_(char *, char *); - static integer nrowa; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - SSYR2K performs one of the symmetric rank 2k operations - - C := alpha*A*B' + alpha*B*A' + beta*C, - - or - - C := alpha*A'*B + alpha*B'*A + beta*C, - - where alpha and beta are scalars, C is an n by n symmetric matrix - and A and B are n by k matrices in the first case and k by n - matrices in the second case. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the upper or lower - triangular part of the array C is to be referenced as - follows: - - UPLO = 'U' or 'u' Only the upper triangular part of C - is to be referenced. - - UPLO = 'L' or 'l' Only the lower triangular part of C - is to be referenced. - - Unchanged on exit. - - TRANS - CHARACTER*1. - On entry, TRANS specifies the operation to be performed as - follows: - - TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' + - beta*C. - - TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A + - beta*C. - - TRANS = 'C' or 'c' C := alpha*A'*B + alpha*B'*A + - beta*C. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix C. N must be - at least zero. - Unchanged on exit. - - K - INTEGER. - On entry with TRANS = 'N' or 'n', K specifies the number - of columns of the matrices A and B, and on entry with - TRANS = 'T' or 't' or 'C' or 'c', K specifies the number - of rows of the matrices A and B. K must be at least zero. - Unchanged on exit. - - ALPHA - REAL . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - REAL array of DIMENSION ( LDA, ka ), where ka is - k when TRANS = 'N' or 'n', and is n otherwise. - Before entry with TRANS = 'N' or 'n', the leading n by k - part of the array A must contain the matrix A, otherwise - the leading k by n part of the array A must contain the - matrix A. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When TRANS = 'N' or 'n' - then LDA must be at least max( 1, n ), otherwise LDA must - be at least max( 1, k ). - Unchanged on exit. - - B - REAL array of DIMENSION ( LDB, kb ), where kb is - k when TRANS = 'N' or 'n', and is n otherwise. - Before entry with TRANS = 'N' or 'n', the leading n by k - part of the array B must contain the matrix B, otherwise - the leading k by n part of the array B must contain the - matrix B. - Unchanged on exit. - - LDB - INTEGER. - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. When TRANS = 'N' or 'n' - then LDB must be at least max( 1, n ), otherwise LDB must - be at least max( 1, k ). - Unchanged on exit. - - BETA - REAL . - On entry, BETA specifies the scalar beta. - Unchanged on exit. - - C - REAL array of DIMENSION ( LDC, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array C must contain the upper - triangular part of the symmetric matrix and the strictly - lower triangular part of C is not referenced. On exit, the - upper triangular part of the array C is overwritten by the - upper triangular part of the updated matrix. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array C must contain the lower - triangular part of the symmetric matrix and the strictly - upper triangular part of C is not referenced. On exit, the - lower triangular part of the array C is overwritten by the - lower triangular part of the updated matrix. - - LDC - INTEGER. - On entry, LDC specifies the first dimension of C as declared - in the calling (sub) program. LDC must be at least - max( 1, n ). - Unchanged on exit. - - - Level 3 Blas routine. - - - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - - /* Function Body */ - if (lsame_(trans, "N")) { - nrowa = *n; - } else { - nrowa = *k; - } - upper = lsame_(uplo, "U"); - - info = 0; - if (! upper && ! lsame_(uplo, "L")) { - info = 1; - } else if (! lsame_(trans, "N") && ! lsame_(trans, - "T") && ! lsame_(trans, "C")) { - info = 2; - } else if (*n < 0) { - info = 3; - } else if (*k < 0) { - info = 4; - } else if (*lda < max(1,nrowa)) { - info = 7; - } else if (*ldb < max(1,nrowa)) { - info = 9; - } else if (*ldc < max(1,*n)) { - info = 12; - } - if (info != 0) { - xerbla_("SSYR2K", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) { - return 0; - } - -/* And when alpha.eq.zero. */ - - if (*alpha == 0.f) { - if (upper) { - if (*beta == 0.f) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.f; -/* L10: */ - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L30: */ - } -/* L40: */ - } - } - } else { - if (*beta == 0.f) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.f; -/* L50: */ - } -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L70: */ - } -/* L80: */ - } - } - } - return 0; - } - -/* Start the operations. */ - - if (lsame_(trans, "N")) { - -/* Form C := alpha*A*B' + alpha*B*A' + C. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.f) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.f; -/* L90: */ - } - } else if (*beta != 1.f) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L100: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - if (a[j + l * a_dim1] != 0.f || b[j + l * b_dim1] != 0.f) - { - temp1 = *alpha * b[j + l * b_dim1]; - temp2 = *alpha * a[j + l * a_dim1]; - i__3 = j; - for (i__ = 1; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ - i__ + l * a_dim1] * temp1 + b[i__ + l * - b_dim1] * temp2; -/* L110: */ - } - } -/* L120: */ - } -/* L130: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.f) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.f; -/* L140: */ - } - } else if (*beta != 1.f) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L150: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - if (a[j + l * a_dim1] != 0.f || b[j + l * b_dim1] != 0.f) - { - temp1 = *alpha * b[j + l * b_dim1]; - temp2 = *alpha * a[j + l * a_dim1]; - i__3 = *n; - for (i__ = j; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ - i__ + l * a_dim1] * temp1 + b[i__ + l * - b_dim1] * temp2; -/* L160: */ - } - } -/* L170: */ - } -/* L180: */ - } - } - } else { - -/* Form C := alpha*A'*B + alpha*B'*A + C. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - temp1 = 0.f; - temp2 = 0.f; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; - temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; -/* L190: */ - } - if (*beta == 0.f) { - c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * - temp2; - } else { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] - + *alpha * temp1 + *alpha * temp2; - } -/* L200: */ - } -/* L210: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - temp1 = 0.f; - temp2 = 0.f; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; - temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; -/* L220: */ - } - if (*beta == 0.f) { - c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * - temp2; - } else { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] - + *alpha * temp1 + *alpha * temp2; - } -/* L230: */ - } -/* L240: */ - } - } - } - - return 0; - -/* End of SSYR2K. */ - -} /* ssyr2k_ */ - -/* Subroutine */ int ssyrk_(char *uplo, char *trans, integer *n, integer *k, - real *alpha, real *a, integer *lda, real *beta, real *c__, integer * - ldc) -{ - /* System generated locals */ - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j, l, info; - static real temp; - extern logical lsame_(char *, char *); - static integer nrowa; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - SSYRK performs one of the symmetric rank k operations - - C := alpha*A*A' + beta*C, - - or - - C := alpha*A'*A + beta*C, - - where alpha and beta are scalars, C is an n by n symmetric matrix - and A is an n by k matrix in the first case and a k by n matrix - in the second case. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the upper or lower - triangular part of the array C is to be referenced as - follows: - - UPLO = 'U' or 'u' Only the upper triangular part of C - is to be referenced. - - UPLO = 'L' or 'l' Only the lower triangular part of C - is to be referenced. - - Unchanged on exit. - - TRANS - CHARACTER*1. - On entry, TRANS specifies the operation to be performed as - follows: - - TRANS = 'N' or 'n' C := alpha*A*A' + beta*C. - - TRANS = 'T' or 't' C := alpha*A'*A + beta*C. - - TRANS = 'C' or 'c' C := alpha*A'*A + beta*C. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix C. N must be - at least zero. - Unchanged on exit. - - K - INTEGER. - On entry with TRANS = 'N' or 'n', K specifies the number - of columns of the matrix A, and on entry with - TRANS = 'T' or 't' or 'C' or 'c', K specifies the number - of rows of the matrix A. K must be at least zero. - Unchanged on exit. - - ALPHA - REAL . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - REAL array of DIMENSION ( LDA, ka ), where ka is - k when TRANS = 'N' or 'n', and is n otherwise. - Before entry with TRANS = 'N' or 'n', the leading n by k - part of the array A must contain the matrix A, otherwise - the leading k by n part of the array A must contain the - matrix A. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When TRANS = 'N' or 'n' - then LDA must be at least max( 1, n ), otherwise LDA must - be at least max( 1, k ). - Unchanged on exit. - - BETA - REAL . - On entry, BETA specifies the scalar beta. - Unchanged on exit. - - C - REAL array of DIMENSION ( LDC, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array C must contain the upper - triangular part of the symmetric matrix and the strictly - lower triangular part of C is not referenced. On exit, the - upper triangular part of the array C is overwritten by the - upper triangular part of the updated matrix. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array C must contain the lower - triangular part of the symmetric matrix and the strictly - upper triangular part of C is not referenced. On exit, the - lower triangular part of the array C is overwritten by the - lower triangular part of the updated matrix. - - LDC - INTEGER. - On entry, LDC specifies the first dimension of C as declared - in the calling (sub) program. LDC must be at least - max( 1, n ). - Unchanged on exit. - - - Level 3 Blas routine. - - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - - /* Function Body */ - if (lsame_(trans, "N")) { - nrowa = *n; - } else { - nrowa = *k; - } - upper = lsame_(uplo, "U"); - - info = 0; - if (! upper && ! lsame_(uplo, "L")) { - info = 1; - } else if (! lsame_(trans, "N") && ! lsame_(trans, - "T") && ! lsame_(trans, "C")) { - info = 2; - } else if (*n < 0) { - info = 3; - } else if (*k < 0) { - info = 4; - } else if (*lda < max(1,nrowa)) { - info = 7; - } else if (*ldc < max(1,*n)) { - info = 10; - } - if (info != 0) { - xerbla_("SSYRK ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) { - return 0; - } - -/* And when alpha.eq.zero. */ - - if (*alpha == 0.f) { - if (upper) { - if (*beta == 0.f) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.f; -/* L10: */ - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L30: */ - } -/* L40: */ - } - } - } else { - if (*beta == 0.f) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.f; -/* L50: */ - } -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L70: */ - } -/* L80: */ - } - } - } - return 0; - } - -/* Start the operations. */ - - if (lsame_(trans, "N")) { - -/* Form C := alpha*A*A' + beta*C. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.f) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.f; -/* L90: */ - } - } else if (*beta != 1.f) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L100: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - if (a[j + l * a_dim1] != 0.f) { - temp = *alpha * a[j + l * a_dim1]; - i__3 = j; - for (i__ = 1; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] += temp * a[i__ + l * - a_dim1]; -/* L110: */ - } - } -/* L120: */ - } -/* L130: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.f) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.f; -/* L140: */ - } - } else if (*beta != 1.f) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L150: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - if (a[j + l * a_dim1] != 0.f) { - temp = *alpha * a[j + l * a_dim1]; - i__3 = *n; - for (i__ = j; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] += temp * a[i__ + l * - a_dim1]; -/* L160: */ - } - } -/* L170: */ - } -/* L180: */ - } - } - } else { - -/* Form C := alpha*A'*A + beta*C. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = 0.f; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; -/* L190: */ - } - if (*beta == 0.f) { - c__[i__ + j * c_dim1] = *alpha * temp; - } else { - c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ - i__ + j * c_dim1]; - } -/* L200: */ - } -/* L210: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - temp = 0.f; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; -/* L220: */ - } - if (*beta == 0.f) { - c__[i__ + j * c_dim1] = *alpha * temp; - } else { - c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ - i__ + j * c_dim1]; - } -/* L230: */ - } -/* L240: */ - } - } - } - - return 0; - -/* End of SSYRK . */ - -} /* ssyrk_ */ - -/* Subroutine */ int strmm_(char *side, char *uplo, char *transa, char *diag, - integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, - integer *ldb) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j, k, info; - static real temp; - static logical lside; - extern logical lsame_(char *, char *); - static integer nrowa; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical nounit; - - -/* - Purpose - ======= - - STRMM performs one of the matrix-matrix operations - - B := alpha*op( A )*B, or B := alpha*B*op( A ), - - where alpha is a scalar, B is an m by n matrix, A is a unit, or - non-unit, upper or lower triangular matrix and op( A ) is one of - - op( A ) = A or op( A ) = A'. - - Parameters - ========== - - SIDE - CHARACTER*1. - On entry, SIDE specifies whether op( A ) multiplies B from - the left or right as follows: - - SIDE = 'L' or 'l' B := alpha*op( A )*B. - - SIDE = 'R' or 'r' B := alpha*B*op( A ). - - Unchanged on exit. - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the matrix A is an upper or - lower triangular matrix as follows: - - UPLO = 'U' or 'u' A is an upper triangular matrix. - - UPLO = 'L' or 'l' A is a lower triangular matrix. - - Unchanged on exit. - - TRANSA - CHARACTER*1. - On entry, TRANSA specifies the form of op( A ) to be used in - the matrix multiplication as follows: - - TRANSA = 'N' or 'n' op( A ) = A. - - TRANSA = 'T' or 't' op( A ) = A'. - - TRANSA = 'C' or 'c' op( A ) = A'. - - Unchanged on exit. - - DIAG - CHARACTER*1. - On entry, DIAG specifies whether or not A is unit triangular - as follows: - - DIAG = 'U' or 'u' A is assumed to be unit triangular. - - DIAG = 'N' or 'n' A is not assumed to be unit - triangular. - - Unchanged on exit. - - M - INTEGER. - On entry, M specifies the number of rows of B. M must be at - least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of B. N must be - at least zero. - Unchanged on exit. - - ALPHA - REAL . - On entry, ALPHA specifies the scalar alpha. When alpha is - zero then A is not referenced and B need not be set before - entry. - Unchanged on exit. - - A - REAL array of DIMENSION ( LDA, k ), where k is m - when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. - Before entry with UPLO = 'U' or 'u', the leading k by k - upper triangular part of the array A must contain the upper - triangular matrix and the strictly lower triangular part of - A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading k by k - lower triangular part of the array A must contain the lower - triangular matrix and the strictly upper triangular part of - A is not referenced. - Note that when DIAG = 'U' or 'u', the diagonal elements of - A are not referenced either, but are assumed to be unity. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When SIDE = 'L' or 'l' then - LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' - then LDA must be at least max( 1, n ). - Unchanged on exit. - - B - REAL array of DIMENSION ( LDB, n ). - Before entry, the leading m by n part of the array B must - contain the matrix B, and on exit is overwritten by the - transformed matrix. - - LDB - INTEGER. - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. LDB must be at least - max( 1, m ). - Unchanged on exit. - - - Level 3 Blas routine. - - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - lside = lsame_(side, "L"); - if (lside) { - nrowa = *m; - } else { - nrowa = *n; - } - nounit = lsame_(diag, "N"); - upper = lsame_(uplo, "U"); - - info = 0; - if (! lside && ! lsame_(side, "R")) { - info = 1; - } else if (! upper && ! lsame_(uplo, "L")) { - info = 2; - } else if (! lsame_(transa, "N") && ! lsame_(transa, - "T") && ! lsame_(transa, "C")) { - info = 3; - } else if (! lsame_(diag, "U") && ! lsame_(diag, - "N")) { - info = 4; - } else if (*m < 0) { - info = 5; - } else if (*n < 0) { - info = 6; - } else if (*lda < max(1,nrowa)) { - info = 9; - } else if (*ldb < max(1,*m)) { - info = 11; - } - if (info != 0) { - xerbla_("STRMM ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - -/* And when alpha.eq.zero. */ - - if (*alpha == 0.f) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = 0.f; -/* L10: */ - } -/* L20: */ - } - return 0; - } - -/* Start the operations. */ - - if (lside) { - if (lsame_(transa, "N")) { - -/* Form B := alpha*A*B. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (k = 1; k <= i__2; ++k) { - if (b[k + j * b_dim1] != 0.f) { - temp = *alpha * b[k + j * b_dim1]; - i__3 = k - 1; - for (i__ = 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] += temp * a[i__ + k * - a_dim1]; -/* L30: */ - } - if (nounit) { - temp *= a[k + k * a_dim1]; - } - b[k + j * b_dim1] = temp; - } -/* L40: */ - } -/* L50: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - for (k = *m; k >= 1; --k) { - if (b[k + j * b_dim1] != 0.f) { - temp = *alpha * b[k + j * b_dim1]; - b[k + j * b_dim1] = temp; - if (nounit) { - b[k + j * b_dim1] *= a[k + k * a_dim1]; - } - i__2 = *m; - for (i__ = k + 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] += temp * a[i__ + k * - a_dim1]; -/* L60: */ - } - } -/* L70: */ - } -/* L80: */ - } - } - } else { - -/* Form B := alpha*A'*B. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - for (i__ = *m; i__ >= 1; --i__) { - temp = b[i__ + j * b_dim1]; - if (nounit) { - temp *= a[i__ + i__ * a_dim1]; - } - i__2 = i__ - 1; - for (k = 1; k <= i__2; ++k) { - temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; -/* L90: */ - } - b[i__ + j * b_dim1] = *alpha * temp; -/* L100: */ - } -/* L110: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = b[i__ + j * b_dim1]; - if (nounit) { - temp *= a[i__ + i__ * a_dim1]; - } - i__3 = *m; - for (k = i__ + 1; k <= i__3; ++k) { - temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; -/* L120: */ - } - b[i__ + j * b_dim1] = *alpha * temp; -/* L130: */ - } -/* L140: */ - } - } - } - } else { - if (lsame_(transa, "N")) { - -/* Form B := alpha*B*A. */ - - if (upper) { - for (j = *n; j >= 1; --j) { - temp = *alpha; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; -/* L150: */ - } - i__1 = j - 1; - for (k = 1; k <= i__1; ++k) { - if (a[k + j * a_dim1] != 0.f) { - temp = *alpha * a[k + j * a_dim1]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] += temp * b[i__ + k * - b_dim1]; -/* L160: */ - } - } -/* L170: */ - } -/* L180: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp = *alpha; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; -/* L190: */ - } - i__2 = *n; - for (k = j + 1; k <= i__2; ++k) { - if (a[k + j * a_dim1] != 0.f) { - temp = *alpha * a[k + j * a_dim1]; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] += temp * b[i__ + k * - b_dim1]; -/* L200: */ - } - } -/* L210: */ - } -/* L220: */ - } - } - } else { - -/* Form B := alpha*B*A'. */ - - if (upper) { - i__1 = *n; - for (k = 1; k <= i__1; ++k) { - i__2 = k - 1; - for (j = 1; j <= i__2; ++j) { - if (a[j + k * a_dim1] != 0.f) { - temp = *alpha * a[j + k * a_dim1]; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] += temp * b[i__ + k * - b_dim1]; -/* L230: */ - } - } -/* L240: */ - } - temp = *alpha; - if (nounit) { - temp *= a[k + k * a_dim1]; - } - if (temp != 1.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; -/* L250: */ - } - } -/* L260: */ - } - } else { - for (k = *n; k >= 1; --k) { - i__1 = *n; - for (j = k + 1; j <= i__1; ++j) { - if (a[j + k * a_dim1] != 0.f) { - temp = *alpha * a[j + k * a_dim1]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] += temp * b[i__ + k * - b_dim1]; -/* L270: */ - } - } -/* L280: */ - } - temp = *alpha; - if (nounit) { - temp *= a[k + k * a_dim1]; - } - if (temp != 1.f) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; -/* L290: */ - } - } -/* L300: */ - } - } - } - } - - return 0; - -/* End of STRMM . */ - -} /* strmm_ */ - -/* Subroutine */ int strmv_(char *uplo, char *trans, char *diag, integer *n, - real *a, integer *lda, real *x, integer *incx) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j, ix, jx, kx, info; - static real temp; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical nounit; - - -/* - Purpose - ======= - - STRMV performs one of the matrix-vector operations - - x := A*x, or x := A'*x, - - where x is an n element vector and A is an n by n unit, or non-unit, - upper or lower triangular matrix. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the matrix is an upper or - lower triangular matrix as follows: - - UPLO = 'U' or 'u' A is an upper triangular matrix. - - UPLO = 'L' or 'l' A is a lower triangular matrix. - - Unchanged on exit. - - TRANS - CHARACTER*1. - On entry, TRANS specifies the operation to be performed as - follows: - - TRANS = 'N' or 'n' x := A*x. - - TRANS = 'T' or 't' x := A'*x. - - TRANS = 'C' or 'c' x := A'*x. - - Unchanged on exit. - - DIAG - CHARACTER*1. - On entry, DIAG specifies whether or not A is unit - triangular as follows: - - DIAG = 'U' or 'u' A is assumed to be unit triangular. - - DIAG = 'N' or 'n' A is not assumed to be unit - triangular. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix A. - N must be at least zero. - Unchanged on exit. - - A - REAL array of DIMENSION ( LDA, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array A must contain the upper - triangular matrix and the strictly lower triangular part of - A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array A must contain the lower - triangular matrix and the strictly upper triangular part of - A is not referenced. - Note that when DIAG = 'U' or 'u', the diagonal elements of - A are not referenced either, but are assumed to be unity. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, n ). - Unchanged on exit. - - X - REAL array of dimension at least - ( 1 + ( n - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the n - element vector x. On exit, X is overwritten with the - tranformed vector x. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - info = 1; - } else if (! lsame_(trans, "N") && ! lsame_(trans, - "T") && ! lsame_(trans, "C")) { - info = 2; - } else if (! lsame_(diag, "U") && ! lsame_(diag, - "N")) { - info = 3; - } else if (*n < 0) { - info = 4; - } else if (*lda < max(1,*n)) { - info = 6; - } else if (*incx == 0) { - info = 8; - } - if (info != 0) { - xerbla_("STRMV ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - - nounit = lsame_(diag, "N"); - -/* - Set up the start point in X if the increment is not unity. This - will be ( N - 1 )*INCX too small for descending loops. -*/ - - if (*incx <= 0) { - kx = 1 - (*n - 1) * *incx; - } else if (*incx != 1) { - kx = 1; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through A. -*/ - - if (lsame_(trans, "N")) { - -/* Form x := A*x. */ - - if (lsame_(uplo, "U")) { - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[j] != 0.f) { - temp = x[j]; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - x[i__] += temp * a[i__ + j * a_dim1]; -/* L10: */ - } - if (nounit) { - x[j] *= a[j + j * a_dim1]; - } - } -/* L20: */ - } - } else { - jx = kx; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0.f) { - temp = x[jx]; - ix = kx; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - x[ix] += temp * a[i__ + j * a_dim1]; - ix += *incx; -/* L30: */ - } - if (nounit) { - x[jx] *= a[j + j * a_dim1]; - } - } - jx += *incx; -/* L40: */ - } - } - } else { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - if (x[j] != 0.f) { - temp = x[j]; - i__1 = j + 1; - for (i__ = *n; i__ >= i__1; --i__) { - x[i__] += temp * a[i__ + j * a_dim1]; -/* L50: */ - } - if (nounit) { - x[j] *= a[j + j * a_dim1]; - } - } -/* L60: */ - } - } else { - kx += (*n - 1) * *incx; - jx = kx; - for (j = *n; j >= 1; --j) { - if (x[jx] != 0.f) { - temp = x[jx]; - ix = kx; - i__1 = j + 1; - for (i__ = *n; i__ >= i__1; --i__) { - x[ix] += temp * a[i__ + j * a_dim1]; - ix -= *incx; -/* L70: */ - } - if (nounit) { - x[jx] *= a[j + j * a_dim1]; - } - } - jx -= *incx; -/* L80: */ - } - } - } - } else { - -/* Form x := A'*x. */ - - if (lsame_(uplo, "U")) { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - temp = x[j]; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - for (i__ = j - 1; i__ >= 1; --i__) { - temp += a[i__ + j * a_dim1] * x[i__]; -/* L90: */ - } - x[j] = temp; -/* L100: */ - } - } else { - jx = kx + (*n - 1) * *incx; - for (j = *n; j >= 1; --j) { - temp = x[jx]; - ix = jx; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - for (i__ = j - 1; i__ >= 1; --i__) { - ix -= *incx; - temp += a[i__ + j * a_dim1] * x[ix]; -/* L110: */ - } - x[jx] = temp; - jx -= *incx; -/* L120: */ - } - } - } else { - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp = x[j]; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - temp += a[i__ + j * a_dim1] * x[i__]; -/* L130: */ - } - x[j] = temp; -/* L140: */ - } - } else { - jx = kx; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp = x[jx]; - ix = jx; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - ix += *incx; - temp += a[i__ + j * a_dim1] * x[ix]; -/* L150: */ - } - x[jx] = temp; - jx += *incx; -/* L160: */ - } - } - } - } - - return 0; - -/* End of STRMV . */ - -} /* strmv_ */ - -/* Subroutine */ int strsm_(char *side, char *uplo, char *transa, char *diag, - integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, - integer *ldb) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j, k, info; - static real temp; - static logical lside; - extern logical lsame_(char *, char *); - static integer nrowa; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical nounit; - - -/* - Purpose - ======= - - STRSM solves one of the matrix equations - - op( A )*X = alpha*B, or X*op( A ) = alpha*B, - - where alpha is a scalar, X and B are m by n matrices, A is a unit, or - non-unit, upper or lower triangular matrix and op( A ) is one of - - op( A ) = A or op( A ) = A'. - - The matrix X is overwritten on B. - - Parameters - ========== - - SIDE - CHARACTER*1. - On entry, SIDE specifies whether op( A ) appears on the left - or right of X as follows: - - SIDE = 'L' or 'l' op( A )*X = alpha*B. - - SIDE = 'R' or 'r' X*op( A ) = alpha*B. - - Unchanged on exit. - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the matrix A is an upper or - lower triangular matrix as follows: - - UPLO = 'U' or 'u' A is an upper triangular matrix. - - UPLO = 'L' or 'l' A is a lower triangular matrix. - - Unchanged on exit. - - TRANSA - CHARACTER*1. - On entry, TRANSA specifies the form of op( A ) to be used in - the matrix multiplication as follows: - - TRANSA = 'N' or 'n' op( A ) = A. - - TRANSA = 'T' or 't' op( A ) = A'. - - TRANSA = 'C' or 'c' op( A ) = A'. - - Unchanged on exit. - - DIAG - CHARACTER*1. - On entry, DIAG specifies whether or not A is unit triangular - as follows: - - DIAG = 'U' or 'u' A is assumed to be unit triangular. - - DIAG = 'N' or 'n' A is not assumed to be unit - triangular. - - Unchanged on exit. - - M - INTEGER. - On entry, M specifies the number of rows of B. M must be at - least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of B. N must be - at least zero. - Unchanged on exit. - - ALPHA - REAL . - On entry, ALPHA specifies the scalar alpha. When alpha is - zero then A is not referenced and B need not be set before - entry. - Unchanged on exit. - - A - REAL array of DIMENSION ( LDA, k ), where k is m - when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. - Before entry with UPLO = 'U' or 'u', the leading k by k - upper triangular part of the array A must contain the upper - triangular matrix and the strictly lower triangular part of - A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading k by k - lower triangular part of the array A must contain the lower - triangular matrix and the strictly upper triangular part of - A is not referenced. - Note that when DIAG = 'U' or 'u', the diagonal elements of - A are not referenced either, but are assumed to be unity. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When SIDE = 'L' or 'l' then - LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' - then LDA must be at least max( 1, n ). - Unchanged on exit. - - B - REAL array of DIMENSION ( LDB, n ). - Before entry, the leading m by n part of the array B must - contain the right-hand side matrix B, and on exit is - overwritten by the solution matrix X. - - LDB - INTEGER. - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. LDB must be at least - max( 1, m ). - Unchanged on exit. - - - Level 3 Blas routine. - - - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - lside = lsame_(side, "L"); - if (lside) { - nrowa = *m; - } else { - nrowa = *n; - } - nounit = lsame_(diag, "N"); - upper = lsame_(uplo, "U"); - - info = 0; - if (! lside && ! lsame_(side, "R")) { - info = 1; - } else if (! upper && ! lsame_(uplo, "L")) { - info = 2; - } else if (! lsame_(transa, "N") && ! lsame_(transa, - "T") && ! lsame_(transa, "C")) { - info = 3; - } else if (! lsame_(diag, "U") && ! lsame_(diag, - "N")) { - info = 4; - } else if (*m < 0) { - info = 5; - } else if (*n < 0) { - info = 6; - } else if (*lda < max(1,nrowa)) { - info = 9; - } else if (*ldb < max(1,*m)) { - info = 11; - } - if (info != 0) { - xerbla_("STRSM ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - -/* And when alpha.eq.zero. */ - - if (*alpha == 0.f) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = 0.f; -/* L10: */ - } -/* L20: */ - } - return 0; - } - -/* Start the operations. */ - - if (lside) { - if (lsame_(transa, "N")) { - -/* Form B := alpha*inv( A )*B. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*alpha != 1.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] - ; -/* L30: */ - } - } - for (k = *m; k >= 1; --k) { - if (b[k + j * b_dim1] != 0.f) { - if (nounit) { - b[k + j * b_dim1] /= a[k + k * a_dim1]; - } - i__2 = k - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ - i__ + k * a_dim1]; -/* L40: */ - } - } -/* L50: */ - } -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*alpha != 1.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] - ; -/* L70: */ - } - } - i__2 = *m; - for (k = 1; k <= i__2; ++k) { - if (b[k + j * b_dim1] != 0.f) { - if (nounit) { - b[k + j * b_dim1] /= a[k + k * a_dim1]; - } - i__3 = *m; - for (i__ = k + 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ - i__ + k * a_dim1]; -/* L80: */ - } - } -/* L90: */ - } -/* L100: */ - } - } - } else { - -/* Form B := alpha*inv( A' )*B. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = *alpha * b[i__ + j * b_dim1]; - i__3 = i__ - 1; - for (k = 1; k <= i__3; ++k) { - temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; -/* L110: */ - } - if (nounit) { - temp /= a[i__ + i__ * a_dim1]; - } - b[i__ + j * b_dim1] = temp; -/* L120: */ - } -/* L130: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - for (i__ = *m; i__ >= 1; --i__) { - temp = *alpha * b[i__ + j * b_dim1]; - i__2 = *m; - for (k = i__ + 1; k <= i__2; ++k) { - temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; -/* L140: */ - } - if (nounit) { - temp /= a[i__ + i__ * a_dim1]; - } - b[i__ + j * b_dim1] = temp; -/* L150: */ - } -/* L160: */ - } - } - } - } else { - if (lsame_(transa, "N")) { - -/* Form B := alpha*B*inv( A ). */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*alpha != 1.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] - ; -/* L170: */ - } - } - i__2 = j - 1; - for (k = 1; k <= i__2; ++k) { - if (a[k + j * a_dim1] != 0.f) { - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ - i__ + k * b_dim1]; -/* L180: */ - } - } -/* L190: */ - } - if (nounit) { - temp = 1.f / a[j + j * a_dim1]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; -/* L200: */ - } - } -/* L210: */ - } - } else { - for (j = *n; j >= 1; --j) { - if (*alpha != 1.f) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] - ; -/* L220: */ - } - } - i__1 = *n; - for (k = j + 1; k <= i__1; ++k) { - if (a[k + j * a_dim1] != 0.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ - i__ + k * b_dim1]; -/* L230: */ - } - } -/* L240: */ - } - if (nounit) { - temp = 1.f / a[j + j * a_dim1]; - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; -/* L250: */ - } - } -/* L260: */ - } - } - } else { - -/* Form B := alpha*B*inv( A' ). */ - - if (upper) { - for (k = *n; k >= 1; --k) { - if (nounit) { - temp = 1.f / a[k + k * a_dim1]; - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; -/* L270: */ - } - } - i__1 = k - 1; - for (j = 1; j <= i__1; ++j) { - if (a[j + k * a_dim1] != 0.f) { - temp = a[j + k * a_dim1]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] -= temp * b[i__ + k * - b_dim1]; -/* L280: */ - } - } -/* L290: */ - } - if (*alpha != 1.f) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] - ; -/* L300: */ - } - } -/* L310: */ - } - } else { - i__1 = *n; - for (k = 1; k <= i__1; ++k) { - if (nounit) { - temp = 1.f / a[k + k * a_dim1]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; -/* L320: */ - } - } - i__2 = *n; - for (j = k + 1; j <= i__2; ++j) { - if (a[j + k * a_dim1] != 0.f) { - temp = a[j + k * a_dim1]; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] -= temp * b[i__ + k * - b_dim1]; -/* L330: */ - } - } -/* L340: */ - } - if (*alpha != 1.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] - ; -/* L350: */ - } - } -/* L360: */ - } - } - } - } - - return 0; - -/* End of STRSM . */ - -} /* strsm_ */ - -/* Subroutine */ int zaxpy_(integer *n, doublecomplex *za, doublecomplex *zx, - integer *incx, doublecomplex *zy, integer *incy) -{ - /* System generated locals */ - integer i__1, i__2, i__3, i__4; - doublecomplex z__1, z__2; - - /* Local variables */ - static integer i__, ix, iy; - extern doublereal dcabs1_(doublecomplex *); - - -/* - constant times a vector plus a vector. - jack dongarra, 3/11/78. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - /* Parameter adjustments */ - --zy; - --zx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (dcabs1_(za) == 0.) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments - not equal to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - i__3 = iy; - i__4 = ix; - z__2.r = za->r * zx[i__4].r - za->i * zx[i__4].i, z__2.i = za->r * zx[ - i__4].i + za->i * zx[i__4].r; - z__1.r = zy[i__3].r + z__2.r, z__1.i = zy[i__3].i + z__2.i; - zy[i__2].r = z__1.r, zy[i__2].i = z__1.i; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* code for both increments equal to 1 */ - -L20: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__; - i__4 = i__; - z__2.r = za->r * zx[i__4].r - za->i * zx[i__4].i, z__2.i = za->r * zx[ - i__4].i + za->i * zx[i__4].r; - z__1.r = zy[i__3].r + z__2.r, z__1.i = zy[i__3].i + z__2.i; - zy[i__2].r = z__1.r, zy[i__2].i = z__1.i; -/* L30: */ - } - return 0; -} /* zaxpy_ */ - -/* Subroutine */ int zcopy_(integer *n, doublecomplex *zx, integer *incx, - doublecomplex *zy, integer *incy) -{ - /* System generated locals */ - integer i__1, i__2, i__3; - - /* Local variables */ - static integer i__, ix, iy; - - -/* - copies a vector, x, to a vector, y. - jack dongarra, linpack, 4/11/78. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --zy; - --zx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments - not equal to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - i__3 = ix; - zy[i__2].r = zx[i__3].r, zy[i__2].i = zx[i__3].i; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* code for both increments equal to 1 */ - -L20: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__; - zy[i__2].r = zx[i__3].r, zy[i__2].i = zx[i__3].i; -/* L30: */ - } - return 0; -} /* zcopy_ */ - -/* Double Complex */ VOID zdotc_(doublecomplex * ret_val, integer *n, - doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy) -{ - /* System generated locals */ - integer i__1, i__2; - doublecomplex z__1, z__2, z__3; - - /* Builtin functions */ - void d_cnjg(doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer i__, ix, iy; - static doublecomplex ztemp; - - -/* - forms the dot product of a vector. - jack dongarra, 3/11/78. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - /* Parameter adjustments */ - --zy; - --zx; - - /* Function Body */ - ztemp.r = 0., ztemp.i = 0.; - ret_val->r = 0., ret_val->i = 0.; - if (*n <= 0) { - return ; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments - not equal to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - d_cnjg(&z__3, &zx[ix]); - i__2 = iy; - z__2.r = z__3.r * zy[i__2].r - z__3.i * zy[i__2].i, z__2.i = z__3.r * - zy[i__2].i + z__3.i * zy[i__2].r; - z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i; - ztemp.r = z__1.r, ztemp.i = z__1.i; - ix += *incx; - iy += *incy; -/* L10: */ - } - ret_val->r = ztemp.r, ret_val->i = ztemp.i; - return ; - -/* code for both increments equal to 1 */ - -L20: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - d_cnjg(&z__3, &zx[i__]); - i__2 = i__; - z__2.r = z__3.r * zy[i__2].r - z__3.i * zy[i__2].i, z__2.i = z__3.r * - zy[i__2].i + z__3.i * zy[i__2].r; - z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i; - ztemp.r = z__1.r, ztemp.i = z__1.i; -/* L30: */ - } - ret_val->r = ztemp.r, ret_val->i = ztemp.i; - return ; -} /* zdotc_ */ - -/* Double Complex */ VOID zdotu_(doublecomplex * ret_val, integer *n, - doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy) -{ - /* System generated locals */ - integer i__1, i__2, i__3; - doublecomplex z__1, z__2; - - /* Local variables */ - static integer i__, ix, iy; - static doublecomplex ztemp; - - -/* - forms the dot product of two vectors. - jack dongarra, 3/11/78. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - /* Parameter adjustments */ - --zy; - --zx; - - /* Function Body */ - ztemp.r = 0., ztemp.i = 0.; - ret_val->r = 0., ret_val->i = 0.; - if (*n <= 0) { - return ; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments - not equal to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = ix; - i__3 = iy; - z__2.r = zx[i__2].r * zy[i__3].r - zx[i__2].i * zy[i__3].i, z__2.i = - zx[i__2].r * zy[i__3].i + zx[i__2].i * zy[i__3].r; - z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i; - ztemp.r = z__1.r, ztemp.i = z__1.i; - ix += *incx; - iy += *incy; -/* L10: */ - } - ret_val->r = ztemp.r, ret_val->i = ztemp.i; - return ; - -/* code for both increments equal to 1 */ - -L20: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__; - z__2.r = zx[i__2].r * zy[i__3].r - zx[i__2].i * zy[i__3].i, z__2.i = - zx[i__2].r * zy[i__3].i + zx[i__2].i * zy[i__3].r; - z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i; - ztemp.r = z__1.r, ztemp.i = z__1.i; -/* L30: */ - } - ret_val->r = ztemp.r, ret_val->i = ztemp.i; - return ; -} /* zdotu_ */ - -/* Subroutine */ int zdscal_(integer *n, doublereal *da, doublecomplex *zx, - integer *incx) -{ - /* System generated locals */ - integer i__1, i__2, i__3; - doublecomplex z__1, z__2; - - /* Local variables */ - static integer i__, ix; - - -/* - scales a vector by a constant. - jack dongarra, 3/11/78. - modified 3/93 to return if incx .le. 0. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --zx; - - /* Function Body */ - if (*n <= 0 || *incx <= 0) { - return 0; - } - if (*incx == 1) { - goto L20; - } - -/* code for increment not equal to 1 */ - - ix = 1; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = ix; - z__2.r = *da, z__2.i = 0.; - i__3 = ix; - z__1.r = z__2.r * zx[i__3].r - z__2.i * zx[i__3].i, z__1.i = z__2.r * - zx[i__3].i + z__2.i * zx[i__3].r; - zx[i__2].r = z__1.r, zx[i__2].i = z__1.i; - ix += *incx; -/* L10: */ - } - return 0; - -/* code for increment equal to 1 */ - -L20: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - z__2.r = *da, z__2.i = 0.; - i__3 = i__; - z__1.r = z__2.r * zx[i__3].r - z__2.i * zx[i__3].i, z__1.i = z__2.r * - zx[i__3].i + z__2.i * zx[i__3].r; - zx[i__2].r = z__1.r, zx[i__2].i = z__1.i; -/* L30: */ - } - return 0; -} /* zdscal_ */ - -/* Subroutine */ int zgemm_(char *transa, char *transb, integer *m, integer * - n, integer *k, doublecomplex *alpha, doublecomplex *a, integer *lda, - doublecomplex *b, integer *ldb, doublecomplex *beta, doublecomplex * - c__, integer *ldc) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, - i__3, i__4, i__5, i__6; - doublecomplex z__1, z__2, z__3, z__4; - - /* Builtin functions */ - void d_cnjg(doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer i__, j, l, info; - static logical nota, notb; - static doublecomplex temp; - static logical conja, conjb; - static integer ncola; - extern logical lsame_(char *, char *); - static integer nrowa, nrowb; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - ZGEMM performs one of the matrix-matrix operations - - C := alpha*op( A )*op( B ) + beta*C, - - where op( X ) is one of - - op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ), - - alpha and beta are scalars, and A, B and C are matrices, with op( A ) - an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. - - Parameters - ========== - - TRANSA - CHARACTER*1. - On entry, TRANSA specifies the form of op( A ) to be used in - the matrix multiplication as follows: - - TRANSA = 'N' or 'n', op( A ) = A. - - TRANSA = 'T' or 't', op( A ) = A'. - - TRANSA = 'C' or 'c', op( A ) = conjg( A' ). - - Unchanged on exit. - - TRANSB - CHARACTER*1. - On entry, TRANSB specifies the form of op( B ) to be used in - the matrix multiplication as follows: - - TRANSB = 'N' or 'n', op( B ) = B. - - TRANSB = 'T' or 't', op( B ) = B'. - - TRANSB = 'C' or 'c', op( B ) = conjg( B' ). - - Unchanged on exit. - - M - INTEGER. - On entry, M specifies the number of rows of the matrix - op( A ) and of the matrix C. M must be at least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of the matrix - op( B ) and the number of columns of the matrix C. N must be - at least zero. - Unchanged on exit. - - K - INTEGER. - On entry, K specifies the number of columns of the matrix - op( A ) and the number of rows of the matrix op( B ). K must - be at least zero. - Unchanged on exit. - - ALPHA - COMPLEX*16 . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is - k when TRANSA = 'N' or 'n', and is m otherwise. - Before entry with TRANSA = 'N' or 'n', the leading m by k - part of the array A must contain the matrix A, otherwise - the leading k by m part of the array A must contain the - matrix A. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When TRANSA = 'N' or 'n' then - LDA must be at least max( 1, m ), otherwise LDA must be at - least max( 1, k ). - Unchanged on exit. - - B - COMPLEX*16 array of DIMENSION ( LDB, kb ), where kb is - n when TRANSB = 'N' or 'n', and is k otherwise. - Before entry with TRANSB = 'N' or 'n', the leading k by n - part of the array B must contain the matrix B, otherwise - the leading n by k part of the array B must contain the - matrix B. - Unchanged on exit. - - LDB - INTEGER. - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. When TRANSB = 'N' or 'n' then - LDB must be at least max( 1, k ), otherwise LDB must be at - least max( 1, n ). - Unchanged on exit. - - BETA - COMPLEX*16 . - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then C need not be set on input. - Unchanged on exit. - - C - COMPLEX*16 array of DIMENSION ( LDC, n ). - Before entry, the leading m by n part of the array C must - contain the matrix C, except when beta is zero, in which - case C need not be set on entry. - On exit, the array C is overwritten by the m by n matrix - ( alpha*op( A )*op( B ) + beta*C ). - - LDC - INTEGER. - On entry, LDC specifies the first dimension of C as declared - in the calling (sub) program. LDC must be at least - max( 1, m ). - Unchanged on exit. - - - Level 3 Blas routine. - - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - - Set NOTA and NOTB as true if A and B respectively are not - conjugated or transposed, set CONJA and CONJB as true if A and - B respectively are to be transposed but not conjugated and set - NROWA, NCOLA and NROWB as the number of rows and columns of A - and the number of rows of B respectively. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - - /* Function Body */ - nota = lsame_(transa, "N"); - notb = lsame_(transb, "N"); - conja = lsame_(transa, "C"); - conjb = lsame_(transb, "C"); - if (nota) { - nrowa = *m; - ncola = *k; - } else { - nrowa = *k; - ncola = *m; - } - if (notb) { - nrowb = *k; - } else { - nrowb = *n; - } - -/* Test the input parameters. */ - - info = 0; - if (! nota && ! conja && ! lsame_(transa, "T")) { - info = 1; - } else if (! notb && ! conjb && ! lsame_(transb, "T")) { - info = 2; - } else if (*m < 0) { - info = 3; - } else if (*n < 0) { - info = 4; - } else if (*k < 0) { - info = 5; - } else if (*lda < max(1,nrowa)) { - info = 8; - } else if (*ldb < max(1,nrowb)) { - info = 10; - } else if (*ldc < max(1,*m)) { - info = 13; - } - if (info != 0) { - xerbla_("ZGEMM ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*m == 0 || *n == 0 || (alpha->r == 0. && alpha->i == 0. || *k == 0) && - (beta->r == 1. && beta->i == 0.)) { - return 0; - } - -/* And when alpha.eq.zero. */ - - if (alpha->r == 0. && alpha->i == 0.) { - if (beta->r == 0. && beta->i == 0.) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0., c__[i__3].i = 0.; -/* L10: */ - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, - z__1.i = beta->r * c__[i__4].i + beta->i * c__[ - i__4].r; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; -/* L30: */ - } -/* L40: */ - } - } - return 0; - } - -/* Start the operations. */ - - if (notb) { - if (nota) { - -/* Form C := alpha*A*B + beta*C. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (beta->r == 0. && beta->i == 0.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0., c__[i__3].i = 0.; -/* L50: */ - } - } else if (beta->r != 1. || beta->i != 0.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4] - .i, z__1.i = beta->r * c__[i__4].i + beta->i * - c__[i__4].r; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; -/* L60: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - i__3 = l + j * b_dim1; - if (b[i__3].r != 0. || b[i__3].i != 0.) { - i__3 = l + j * b_dim1; - z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, - z__1.i = alpha->r * b[i__3].i + alpha->i * b[ - i__3].r; - temp.r = z__1.r, temp.i = z__1.i; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * c_dim1; - i__5 = i__ + j * c_dim1; - i__6 = i__ + l * a_dim1; - z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, - z__2.i = temp.r * a[i__6].i + temp.i * a[ - i__6].r; - z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5] - .i + z__2.i; - c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; -/* L70: */ - } - } -/* L80: */ - } -/* L90: */ - } - } else if (conja) { - -/* Form C := alpha*conjg( A' )*B + beta*C. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp.r = 0., temp.i = 0.; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - d_cnjg(&z__3, &a[l + i__ * a_dim1]); - i__4 = l + j * b_dim1; - z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, - z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4] - .r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L100: */ - } - if (beta->r == 0. && beta->i == 0.) { - i__3 = i__ + j * c_dim1; - z__1.r = alpha->r * temp.r - alpha->i * temp.i, - z__1.i = alpha->r * temp.i + alpha->i * - temp.r; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; - } else { - i__3 = i__ + j * c_dim1; - z__2.r = alpha->r * temp.r - alpha->i * temp.i, - z__2.i = alpha->r * temp.i + alpha->i * - temp.r; - i__4 = i__ + j * c_dim1; - z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] - .i, z__3.i = beta->r * c__[i__4].i + beta->i * - c__[i__4].r; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; - } -/* L110: */ - } -/* L120: */ - } - } else { - -/* Form C := alpha*A'*B + beta*C */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp.r = 0., temp.i = 0.; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - i__4 = l + i__ * a_dim1; - i__5 = l + j * b_dim1; - z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5] - .i, z__2.i = a[i__4].r * b[i__5].i + a[i__4] - .i * b[i__5].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L130: */ - } - if (beta->r == 0. && beta->i == 0.) { - i__3 = i__ + j * c_dim1; - z__1.r = alpha->r * temp.r - alpha->i * temp.i, - z__1.i = alpha->r * temp.i + alpha->i * - temp.r; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; - } else { - i__3 = i__ + j * c_dim1; - z__2.r = alpha->r * temp.r - alpha->i * temp.i, - z__2.i = alpha->r * temp.i + alpha->i * - temp.r; - i__4 = i__ + j * c_dim1; - z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] - .i, z__3.i = beta->r * c__[i__4].i + beta->i * - c__[i__4].r; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; - } -/* L140: */ - } -/* L150: */ - } - } - } else if (nota) { - if (conjb) { - -/* Form C := alpha*A*conjg( B' ) + beta*C. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (beta->r == 0. && beta->i == 0.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0., c__[i__3].i = 0.; -/* L160: */ - } - } else if (beta->r != 1. || beta->i != 0.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4] - .i, z__1.i = beta->r * c__[i__4].i + beta->i * - c__[i__4].r; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; -/* L170: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - i__3 = j + l * b_dim1; - if (b[i__3].r != 0. || b[i__3].i != 0.) { - d_cnjg(&z__2, &b[j + l * b_dim1]); - z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, - z__1.i = alpha->r * z__2.i + alpha->i * - z__2.r; - temp.r = z__1.r, temp.i = z__1.i; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * c_dim1; - i__5 = i__ + j * c_dim1; - i__6 = i__ + l * a_dim1; - z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, - z__2.i = temp.r * a[i__6].i + temp.i * a[ - i__6].r; - z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5] - .i + z__2.i; - c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; -/* L180: */ - } - } -/* L190: */ - } -/* L200: */ - } - } else { - -/* Form C := alpha*A*B' + beta*C */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (beta->r == 0. && beta->i == 0.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0., c__[i__3].i = 0.; -/* L210: */ - } - } else if (beta->r != 1. || beta->i != 0.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4] - .i, z__1.i = beta->r * c__[i__4].i + beta->i * - c__[i__4].r; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; -/* L220: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - i__3 = j + l * b_dim1; - if (b[i__3].r != 0. || b[i__3].i != 0.) { - i__3 = j + l * b_dim1; - z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, - z__1.i = alpha->r * b[i__3].i + alpha->i * b[ - i__3].r; - temp.r = z__1.r, temp.i = z__1.i; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * c_dim1; - i__5 = i__ + j * c_dim1; - i__6 = i__ + l * a_dim1; - z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, - z__2.i = temp.r * a[i__6].i + temp.i * a[ - i__6].r; - z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5] - .i + z__2.i; - c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; -/* L230: */ - } - } -/* L240: */ - } -/* L250: */ - } - } - } else if (conja) { - if (conjb) { - -/* Form C := alpha*conjg( A' )*conjg( B' ) + beta*C. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp.r = 0., temp.i = 0.; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - d_cnjg(&z__3, &a[l + i__ * a_dim1]); - d_cnjg(&z__4, &b[j + l * b_dim1]); - z__2.r = z__3.r * z__4.r - z__3.i * z__4.i, z__2.i = - z__3.r * z__4.i + z__3.i * z__4.r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L260: */ - } - if (beta->r == 0. && beta->i == 0.) { - i__3 = i__ + j * c_dim1; - z__1.r = alpha->r * temp.r - alpha->i * temp.i, - z__1.i = alpha->r * temp.i + alpha->i * - temp.r; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; - } else { - i__3 = i__ + j * c_dim1; - z__2.r = alpha->r * temp.r - alpha->i * temp.i, - z__2.i = alpha->r * temp.i + alpha->i * - temp.r; - i__4 = i__ + j * c_dim1; - z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] - .i, z__3.i = beta->r * c__[i__4].i + beta->i * - c__[i__4].r; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; - } -/* L270: */ - } -/* L280: */ - } - } else { - -/* Form C := alpha*conjg( A' )*B' + beta*C */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp.r = 0., temp.i = 0.; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - d_cnjg(&z__3, &a[l + i__ * a_dim1]); - i__4 = j + l * b_dim1; - z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, - z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4] - .r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L290: */ - } - if (beta->r == 0. && beta->i == 0.) { - i__3 = i__ + j * c_dim1; - z__1.r = alpha->r * temp.r - alpha->i * temp.i, - z__1.i = alpha->r * temp.i + alpha->i * - temp.r; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; - } else { - i__3 = i__ + j * c_dim1; - z__2.r = alpha->r * temp.r - alpha->i * temp.i, - z__2.i = alpha->r * temp.i + alpha->i * - temp.r; - i__4 = i__ + j * c_dim1; - z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] - .i, z__3.i = beta->r * c__[i__4].i + beta->i * - c__[i__4].r; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; - } -/* L300: */ - } -/* L310: */ - } - } - } else { - if (conjb) { - -/* Form C := alpha*A'*conjg( B' ) + beta*C */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp.r = 0., temp.i = 0.; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - i__4 = l + i__ * a_dim1; - d_cnjg(&z__3, &b[j + l * b_dim1]); - z__2.r = a[i__4].r * z__3.r - a[i__4].i * z__3.i, - z__2.i = a[i__4].r * z__3.i + a[i__4].i * - z__3.r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L320: */ - } - if (beta->r == 0. && beta->i == 0.) { - i__3 = i__ + j * c_dim1; - z__1.r = alpha->r * temp.r - alpha->i * temp.i, - z__1.i = alpha->r * temp.i + alpha->i * - temp.r; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; - } else { - i__3 = i__ + j * c_dim1; - z__2.r = alpha->r * temp.r - alpha->i * temp.i, - z__2.i = alpha->r * temp.i + alpha->i * - temp.r; - i__4 = i__ + j * c_dim1; - z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] - .i, z__3.i = beta->r * c__[i__4].i + beta->i * - c__[i__4].r; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; - } -/* L330: */ - } -/* L340: */ - } - } else { - -/* Form C := alpha*A'*B' + beta*C */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp.r = 0., temp.i = 0.; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - i__4 = l + i__ * a_dim1; - i__5 = j + l * b_dim1; - z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5] - .i, z__2.i = a[i__4].r * b[i__5].i + a[i__4] - .i * b[i__5].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L350: */ - } - if (beta->r == 0. && beta->i == 0.) { - i__3 = i__ + j * c_dim1; - z__1.r = alpha->r * temp.r - alpha->i * temp.i, - z__1.i = alpha->r * temp.i + alpha->i * - temp.r; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; - } else { - i__3 = i__ + j * c_dim1; - z__2.r = alpha->r * temp.r - alpha->i * temp.i, - z__2.i = alpha->r * temp.i + alpha->i * - temp.r; - i__4 = i__ + j * c_dim1; - z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] - .i, z__3.i = beta->r * c__[i__4].i + beta->i * - c__[i__4].r; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; - } -/* L360: */ - } -/* L370: */ - } - } - } - - return 0; - -/* End of ZGEMM . */ - -} /* zgemm_ */ - -/* Subroutine */ int zgemv_(char *trans, integer *m, integer *n, - doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * - x, integer *incx, doublecomplex *beta, doublecomplex *y, integer * - incy) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - doublecomplex z__1, z__2, z__3; - - /* Builtin functions */ - void d_cnjg(doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer i__, j, ix, iy, jx, jy, kx, ky, info; - static doublecomplex temp; - static integer lenx, leny; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical noconj; - - -/* - Purpose - ======= - - ZGEMV performs one of the matrix-vector operations - - y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, or - - y := alpha*conjg( A' )*x + beta*y, - - where alpha and beta are scalars, x and y are vectors and A is an - m by n matrix. - - Parameters - ========== - - TRANS - CHARACTER*1. - On entry, TRANS specifies the operation to be performed as - follows: - - TRANS = 'N' or 'n' y := alpha*A*x + beta*y. - - TRANS = 'T' or 't' y := alpha*A'*x + beta*y. - - TRANS = 'C' or 'c' y := alpha*conjg( A' )*x + beta*y. - - Unchanged on exit. - - M - INTEGER. - On entry, M specifies the number of rows of the matrix A. - M must be at least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of the matrix A. - N must be at least zero. - Unchanged on exit. - - ALPHA - COMPLEX*16 . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - COMPLEX*16 array of DIMENSION ( LDA, n ). - Before entry, the leading m by n part of the array A must - contain the matrix of coefficients. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, m ). - Unchanged on exit. - - X - COMPLEX*16 array of DIMENSION at least - ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' - and at least - ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. - Before entry, the incremented array X must contain the - vector x. - Unchanged on exit. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - BETA - COMPLEX*16 . - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then Y need not be set on input. - Unchanged on exit. - - Y - COMPLEX*16 array of DIMENSION at least - ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' - and at least - ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. - Before entry with BETA non-zero, the incremented array Y - must contain the vector y. On exit, Y is overwritten by the - updated vector y. - - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - --y; - - /* Function Body */ - info = 0; - if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C") - ) { - info = 1; - } else if (*m < 0) { - info = 2; - } else if (*n < 0) { - info = 3; - } else if (*lda < max(1,*m)) { - info = 6; - } else if (*incx == 0) { - info = 8; - } else if (*incy == 0) { - info = 11; - } - if (info != 0) { - xerbla_("ZGEMV ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*m == 0 || *n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == - 1. && beta->i == 0.)) { - return 0; - } - - noconj = lsame_(trans, "T"); - -/* - Set LENX and LENY, the lengths of the vectors x and y, and set - up the start points in X and Y. -*/ - - if (lsame_(trans, "N")) { - lenx = *n; - leny = *m; - } else { - lenx = *m; - leny = *n; - } - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (lenx - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (leny - 1) * *incy; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through A. - - First form y := beta*y. -*/ - - if (beta->r != 1. || beta->i != 0.) { - if (*incy == 1) { - if (beta->r == 0. && beta->i == 0.) { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - y[i__2].r = 0., y[i__2].i = 0.; -/* L10: */ - } - } else { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__; - z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, - z__1.i = beta->r * y[i__3].i + beta->i * y[i__3] - .r; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; -/* L20: */ - } - } - } else { - iy = ky; - if (beta->r == 0. && beta->i == 0.) { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - y[i__2].r = 0., y[i__2].i = 0.; - iy += *incy; -/* L30: */ - } - } else { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - i__3 = iy; - z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, - z__1.i = beta->r * y[i__3].i + beta->i * y[i__3] - .r; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - iy += *incy; -/* L40: */ - } - } - } - } - if (alpha->r == 0. && alpha->i == 0.) { - return 0; - } - if (lsame_(trans, "N")) { - -/* Form y := alpha*A*x + y. */ - - jx = kx; - if (*incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jx; - if (x[i__2].r != 0. || x[i__2].i != 0.) { - i__2 = jx; - z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, - z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2] - .r; - temp.r = z__1.r, temp.i = z__1.i; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__; - i__4 = i__; - i__5 = i__ + j * a_dim1; - z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - z__2.i = temp.r * a[i__5].i + temp.i * a[i__5] - .r; - z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + - z__2.i; - y[i__3].r = z__1.r, y[i__3].i = z__1.i; -/* L50: */ - } - } - jx += *incx; -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jx; - if (x[i__2].r != 0. || x[i__2].i != 0.) { - i__2 = jx; - z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, - z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2] - .r; - temp.r = z__1.r, temp.i = z__1.i; - iy = ky; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = iy; - i__4 = iy; - i__5 = i__ + j * a_dim1; - z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - z__2.i = temp.r * a[i__5].i + temp.i * a[i__5] - .r; - z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + - z__2.i; - y[i__3].r = z__1.r, y[i__3].i = z__1.i; - iy += *incy; -/* L70: */ - } - } - jx += *incx; -/* L80: */ - } - } - } else { - -/* Form y := alpha*A'*x + y or y := alpha*conjg( A' )*x + y. */ - - jy = ky; - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp.r = 0., temp.i = 0.; - if (noconj) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__; - z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4] - .i, z__2.i = a[i__3].r * x[i__4].i + a[i__3] - .i * x[i__4].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L90: */ - } - } else { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - d_cnjg(&z__3, &a[i__ + j * a_dim1]); - i__3 = i__; - z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, - z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3] - .r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L100: */ - } - } - i__2 = jy; - i__3 = jy; - z__2.r = alpha->r * temp.r - alpha->i * temp.i, z__2.i = - alpha->r * temp.i + alpha->i * temp.r; - z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - jy += *incy; -/* L110: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp.r = 0., temp.i = 0.; - ix = kx; - if (noconj) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = ix; - z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4] - .i, z__2.i = a[i__3].r * x[i__4].i + a[i__3] - .i * x[i__4].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; - temp.r = z__1.r, temp.i = z__1.i; - ix += *incx; -/* L120: */ - } - } else { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - d_cnjg(&z__3, &a[i__ + j * a_dim1]); - i__3 = ix; - z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, - z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3] - .r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; - temp.r = z__1.r, temp.i = z__1.i; - ix += *incx; -/* L130: */ - } - } - i__2 = jy; - i__3 = jy; - z__2.r = alpha->r * temp.r - alpha->i * temp.i, z__2.i = - alpha->r * temp.i + alpha->i * temp.r; - z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - jy += *incy; -/* L140: */ - } - } - } - - return 0; - -/* End of ZGEMV . */ - -} /* zgemv_ */ - -/* Subroutine */ int zgerc_(integer *m, integer *n, doublecomplex *alpha, - doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, - doublecomplex *a, integer *lda) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - doublecomplex z__1, z__2; - - /* Builtin functions */ - void d_cnjg(doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer i__, j, ix, jy, kx, info; - static doublecomplex temp; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - ZGERC performs the rank 1 operation - - A := alpha*x*conjg( y' ) + A, - - where alpha is a scalar, x is an m element vector, y is an n element - vector and A is an m by n matrix. - - Parameters - ========== - - M - INTEGER. - On entry, M specifies the number of rows of the matrix A. - M must be at least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of the matrix A. - N must be at least zero. - Unchanged on exit. - - ALPHA - COMPLEX*16 . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - X - COMPLEX*16 array of dimension at least - ( 1 + ( m - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the m - element vector x. - Unchanged on exit. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - Y - COMPLEX*16 array of dimension at least - ( 1 + ( n - 1 )*abs( INCY ) ). - Before entry, the incremented array Y must contain the n - element vector y. - Unchanged on exit. - - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. - - A - COMPLEX*16 array of DIMENSION ( LDA, n ). - Before entry, the leading m by n part of the array A must - contain the matrix of coefficients. On exit, A is - overwritten by the updated matrix. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, m ). - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --x; - --y; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - info = 0; - if (*m < 0) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*incx == 0) { - info = 5; - } else if (*incy == 0) { - info = 7; - } else if (*lda < max(1,*m)) { - info = 9; - } - if (info != 0) { - xerbla_("ZGERC ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*m == 0 || *n == 0 || alpha->r == 0. && alpha->i == 0.) { - return 0; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through A. -*/ - - if (*incy > 0) { - jy = 1; - } else { - jy = 1 - (*n - 1) * *incy; - } - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jy; - if (y[i__2].r != 0. || y[i__2].i != 0.) { - d_cnjg(&z__2, &y[jy]); - z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = - alpha->r * z__2.i + alpha->i * z__2.r; - temp.r = z__1.r, temp.i = z__1.i; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - i__5 = i__; - z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, z__2.i = - x[i__5].r * temp.i + x[i__5].i * temp.r; - z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; - a[i__3].r = z__1.r, a[i__3].i = z__1.i; -/* L10: */ - } - } - jy += *incy; -/* L20: */ - } - } else { - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*m - 1) * *incx; - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jy; - if (y[i__2].r != 0. || y[i__2].i != 0.) { - d_cnjg(&z__2, &y[jy]); - z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = - alpha->r * z__2.i + alpha->i * z__2.r; - temp.r = z__1.r, temp.i = z__1.i; - ix = kx; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - i__5 = ix; - z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, z__2.i = - x[i__5].r * temp.i + x[i__5].i * temp.r; - z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; - a[i__3].r = z__1.r, a[i__3].i = z__1.i; - ix += *incx; -/* L30: */ - } - } - jy += *incy; -/* L40: */ - } - } - - return 0; - -/* End of ZGERC . */ - -} /* zgerc_ */ - -/* Subroutine */ int zgeru_(integer *m, integer *n, doublecomplex *alpha, - doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, - doublecomplex *a, integer *lda) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - doublecomplex z__1, z__2; - - /* Local variables */ - static integer i__, j, ix, jy, kx, info; - static doublecomplex temp; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - ZGERU performs the rank 1 operation - - A := alpha*x*y' + A, - - where alpha is a scalar, x is an m element vector, y is an n element - vector and A is an m by n matrix. - - Parameters - ========== - - M - INTEGER. - On entry, M specifies the number of rows of the matrix A. - M must be at least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of the matrix A. - N must be at least zero. - Unchanged on exit. - - ALPHA - COMPLEX*16 . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - X - COMPLEX*16 array of dimension at least - ( 1 + ( m - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the m - element vector x. - Unchanged on exit. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - Y - COMPLEX*16 array of dimension at least - ( 1 + ( n - 1 )*abs( INCY ) ). - Before entry, the incremented array Y must contain the n - element vector y. - Unchanged on exit. - - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. - - A - COMPLEX*16 array of DIMENSION ( LDA, n ). - Before entry, the leading m by n part of the array A must - contain the matrix of coefficients. On exit, A is - overwritten by the updated matrix. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, m ). - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --x; - --y; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - info = 0; - if (*m < 0) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*incx == 0) { - info = 5; - } else if (*incy == 0) { - info = 7; - } else if (*lda < max(1,*m)) { - info = 9; - } - if (info != 0) { - xerbla_("ZGERU ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*m == 0 || *n == 0 || alpha->r == 0. && alpha->i == 0.) { - return 0; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through A. -*/ - - if (*incy > 0) { - jy = 1; - } else { - jy = 1 - (*n - 1) * *incy; - } - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jy; - if (y[i__2].r != 0. || y[i__2].i != 0.) { - i__2 = jy; - z__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, z__1.i = - alpha->r * y[i__2].i + alpha->i * y[i__2].r; - temp.r = z__1.r, temp.i = z__1.i; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - i__5 = i__; - z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, z__2.i = - x[i__5].r * temp.i + x[i__5].i * temp.r; - z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; - a[i__3].r = z__1.r, a[i__3].i = z__1.i; -/* L10: */ - } - } - jy += *incy; -/* L20: */ - } - } else { - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*m - 1) * *incx; - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jy; - if (y[i__2].r != 0. || y[i__2].i != 0.) { - i__2 = jy; - z__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, z__1.i = - alpha->r * y[i__2].i + alpha->i * y[i__2].r; - temp.r = z__1.r, temp.i = z__1.i; - ix = kx; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - i__5 = ix; - z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, z__2.i = - x[i__5].r * temp.i + x[i__5].i * temp.r; - z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; - a[i__3].r = z__1.r, a[i__3].i = z__1.i; - ix += *incx; -/* L30: */ - } - } - jy += *incy; -/* L40: */ - } - } - - return 0; - -/* End of ZGERU . */ - -} /* zgeru_ */ - -/* Subroutine */ int zhemv_(char *uplo, integer *n, doublecomplex *alpha, - doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, - doublecomplex *beta, doublecomplex *y, integer *incy) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - doublereal d__1; - doublecomplex z__1, z__2, z__3, z__4; - - /* Builtin functions */ - void d_cnjg(doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer i__, j, ix, iy, jx, jy, kx, ky, info; - static doublecomplex temp1, temp2; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - ZHEMV performs the matrix-vector operation - - y := alpha*A*x + beta*y, - - where alpha and beta are scalars, x and y are n element vectors and - A is an n by n hermitian matrix. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the upper or lower - triangular part of the array A is to be referenced as - follows: - - UPLO = 'U' or 'u' Only the upper triangular part of A - is to be referenced. - - UPLO = 'L' or 'l' Only the lower triangular part of A - is to be referenced. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix A. - N must be at least zero. - Unchanged on exit. - - ALPHA - COMPLEX*16 . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - COMPLEX*16 array of DIMENSION ( LDA, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array A must contain the upper - triangular part of the hermitian matrix and the strictly - lower triangular part of A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array A must contain the lower - triangular part of the hermitian matrix and the strictly - upper triangular part of A is not referenced. - Note that the imaginary parts of the diagonal elements need - not be set and are assumed to be zero. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, n ). - Unchanged on exit. - - X - COMPLEX*16 array of dimension at least - ( 1 + ( n - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the n - element vector x. - Unchanged on exit. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - BETA - COMPLEX*16 . - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then Y need not be set on input. - Unchanged on exit. - - Y - COMPLEX*16 array of dimension at least - ( 1 + ( n - 1 )*abs( INCY ) ). - Before entry, the incremented array Y must contain the n - element vector y. On exit, Y is overwritten by the updated - vector y. - - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - --y; - - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*lda < max(1,*n)) { - info = 5; - } else if (*incx == 0) { - info = 7; - } else if (*incy == 0) { - info = 10; - } - if (info != 0) { - xerbla_("ZHEMV ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && - beta->i == 0.)) { - return 0; - } - -/* Set up the start points in X and Y. */ - - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*n - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (*n - 1) * *incy; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through the triangular part - of A. - - First form y := beta*y. -*/ - - if (beta->r != 1. || beta->i != 0.) { - if (*incy == 1) { - if (beta->r == 0. && beta->i == 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - y[i__2].r = 0., y[i__2].i = 0.; -/* L10: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__; - z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, - z__1.i = beta->r * y[i__3].i + beta->i * y[i__3] - .r; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; -/* L20: */ - } - } - } else { - iy = ky; - if (beta->r == 0. && beta->i == 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - y[i__2].r = 0., y[i__2].i = 0.; - iy += *incy; -/* L30: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - i__3 = iy; - z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, - z__1.i = beta->r * y[i__3].i + beta->i * y[i__3] - .r; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - iy += *incy; -/* L40: */ - } - } - } - } - if (alpha->r == 0. && alpha->i == 0.) { - return 0; - } - if (lsame_(uplo, "U")) { - -/* Form y when A is stored in upper triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = - alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = z__1.r, temp1.i = z__1.i; - temp2.r = 0., temp2.i = 0.; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__; - i__4 = i__; - i__5 = i__ + j * a_dim1; - z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, - z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] - .r; - z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; - y[i__3].r = z__1.r, y[i__3].i = z__1.i; - d_cnjg(&z__3, &a[i__ + j * a_dim1]); - i__3 = i__; - z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = - z__3.r * x[i__3].i + z__3.i * x[i__3].r; - z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; - temp2.r = z__1.r, temp2.i = z__1.i; -/* L50: */ - } - i__2 = j; - i__3 = j; - i__4 = j + j * a_dim1; - d__1 = a[i__4].r; - z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i; - z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i; - z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = - alpha->r * temp2.i + alpha->i * temp2.r; - z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; -/* L60: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jx; - z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = - alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = z__1.r, temp1.i = z__1.i; - temp2.r = 0., temp2.i = 0.; - ix = kx; - iy = ky; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = iy; - i__4 = iy; - i__5 = i__ + j * a_dim1; - z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, - z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] - .r; - z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; - y[i__3].r = z__1.r, y[i__3].i = z__1.i; - d_cnjg(&z__3, &a[i__ + j * a_dim1]); - i__3 = ix; - z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = - z__3.r * x[i__3].i + z__3.i * x[i__3].r; - z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; - temp2.r = z__1.r, temp2.i = z__1.i; - ix += *incx; - iy += *incy; -/* L70: */ - } - i__2 = jy; - i__3 = jy; - i__4 = j + j * a_dim1; - d__1 = a[i__4].r; - z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i; - z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i; - z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = - alpha->r * temp2.i + alpha->i * temp2.r; - z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - jx += *incx; - jy += *incy; -/* L80: */ - } - } - } else { - -/* Form y when A is stored in lower triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = - alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = z__1.r, temp1.i = z__1.i; - temp2.r = 0., temp2.i = 0.; - i__2 = j; - i__3 = j; - i__4 = j + j * a_dim1; - d__1 = a[i__4].r; - z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; - z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__; - i__4 = i__; - i__5 = i__ + j * a_dim1; - z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, - z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] - .r; - z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; - y[i__3].r = z__1.r, y[i__3].i = z__1.i; - d_cnjg(&z__3, &a[i__ + j * a_dim1]); - i__3 = i__; - z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = - z__3.r * x[i__3].i + z__3.i * x[i__3].r; - z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; - temp2.r = z__1.r, temp2.i = z__1.i; -/* L90: */ - } - i__2 = j; - i__3 = j; - z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = - alpha->r * temp2.i + alpha->i * temp2.r; - z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; -/* L100: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jx; - z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = - alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = z__1.r, temp1.i = z__1.i; - temp2.r = 0., temp2.i = 0.; - i__2 = jy; - i__3 = jy; - i__4 = j + j * a_dim1; - d__1 = a[i__4].r; - z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; - z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - ix = jx; - iy = jy; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - ix += *incx; - iy += *incy; - i__3 = iy; - i__4 = iy; - i__5 = i__ + j * a_dim1; - z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, - z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] - .r; - z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; - y[i__3].r = z__1.r, y[i__3].i = z__1.i; - d_cnjg(&z__3, &a[i__ + j * a_dim1]); - i__3 = ix; - z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = - z__3.r * x[i__3].i + z__3.i * x[i__3].r; - z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; - temp2.r = z__1.r, temp2.i = z__1.i; -/* L110: */ - } - i__2 = jy; - i__3 = jy; - z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = - alpha->r * temp2.i + alpha->i * temp2.r; - z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; - y[i__2].r = z__1.r, y[i__2].i = z__1.i; - jx += *incx; - jy += *incy; -/* L120: */ - } - } - } - - return 0; - -/* End of ZHEMV . */ - -} /* zhemv_ */ - -/* Subroutine */ int zher2_(char *uplo, integer *n, doublecomplex *alpha, - doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, - doublecomplex *a, integer *lda) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; - doublereal d__1; - doublecomplex z__1, z__2, z__3, z__4; - - /* Builtin functions */ - void d_cnjg(doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer i__, j, ix, iy, jx, jy, kx, ky, info; - static doublecomplex temp1, temp2; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - ZHER2 performs the hermitian rank 2 operation - - A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, - - where alpha is a scalar, x and y are n element vectors and A is an n - by n hermitian matrix. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the upper or lower - triangular part of the array A is to be referenced as - follows: - - UPLO = 'U' or 'u' Only the upper triangular part of A - is to be referenced. - - UPLO = 'L' or 'l' Only the lower triangular part of A - is to be referenced. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix A. - N must be at least zero. - Unchanged on exit. - - ALPHA - COMPLEX*16 . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - X - COMPLEX*16 array of dimension at least - ( 1 + ( n - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the n - element vector x. - Unchanged on exit. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - Y - COMPLEX*16 array of dimension at least - ( 1 + ( n - 1 )*abs( INCY ) ). - Before entry, the incremented array Y must contain the n - element vector y. - Unchanged on exit. - - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. - - A - COMPLEX*16 array of DIMENSION ( LDA, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array A must contain the upper - triangular part of the hermitian matrix and the strictly - lower triangular part of A is not referenced. On exit, the - upper triangular part of the array A is overwritten by the - upper triangular part of the updated matrix. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array A must contain the lower - triangular part of the hermitian matrix and the strictly - upper triangular part of A is not referenced. On exit, the - lower triangular part of the array A is overwritten by the - lower triangular part of the updated matrix. - Note that the imaginary parts of the diagonal elements need - not be set, they are assumed to be zero, and on exit they - are set to zero. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, n ). - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --x; - --y; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*incx == 0) { - info = 5; - } else if (*incy == 0) { - info = 7; - } else if (*lda < max(1,*n)) { - info = 9; - } - if (info != 0) { - xerbla_("ZHER2 ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0 || alpha->r == 0. && alpha->i == 0.) { - return 0; - } - -/* - Set up the start points in X and Y if the increments are not both - unity. -*/ - - if (*incx != 1 || *incy != 1) { - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*n - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (*n - 1) * *incy; - } - jx = kx; - jy = ky; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through the triangular part - of A. -*/ - - if (lsame_(uplo, "U")) { - -/* Form A when A is stored in the upper triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - i__3 = j; - if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || - y[i__3].i != 0.)) { - d_cnjg(&z__2, &y[j]); - z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = - alpha->r * z__2.i + alpha->i * z__2.r; - temp1.r = z__1.r, temp1.i = z__1.i; - i__2 = j; - z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, - z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] - .r; - d_cnjg(&z__1, &z__2); - temp2.r = z__1.r, temp2.i = z__1.i; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - i__5 = i__; - z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, - z__3.i = x[i__5].r * temp1.i + x[i__5].i * - temp1.r; - z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i + - z__3.i; - i__6 = i__; - z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, - z__4.i = y[i__6].r * temp2.i + y[i__6].i * - temp2.r; - z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; - a[i__3].r = z__1.r, a[i__3].i = z__1.i; -/* L10: */ - } - i__2 = j + j * a_dim1; - i__3 = j + j * a_dim1; - i__4 = j; - z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, - z__2.i = x[i__4].r * temp1.i + x[i__4].i * - temp1.r; - i__5 = j; - z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, - z__3.i = y[i__5].r * temp2.i + y[i__5].i * - temp2.r; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; - d__1 = a[i__3].r + z__1.r; - a[i__2].r = d__1, a[i__2].i = 0.; - } else { - i__2 = j + j * a_dim1; - i__3 = j + j * a_dim1; - d__1 = a[i__3].r; - a[i__2].r = d__1, a[i__2].i = 0.; - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jx; - i__3 = jy; - if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || - y[i__3].i != 0.)) { - d_cnjg(&z__2, &y[jy]); - z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = - alpha->r * z__2.i + alpha->i * z__2.r; - temp1.r = z__1.r, temp1.i = z__1.i; - i__2 = jx; - z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, - z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] - .r; - d_cnjg(&z__1, &z__2); - temp2.r = z__1.r, temp2.i = z__1.i; - ix = kx; - iy = ky; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - i__5 = ix; - z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, - z__3.i = x[i__5].r * temp1.i + x[i__5].i * - temp1.r; - z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i + - z__3.i; - i__6 = iy; - z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, - z__4.i = y[i__6].r * temp2.i + y[i__6].i * - temp2.r; - z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; - a[i__3].r = z__1.r, a[i__3].i = z__1.i; - ix += *incx; - iy += *incy; -/* L30: */ - } - i__2 = j + j * a_dim1; - i__3 = j + j * a_dim1; - i__4 = jx; - z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, - z__2.i = x[i__4].r * temp1.i + x[i__4].i * - temp1.r; - i__5 = jy; - z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, - z__3.i = y[i__5].r * temp2.i + y[i__5].i * - temp2.r; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; - d__1 = a[i__3].r + z__1.r; - a[i__2].r = d__1, a[i__2].i = 0.; - } else { - i__2 = j + j * a_dim1; - i__3 = j + j * a_dim1; - d__1 = a[i__3].r; - a[i__2].r = d__1, a[i__2].i = 0.; - } - jx += *incx; - jy += *incy; -/* L40: */ - } - } - } else { - -/* Form A when A is stored in the lower triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - i__3 = j; - if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || - y[i__3].i != 0.)) { - d_cnjg(&z__2, &y[j]); - z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = - alpha->r * z__2.i + alpha->i * z__2.r; - temp1.r = z__1.r, temp1.i = z__1.i; - i__2 = j; - z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, - z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] - .r; - d_cnjg(&z__1, &z__2); - temp2.r = z__1.r, temp2.i = z__1.i; - i__2 = j + j * a_dim1; - i__3 = j + j * a_dim1; - i__4 = j; - z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, - z__2.i = x[i__4].r * temp1.i + x[i__4].i * - temp1.r; - i__5 = j; - z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, - z__3.i = y[i__5].r * temp2.i + y[i__5].i * - temp2.r; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; - d__1 = a[i__3].r + z__1.r; - a[i__2].r = d__1, a[i__2].i = 0.; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - i__5 = i__; - z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, - z__3.i = x[i__5].r * temp1.i + x[i__5].i * - temp1.r; - z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i + - z__3.i; - i__6 = i__; - z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, - z__4.i = y[i__6].r * temp2.i + y[i__6].i * - temp2.r; - z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; - a[i__3].r = z__1.r, a[i__3].i = z__1.i; -/* L50: */ - } - } else { - i__2 = j + j * a_dim1; - i__3 = j + j * a_dim1; - d__1 = a[i__3].r; - a[i__2].r = d__1, a[i__2].i = 0.; - } -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jx; - i__3 = jy; - if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || - y[i__3].i != 0.)) { - d_cnjg(&z__2, &y[jy]); - z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = - alpha->r * z__2.i + alpha->i * z__2.r; - temp1.r = z__1.r, temp1.i = z__1.i; - i__2 = jx; - z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, - z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] - .r; - d_cnjg(&z__1, &z__2); - temp2.r = z__1.r, temp2.i = z__1.i; - i__2 = j + j * a_dim1; - i__3 = j + j * a_dim1; - i__4 = jx; - z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, - z__2.i = x[i__4].r * temp1.i + x[i__4].i * - temp1.r; - i__5 = jy; - z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, - z__3.i = y[i__5].r * temp2.i + y[i__5].i * - temp2.r; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; - d__1 = a[i__3].r + z__1.r; - a[i__2].r = d__1, a[i__2].i = 0.; - ix = jx; - iy = jy; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - ix += *incx; - iy += *incy; - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - i__5 = ix; - z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, - z__3.i = x[i__5].r * temp1.i + x[i__5].i * - temp1.r; - z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i + - z__3.i; - i__6 = iy; - z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, - z__4.i = y[i__6].r * temp2.i + y[i__6].i * - temp2.r; - z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; - a[i__3].r = z__1.r, a[i__3].i = z__1.i; -/* L70: */ - } - } else { - i__2 = j + j * a_dim1; - i__3 = j + j * a_dim1; - d__1 = a[i__3].r; - a[i__2].r = d__1, a[i__2].i = 0.; - } - jx += *incx; - jy += *incy; -/* L80: */ - } - } - } - - return 0; - -/* End of ZHER2 . */ - -} /* zher2_ */ - -/* Subroutine */ int zher2k_(char *uplo, char *trans, integer *n, integer *k, - doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * - b, integer *ldb, doublereal *beta, doublecomplex *c__, integer *ldc) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, - i__3, i__4, i__5, i__6, i__7; - doublereal d__1; - doublecomplex z__1, z__2, z__3, z__4, z__5, z__6; - - /* Builtin functions */ - void d_cnjg(doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer i__, j, l, info; - static doublecomplex temp1, temp2; - extern logical lsame_(char *, char *); - static integer nrowa; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - ZHER2K performs one of the hermitian rank 2k operations - - C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + beta*C, - - or - - C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + beta*C, - - where alpha and beta are scalars with beta real, C is an n by n - hermitian matrix and A and B are n by k matrices in the first case - and k by n matrices in the second case. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the upper or lower - triangular part of the array C is to be referenced as - follows: - - UPLO = 'U' or 'u' Only the upper triangular part of C - is to be referenced. - - UPLO = 'L' or 'l' Only the lower triangular part of C - is to be referenced. - - Unchanged on exit. - - TRANS - CHARACTER*1. - On entry, TRANS specifies the operation to be performed as - follows: - - TRANS = 'N' or 'n' C := alpha*A*conjg( B' ) + - conjg( alpha )*B*conjg( A' ) + - beta*C. - - TRANS = 'C' or 'c' C := alpha*conjg( A' )*B + - conjg( alpha )*conjg( B' )*A + - beta*C. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix C. N must be - at least zero. - Unchanged on exit. - - K - INTEGER. - On entry with TRANS = 'N' or 'n', K specifies the number - of columns of the matrices A and B, and on entry with - TRANS = 'C' or 'c', K specifies the number of rows of the - matrices A and B. K must be at least zero. - Unchanged on exit. - - ALPHA - COMPLEX*16 . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is - k when TRANS = 'N' or 'n', and is n otherwise. - Before entry with TRANS = 'N' or 'n', the leading n by k - part of the array A must contain the matrix A, otherwise - the leading k by n part of the array A must contain the - matrix A. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When TRANS = 'N' or 'n' - then LDA must be at least max( 1, n ), otherwise LDA must - be at least max( 1, k ). - Unchanged on exit. - - B - COMPLEX*16 array of DIMENSION ( LDB, kb ), where kb is - k when TRANS = 'N' or 'n', and is n otherwise. - Before entry with TRANS = 'N' or 'n', the leading n by k - part of the array B must contain the matrix B, otherwise - the leading k by n part of the array B must contain the - matrix B. - Unchanged on exit. - - LDB - INTEGER. - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. When TRANS = 'N' or 'n' - then LDB must be at least max( 1, n ), otherwise LDB must - be at least max( 1, k ). - Unchanged on exit. - - BETA - DOUBLE PRECISION . - On entry, BETA specifies the scalar beta. - Unchanged on exit. - - C - COMPLEX*16 array of DIMENSION ( LDC, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array C must contain the upper - triangular part of the hermitian matrix and the strictly - lower triangular part of C is not referenced. On exit, the - upper triangular part of the array C is overwritten by the - upper triangular part of the updated matrix. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array C must contain the lower - triangular part of the hermitian matrix and the strictly - upper triangular part of C is not referenced. On exit, the - lower triangular part of the array C is overwritten by the - lower triangular part of the updated matrix. - Note that the imaginary parts of the diagonal elements need - not be set, they are assumed to be zero, and on exit they - are set to zero. - - LDC - INTEGER. - On entry, LDC specifies the first dimension of C as declared - in the calling (sub) program. LDC must be at least - max( 1, n ). - Unchanged on exit. - - - Level 3 Blas routine. - - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1. - Ed Anderson, Cray Research Inc. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - - /* Function Body */ - if (lsame_(trans, "N")) { - nrowa = *n; - } else { - nrowa = *k; - } - upper = lsame_(uplo, "U"); - - info = 0; - if (! upper && ! lsame_(uplo, "L")) { - info = 1; - } else if (! lsame_(trans, "N") && ! lsame_(trans, - "C")) { - info = 2; - } else if (*n < 0) { - info = 3; - } else if (*k < 0) { - info = 4; - } else if (*lda < max(1,nrowa)) { - info = 7; - } else if (*ldb < max(1,nrowa)) { - info = 9; - } else if (*ldc < max(1,*n)) { - info = 12; - } - if (info != 0) { - xerbla_("ZHER2K", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0 || (alpha->r == 0. && alpha->i == 0. || *k == 0) && *beta == - 1.) { - return 0; - } - -/* And when alpha.eq.zero. */ - - if (alpha->r == 0. && alpha->i == 0.) { - if (upper) { - if (*beta == 0.) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0., c__[i__3].i = 0.; -/* L10: */ - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ - i__4].i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; -/* L30: */ - } - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - d__1 = *beta * c__[i__3].r; - c__[i__2].r = d__1, c__[i__2].i = 0.; -/* L40: */ - } - } - } else { - if (*beta == 0.) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0., c__[i__3].i = 0.; -/* L50: */ - } -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - d__1 = *beta * c__[i__3].r; - c__[i__2].r = d__1, c__[i__2].i = 0.; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ - i__4].i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; -/* L70: */ - } -/* L80: */ - } - } - } - return 0; - } - -/* Start the operations. */ - - if (lsame_(trans, "N")) { - -/* - Form C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + - C. -*/ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0., c__[i__3].i = 0.; -/* L90: */ - } - } else if (*beta != 1.) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ - i__4].i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; -/* L100: */ - } - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - d__1 = *beta * c__[i__3].r; - c__[i__2].r = d__1, c__[i__2].i = 0.; - } else { - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - d__1 = c__[i__3].r; - c__[i__2].r = d__1, c__[i__2].i = 0.; - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - i__3 = j + l * a_dim1; - i__4 = j + l * b_dim1; - if (a[i__3].r != 0. || a[i__3].i != 0. || (b[i__4].r != - 0. || b[i__4].i != 0.)) { - d_cnjg(&z__2, &b[j + l * b_dim1]); - z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, - z__1.i = alpha->r * z__2.i + alpha->i * - z__2.r; - temp1.r = z__1.r, temp1.i = z__1.i; - i__3 = j + l * a_dim1; - z__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, - z__2.i = alpha->r * a[i__3].i + alpha->i * a[ - i__3].r; - d_cnjg(&z__1, &z__2); - temp2.r = z__1.r, temp2.i = z__1.i; - i__3 = j - 1; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * c_dim1; - i__5 = i__ + j * c_dim1; - i__6 = i__ + l * a_dim1; - z__3.r = a[i__6].r * temp1.r - a[i__6].i * - temp1.i, z__3.i = a[i__6].r * temp1.i + a[ - i__6].i * temp1.r; - z__2.r = c__[i__5].r + z__3.r, z__2.i = c__[i__5] - .i + z__3.i; - i__7 = i__ + l * b_dim1; - z__4.r = b[i__7].r * temp2.r - b[i__7].i * - temp2.i, z__4.i = b[i__7].r * temp2.i + b[ - i__7].i * temp2.r; - z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + - z__4.i; - c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; -/* L110: */ - } - i__3 = j + j * c_dim1; - i__4 = j + j * c_dim1; - i__5 = j + l * a_dim1; - z__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i, - z__2.i = a[i__5].r * temp1.i + a[i__5].i * - temp1.r; - i__6 = j + l * b_dim1; - z__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i, - z__3.i = b[i__6].r * temp2.i + b[i__6].i * - temp2.r; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; - d__1 = c__[i__4].r + z__1.r; - c__[i__3].r = d__1, c__[i__3].i = 0.; - } -/* L120: */ - } -/* L130: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0., c__[i__3].i = 0.; -/* L140: */ - } - } else if (*beta != 1.) { - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ - i__4].i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; -/* L150: */ - } - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - d__1 = *beta * c__[i__3].r; - c__[i__2].r = d__1, c__[i__2].i = 0.; - } else { - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - d__1 = c__[i__3].r; - c__[i__2].r = d__1, c__[i__2].i = 0.; - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - i__3 = j + l * a_dim1; - i__4 = j + l * b_dim1; - if (a[i__3].r != 0. || a[i__3].i != 0. || (b[i__4].r != - 0. || b[i__4].i != 0.)) { - d_cnjg(&z__2, &b[j + l * b_dim1]); - z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, - z__1.i = alpha->r * z__2.i + alpha->i * - z__2.r; - temp1.r = z__1.r, temp1.i = z__1.i; - i__3 = j + l * a_dim1; - z__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, - z__2.i = alpha->r * a[i__3].i + alpha->i * a[ - i__3].r; - d_cnjg(&z__1, &z__2); - temp2.r = z__1.r, temp2.i = z__1.i; - i__3 = *n; - for (i__ = j + 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * c_dim1; - i__5 = i__ + j * c_dim1; - i__6 = i__ + l * a_dim1; - z__3.r = a[i__6].r * temp1.r - a[i__6].i * - temp1.i, z__3.i = a[i__6].r * temp1.i + a[ - i__6].i * temp1.r; - z__2.r = c__[i__5].r + z__3.r, z__2.i = c__[i__5] - .i + z__3.i; - i__7 = i__ + l * b_dim1; - z__4.r = b[i__7].r * temp2.r - b[i__7].i * - temp2.i, z__4.i = b[i__7].r * temp2.i + b[ - i__7].i * temp2.r; - z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + - z__4.i; - c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; -/* L160: */ - } - i__3 = j + j * c_dim1; - i__4 = j + j * c_dim1; - i__5 = j + l * a_dim1; - z__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i, - z__2.i = a[i__5].r * temp1.i + a[i__5].i * - temp1.r; - i__6 = j + l * b_dim1; - z__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i, - z__3.i = b[i__6].r * temp2.i + b[i__6].i * - temp2.r; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; - d__1 = c__[i__4].r + z__1.r; - c__[i__3].r = d__1, c__[i__3].i = 0.; - } -/* L170: */ - } -/* L180: */ - } - } - } else { - -/* - Form C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + - C. -*/ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - temp1.r = 0., temp1.i = 0.; - temp2.r = 0., temp2.i = 0.; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - d_cnjg(&z__3, &a[l + i__ * a_dim1]); - i__4 = l + j * b_dim1; - z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, - z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4] - .r; - z__1.r = temp1.r + z__2.r, z__1.i = temp1.i + z__2.i; - temp1.r = z__1.r, temp1.i = z__1.i; - d_cnjg(&z__3, &b[l + i__ * b_dim1]); - i__4 = l + j * a_dim1; - z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, - z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4] - .r; - z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; - temp2.r = z__1.r, temp2.i = z__1.i; -/* L190: */ - } - if (i__ == j) { - if (*beta == 0.) { - i__3 = j + j * c_dim1; - z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, - z__2.i = alpha->r * temp1.i + alpha->i * - temp1.r; - d_cnjg(&z__4, alpha); - z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, - z__3.i = z__4.r * temp2.i + z__4.i * - temp2.r; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + - z__3.i; - d__1 = z__1.r; - c__[i__3].r = d__1, c__[i__3].i = 0.; - } else { - i__3 = j + j * c_dim1; - i__4 = j + j * c_dim1; - z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, - z__2.i = alpha->r * temp1.i + alpha->i * - temp1.r; - d_cnjg(&z__4, alpha); - z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, - z__3.i = z__4.r * temp2.i + z__4.i * - temp2.r; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + - z__3.i; - d__1 = *beta * c__[i__4].r + z__1.r; - c__[i__3].r = d__1, c__[i__3].i = 0.; - } - } else { - if (*beta == 0.) { - i__3 = i__ + j * c_dim1; - z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, - z__2.i = alpha->r * temp1.i + alpha->i * - temp1.r; - d_cnjg(&z__4, alpha); - z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, - z__3.i = z__4.r * temp2.i + z__4.i * - temp2.r; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + - z__3.i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; - } else { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - z__3.r = *beta * c__[i__4].r, z__3.i = *beta * - c__[i__4].i; - z__4.r = alpha->r * temp1.r - alpha->i * temp1.i, - z__4.i = alpha->r * temp1.i + alpha->i * - temp1.r; - z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + - z__4.i; - d_cnjg(&z__6, alpha); - z__5.r = z__6.r * temp2.r - z__6.i * temp2.i, - z__5.i = z__6.r * temp2.i + z__6.i * - temp2.r; - z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + - z__5.i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; - } - } -/* L200: */ - } -/* L210: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - temp1.r = 0., temp1.i = 0.; - temp2.r = 0., temp2.i = 0.; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - d_cnjg(&z__3, &a[l + i__ * a_dim1]); - i__4 = l + j * b_dim1; - z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, - z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4] - .r; - z__1.r = temp1.r + z__2.r, z__1.i = temp1.i + z__2.i; - temp1.r = z__1.r, temp1.i = z__1.i; - d_cnjg(&z__3, &b[l + i__ * b_dim1]); - i__4 = l + j * a_dim1; - z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, - z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4] - .r; - z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; - temp2.r = z__1.r, temp2.i = z__1.i; -/* L220: */ - } - if (i__ == j) { - if (*beta == 0.) { - i__3 = j + j * c_dim1; - z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, - z__2.i = alpha->r * temp1.i + alpha->i * - temp1.r; - d_cnjg(&z__4, alpha); - z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, - z__3.i = z__4.r * temp2.i + z__4.i * - temp2.r; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + - z__3.i; - d__1 = z__1.r; - c__[i__3].r = d__1, c__[i__3].i = 0.; - } else { - i__3 = j + j * c_dim1; - i__4 = j + j * c_dim1; - z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, - z__2.i = alpha->r * temp1.i + alpha->i * - temp1.r; - d_cnjg(&z__4, alpha); - z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, - z__3.i = z__4.r * temp2.i + z__4.i * - temp2.r; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + - z__3.i; - d__1 = *beta * c__[i__4].r + z__1.r; - c__[i__3].r = d__1, c__[i__3].i = 0.; - } - } else { - if (*beta == 0.) { - i__3 = i__ + j * c_dim1; - z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, - z__2.i = alpha->r * temp1.i + alpha->i * - temp1.r; - d_cnjg(&z__4, alpha); - z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, - z__3.i = z__4.r * temp2.i + z__4.i * - temp2.r; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + - z__3.i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; - } else { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - z__3.r = *beta * c__[i__4].r, z__3.i = *beta * - c__[i__4].i; - z__4.r = alpha->r * temp1.r - alpha->i * temp1.i, - z__4.i = alpha->r * temp1.i + alpha->i * - temp1.r; - z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + - z__4.i; - d_cnjg(&z__6, alpha); - z__5.r = z__6.r * temp2.r - z__6.i * temp2.i, - z__5.i = z__6.r * temp2.i + z__6.i * - temp2.r; - z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + - z__5.i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; - } - } -/* L230: */ - } -/* L240: */ - } - } - } - - return 0; - -/* End of ZHER2K. */ - -} /* zher2k_ */ - -/* Subroutine */ int zherk_(char *uplo, char *trans, integer *n, integer *k, - doublereal *alpha, doublecomplex *a, integer *lda, doublereal *beta, - doublecomplex *c__, integer *ldc) -{ - /* System generated locals */ - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, - i__6; - doublereal d__1; - doublecomplex z__1, z__2, z__3; - - /* Builtin functions */ - void d_cnjg(doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer i__, j, l, info; - static doublecomplex temp; - extern logical lsame_(char *, char *); - static integer nrowa; - static doublereal rtemp; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - ZHERK performs one of the hermitian rank k operations - - C := alpha*A*conjg( A' ) + beta*C, - - or - - C := alpha*conjg( A' )*A + beta*C, - - where alpha and beta are real scalars, C is an n by n hermitian - matrix and A is an n by k matrix in the first case and a k by n - matrix in the second case. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the upper or lower - triangular part of the array C is to be referenced as - follows: - - UPLO = 'U' or 'u' Only the upper triangular part of C - is to be referenced. - - UPLO = 'L' or 'l' Only the lower triangular part of C - is to be referenced. - - Unchanged on exit. - - TRANS - CHARACTER*1. - On entry, TRANS specifies the operation to be performed as - follows: - - TRANS = 'N' or 'n' C := alpha*A*conjg( A' ) + beta*C. - - TRANS = 'C' or 'c' C := alpha*conjg( A' )*A + beta*C. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix C. N must be - at least zero. - Unchanged on exit. - - K - INTEGER. - On entry with TRANS = 'N' or 'n', K specifies the number - of columns of the matrix A, and on entry with - TRANS = 'C' or 'c', K specifies the number of rows of the - matrix A. K must be at least zero. - Unchanged on exit. - - ALPHA - DOUBLE PRECISION . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is - k when TRANS = 'N' or 'n', and is n otherwise. - Before entry with TRANS = 'N' or 'n', the leading n by k - part of the array A must contain the matrix A, otherwise - the leading k by n part of the array A must contain the - matrix A. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When TRANS = 'N' or 'n' - then LDA must be at least max( 1, n ), otherwise LDA must - be at least max( 1, k ). - Unchanged on exit. - - BETA - DOUBLE PRECISION. - On entry, BETA specifies the scalar beta. - Unchanged on exit. - - C - COMPLEX*16 array of DIMENSION ( LDC, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array C must contain the upper - triangular part of the hermitian matrix and the strictly - lower triangular part of C is not referenced. On exit, the - upper triangular part of the array C is overwritten by the - upper triangular part of the updated matrix. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array C must contain the lower - triangular part of the hermitian matrix and the strictly - upper triangular part of C is not referenced. On exit, the - lower triangular part of the array C is overwritten by the - lower triangular part of the updated matrix. - Note that the imaginary parts of the diagonal elements need - not be set, they are assumed to be zero, and on exit they - are set to zero. - - LDC - INTEGER. - On entry, LDC specifies the first dimension of C as declared - in the calling (sub) program. LDC must be at least - max( 1, n ). - Unchanged on exit. - - - Level 3 Blas routine. - - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1. - Ed Anderson, Cray Research Inc. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - - /* Function Body */ - if (lsame_(trans, "N")) { - nrowa = *n; - } else { - nrowa = *k; - } - upper = lsame_(uplo, "U"); - - info = 0; - if (! upper && ! lsame_(uplo, "L")) { - info = 1; - } else if (! lsame_(trans, "N") && ! lsame_(trans, - "C")) { - info = 2; - } else if (*n < 0) { - info = 3; - } else if (*k < 0) { - info = 4; - } else if (*lda < max(1,nrowa)) { - info = 7; - } else if (*ldc < max(1,*n)) { - info = 10; - } - if (info != 0) { - xerbla_("ZHERK ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { - return 0; - } - -/* And when alpha.eq.zero. */ - - if (*alpha == 0.) { - if (upper) { - if (*beta == 0.) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0., c__[i__3].i = 0.; -/* L10: */ - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ - i__4].i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; -/* L30: */ - } - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - d__1 = *beta * c__[i__3].r; - c__[i__2].r = d__1, c__[i__2].i = 0.; -/* L40: */ - } - } - } else { - if (*beta == 0.) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0., c__[i__3].i = 0.; -/* L50: */ - } -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - d__1 = *beta * c__[i__3].r; - c__[i__2].r = d__1, c__[i__2].i = 0.; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ - i__4].i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; -/* L70: */ - } -/* L80: */ - } - } - } - return 0; - } - -/* Start the operations. */ - - if (lsame_(trans, "N")) { - -/* Form C := alpha*A*conjg( A' ) + beta*C. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0., c__[i__3].i = 0.; -/* L90: */ - } - } else if (*beta != 1.) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ - i__4].i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; -/* L100: */ - } - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - d__1 = *beta * c__[i__3].r; - c__[i__2].r = d__1, c__[i__2].i = 0.; - } else { - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - d__1 = c__[i__3].r; - c__[i__2].r = d__1, c__[i__2].i = 0.; - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - i__3 = j + l * a_dim1; - if (a[i__3].r != 0. || a[i__3].i != 0.) { - d_cnjg(&z__2, &a[j + l * a_dim1]); - z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; - temp.r = z__1.r, temp.i = z__1.i; - i__3 = j - 1; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * c_dim1; - i__5 = i__ + j * c_dim1; - i__6 = i__ + l * a_dim1; - z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, - z__2.i = temp.r * a[i__6].i + temp.i * a[ - i__6].r; - z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5] - .i + z__2.i; - c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; -/* L110: */ - } - i__3 = j + j * c_dim1; - i__4 = j + j * c_dim1; - i__5 = i__ + l * a_dim1; - z__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - z__1.i = temp.r * a[i__5].i + temp.i * a[i__5] - .r; - d__1 = c__[i__4].r + z__1.r; - c__[i__3].r = d__1, c__[i__3].i = 0.; - } -/* L120: */ - } -/* L130: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0., c__[i__3].i = 0.; -/* L140: */ - } - } else if (*beta != 1.) { - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - d__1 = *beta * c__[i__3].r; - c__[i__2].r = d__1, c__[i__2].i = 0.; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ - i__4].i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; -/* L150: */ - } - } else { - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - d__1 = c__[i__3].r; - c__[i__2].r = d__1, c__[i__2].i = 0.; - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - i__3 = j + l * a_dim1; - if (a[i__3].r != 0. || a[i__3].i != 0.) { - d_cnjg(&z__2, &a[j + l * a_dim1]); - z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; - temp.r = z__1.r, temp.i = z__1.i; - i__3 = j + j * c_dim1; - i__4 = j + j * c_dim1; - i__5 = j + l * a_dim1; - z__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - z__1.i = temp.r * a[i__5].i + temp.i * a[i__5] - .r; - d__1 = c__[i__4].r + z__1.r; - c__[i__3].r = d__1, c__[i__3].i = 0.; - i__3 = *n; - for (i__ = j + 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * c_dim1; - i__5 = i__ + j * c_dim1; - i__6 = i__ + l * a_dim1; - z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, - z__2.i = temp.r * a[i__6].i + temp.i * a[ - i__6].r; - z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5] - .i + z__2.i; - c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; -/* L160: */ - } - } -/* L170: */ - } -/* L180: */ - } - } - } else { - -/* Form C := alpha*conjg( A' )*A + beta*C. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - temp.r = 0., temp.i = 0.; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - d_cnjg(&z__3, &a[l + i__ * a_dim1]); - i__4 = l + j * a_dim1; - z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, - z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4] - .r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L190: */ - } - if (*beta == 0.) { - i__3 = i__ + j * c_dim1; - z__1.r = *alpha * temp.r, z__1.i = *alpha * temp.i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; - } else { - i__3 = i__ + j * c_dim1; - z__2.r = *alpha * temp.r, z__2.i = *alpha * temp.i; - i__4 = i__ + j * c_dim1; - z__3.r = *beta * c__[i__4].r, z__3.i = *beta * c__[ - i__4].i; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; - } -/* L200: */ - } - rtemp = 0.; - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - d_cnjg(&z__3, &a[l + j * a_dim1]); - i__3 = l + j * a_dim1; - z__2.r = z__3.r * a[i__3].r - z__3.i * a[i__3].i, z__2.i = - z__3.r * a[i__3].i + z__3.i * a[i__3].r; - z__1.r = rtemp + z__2.r, z__1.i = z__2.i; - rtemp = z__1.r; -/* L210: */ - } - if (*beta == 0.) { - i__2 = j + j * c_dim1; - d__1 = *alpha * rtemp; - c__[i__2].r = d__1, c__[i__2].i = 0.; - } else { - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - d__1 = *alpha * rtemp + *beta * c__[i__3].r; - c__[i__2].r = d__1, c__[i__2].i = 0.; - } -/* L220: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - rtemp = 0.; - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - d_cnjg(&z__3, &a[l + j * a_dim1]); - i__3 = l + j * a_dim1; - z__2.r = z__3.r * a[i__3].r - z__3.i * a[i__3].i, z__2.i = - z__3.r * a[i__3].i + z__3.i * a[i__3].r; - z__1.r = rtemp + z__2.r, z__1.i = z__2.i; - rtemp = z__1.r; -/* L230: */ - } - if (*beta == 0.) { - i__2 = j + j * c_dim1; - d__1 = *alpha * rtemp; - c__[i__2].r = d__1, c__[i__2].i = 0.; - } else { - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - d__1 = *alpha * rtemp + *beta * c__[i__3].r; - c__[i__2].r = d__1, c__[i__2].i = 0.; - } - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - temp.r = 0., temp.i = 0.; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - d_cnjg(&z__3, &a[l + i__ * a_dim1]); - i__4 = l + j * a_dim1; - z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, - z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4] - .r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L240: */ - } - if (*beta == 0.) { - i__3 = i__ + j * c_dim1; - z__1.r = *alpha * temp.r, z__1.i = *alpha * temp.i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; - } else { - i__3 = i__ + j * c_dim1; - z__2.r = *alpha * temp.r, z__2.i = *alpha * temp.i; - i__4 = i__ + j * c_dim1; - z__3.r = *beta * c__[i__4].r, z__3.i = *beta * c__[ - i__4].i; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; - } -/* L250: */ - } -/* L260: */ - } - } - } - - return 0; - -/* End of ZHERK . */ - -} /* zherk_ */ - -/* Subroutine */ int zscal_(integer *n, doublecomplex *za, doublecomplex *zx, - integer *incx) -{ - /* System generated locals */ - integer i__1, i__2, i__3; - doublecomplex z__1; - - /* Local variables */ - static integer i__, ix; - - -/* - scales a vector by a constant. - jack dongarra, 3/11/78. - modified 3/93 to return if incx .le. 0. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --zx; - - /* Function Body */ - if (*n <= 0 || *incx <= 0) { - return 0; - } - if (*incx == 1) { - goto L20; - } - -/* code for increment not equal to 1 */ - - ix = 1; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = ix; - i__3 = ix; - z__1.r = za->r * zx[i__3].r - za->i * zx[i__3].i, z__1.i = za->r * zx[ - i__3].i + za->i * zx[i__3].r; - zx[i__2].r = z__1.r, zx[i__2].i = z__1.i; - ix += *incx; -/* L10: */ - } - return 0; - -/* code for increment equal to 1 */ - -L20: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__; - z__1.r = za->r * zx[i__3].r - za->i * zx[i__3].i, z__1.i = za->r * zx[ - i__3].i + za->i * zx[i__3].r; - zx[i__2].r = z__1.r, zx[i__2].i = z__1.i; -/* L30: */ - } - return 0; -} /* zscal_ */ - -/* Subroutine */ int zswap_(integer *n, doublecomplex *zx, integer *incx, - doublecomplex *zy, integer *incy) -{ - /* System generated locals */ - integer i__1, i__2, i__3; - - /* Local variables */ - static integer i__, ix, iy; - static doublecomplex ztemp; - - -/* - interchanges two vectors. - jack dongarra, 3/11/78. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --zy; - --zx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments not equal - to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = ix; - ztemp.r = zx[i__2].r, ztemp.i = zx[i__2].i; - i__2 = ix; - i__3 = iy; - zx[i__2].r = zy[i__3].r, zx[i__2].i = zy[i__3].i; - i__2 = iy; - zy[i__2].r = ztemp.r, zy[i__2].i = ztemp.i; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* code for both increments equal to 1 */ -L20: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - ztemp.r = zx[i__2].r, ztemp.i = zx[i__2].i; - i__2 = i__; - i__3 = i__; - zx[i__2].r = zy[i__3].r, zx[i__2].i = zy[i__3].i; - i__2 = i__; - zy[i__2].r = ztemp.r, zy[i__2].i = ztemp.i; -/* L30: */ - } - return 0; -} /* zswap_ */ - -/* Subroutine */ int ztrmm_(char *side, char *uplo, char *transa, char *diag, - integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, - integer *lda, doublecomplex *b, integer *ldb) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, - i__6; - doublecomplex z__1, z__2, z__3; - - /* Builtin functions */ - void d_cnjg(doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer i__, j, k, info; - static doublecomplex temp; - static logical lside; - extern logical lsame_(char *, char *); - static integer nrowa; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical noconj, nounit; - - -/* - Purpose - ======= - - ZTRMM performs one of the matrix-matrix operations - - B := alpha*op( A )*B, or B := alpha*B*op( A ) - - where alpha is a scalar, B is an m by n matrix, A is a unit, or - non-unit, upper or lower triangular matrix and op( A ) is one of - - op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). - - Parameters - ========== - - SIDE - CHARACTER*1. - On entry, SIDE specifies whether op( A ) multiplies B from - the left or right as follows: - - SIDE = 'L' or 'l' B := alpha*op( A )*B. - - SIDE = 'R' or 'r' B := alpha*B*op( A ). - - Unchanged on exit. - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the matrix A is an upper or - lower triangular matrix as follows: - - UPLO = 'U' or 'u' A is an upper triangular matrix. - - UPLO = 'L' or 'l' A is a lower triangular matrix. - - Unchanged on exit. - - TRANSA - CHARACTER*1. - On entry, TRANSA specifies the form of op( A ) to be used in - the matrix multiplication as follows: - - TRANSA = 'N' or 'n' op( A ) = A. - - TRANSA = 'T' or 't' op( A ) = A'. - - TRANSA = 'C' or 'c' op( A ) = conjg( A' ). - - Unchanged on exit. - - DIAG - CHARACTER*1. - On entry, DIAG specifies whether or not A is unit triangular - as follows: - - DIAG = 'U' or 'u' A is assumed to be unit triangular. - - DIAG = 'N' or 'n' A is not assumed to be unit - triangular. - - Unchanged on exit. - - M - INTEGER. - On entry, M specifies the number of rows of B. M must be at - least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of B. N must be - at least zero. - Unchanged on exit. - - ALPHA - COMPLEX*16 . - On entry, ALPHA specifies the scalar alpha. When alpha is - zero then A is not referenced and B need not be set before - entry. - Unchanged on exit. - - A - COMPLEX*16 array of DIMENSION ( LDA, k ), where k is m - when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. - Before entry with UPLO = 'U' or 'u', the leading k by k - upper triangular part of the array A must contain the upper - triangular matrix and the strictly lower triangular part of - A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading k by k - lower triangular part of the array A must contain the lower - triangular matrix and the strictly upper triangular part of - A is not referenced. - Note that when DIAG = 'U' or 'u', the diagonal elements of - A are not referenced either, but are assumed to be unity. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When SIDE = 'L' or 'l' then - LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' - then LDA must be at least max( 1, n ). - Unchanged on exit. - - B - COMPLEX*16 array of DIMENSION ( LDB, n ). - Before entry, the leading m by n part of the array B must - contain the matrix B, and on exit is overwritten by the - transformed matrix. - - LDB - INTEGER. - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. LDB must be at least - max( 1, m ). - Unchanged on exit. - - - Level 3 Blas routine. - - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - lside = lsame_(side, "L"); - if (lside) { - nrowa = *m; - } else { - nrowa = *n; - } - noconj = lsame_(transa, "T"); - nounit = lsame_(diag, "N"); - upper = lsame_(uplo, "U"); - - info = 0; - if (! lside && ! lsame_(side, "R")) { - info = 1; - } else if (! upper && ! lsame_(uplo, "L")) { - info = 2; - } else if (! lsame_(transa, "N") && ! lsame_(transa, - "T") && ! lsame_(transa, "C")) { - info = 3; - } else if (! lsame_(diag, "U") && ! lsame_(diag, - "N")) { - info = 4; - } else if (*m < 0) { - info = 5; - } else if (*n < 0) { - info = 6; - } else if (*lda < max(1,nrowa)) { - info = 9; - } else if (*ldb < max(1,*m)) { - info = 11; - } - if (info != 0) { - xerbla_("ZTRMM ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - -/* And when alpha.eq.zero. */ - - if (alpha->r == 0. && alpha->i == 0.) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - b[i__3].r = 0., b[i__3].i = 0.; -/* L10: */ - } -/* L20: */ - } - return 0; - } - -/* Start the operations. */ - - if (lside) { - if (lsame_(transa, "N")) { - -/* Form B := alpha*A*B. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (k = 1; k <= i__2; ++k) { - i__3 = k + j * b_dim1; - if (b[i__3].r != 0. || b[i__3].i != 0.) { - i__3 = k + j * b_dim1; - z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3] - .i, z__1.i = alpha->r * b[i__3].i + - alpha->i * b[i__3].r; - temp.r = z__1.r, temp.i = z__1.i; - i__3 = k - 1; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * b_dim1; - i__5 = i__ + j * b_dim1; - i__6 = i__ + k * a_dim1; - z__2.r = temp.r * a[i__6].r - temp.i * a[i__6] - .i, z__2.i = temp.r * a[i__6].i + - temp.i * a[i__6].r; - z__1.r = b[i__5].r + z__2.r, z__1.i = b[i__5] - .i + z__2.i; - b[i__4].r = z__1.r, b[i__4].i = z__1.i; -/* L30: */ - } - if (nounit) { - i__3 = k + k * a_dim1; - z__1.r = temp.r * a[i__3].r - temp.i * a[i__3] - .i, z__1.i = temp.r * a[i__3].i + - temp.i * a[i__3].r; - temp.r = z__1.r, temp.i = z__1.i; - } - i__3 = k + j * b_dim1; - b[i__3].r = temp.r, b[i__3].i = temp.i; - } -/* L40: */ - } -/* L50: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - for (k = *m; k >= 1; --k) { - i__2 = k + j * b_dim1; - if (b[i__2].r != 0. || b[i__2].i != 0.) { - i__2 = k + j * b_dim1; - z__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2] - .i, z__1.i = alpha->r * b[i__2].i + - alpha->i * b[i__2].r; - temp.r = z__1.r, temp.i = z__1.i; - i__2 = k + j * b_dim1; - b[i__2].r = temp.r, b[i__2].i = temp.i; - if (nounit) { - i__2 = k + j * b_dim1; - i__3 = k + j * b_dim1; - i__4 = k + k * a_dim1; - z__1.r = b[i__3].r * a[i__4].r - b[i__3].i * - a[i__4].i, z__1.i = b[i__3].r * a[ - i__4].i + b[i__3].i * a[i__4].r; - b[i__2].r = z__1.r, b[i__2].i = z__1.i; - } - i__2 = *m; - for (i__ = k + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - i__5 = i__ + k * a_dim1; - z__2.r = temp.r * a[i__5].r - temp.i * a[i__5] - .i, z__2.i = temp.r * a[i__5].i + - temp.i * a[i__5].r; - z__1.r = b[i__4].r + z__2.r, z__1.i = b[i__4] - .i + z__2.i; - b[i__3].r = z__1.r, b[i__3].i = z__1.i; -/* L60: */ - } - } -/* L70: */ - } -/* L80: */ - } - } - } else { - -/* Form B := alpha*A'*B or B := alpha*conjg( A' )*B. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - for (i__ = *m; i__ >= 1; --i__) { - i__2 = i__ + j * b_dim1; - temp.r = b[i__2].r, temp.i = b[i__2].i; - if (noconj) { - if (nounit) { - i__2 = i__ + i__ * a_dim1; - z__1.r = temp.r * a[i__2].r - temp.i * a[i__2] - .i, z__1.i = temp.r * a[i__2].i + - temp.i * a[i__2].r; - temp.r = z__1.r, temp.i = z__1.i; - } - i__2 = i__ - 1; - for (k = 1; k <= i__2; ++k) { - i__3 = k + i__ * a_dim1; - i__4 = k + j * b_dim1; - z__2.r = a[i__3].r * b[i__4].r - a[i__3].i * - b[i__4].i, z__2.i = a[i__3].r * b[ - i__4].i + a[i__3].i * b[i__4].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L90: */ - } - } else { - if (nounit) { - d_cnjg(&z__2, &a[i__ + i__ * a_dim1]); - z__1.r = temp.r * z__2.r - temp.i * z__2.i, - z__1.i = temp.r * z__2.i + temp.i * - z__2.r; - temp.r = z__1.r, temp.i = z__1.i; - } - i__2 = i__ - 1; - for (k = 1; k <= i__2; ++k) { - d_cnjg(&z__3, &a[k + i__ * a_dim1]); - i__3 = k + j * b_dim1; - z__2.r = z__3.r * b[i__3].r - z__3.i * b[i__3] - .i, z__2.i = z__3.r * b[i__3].i + - z__3.i * b[i__3].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L100: */ - } - } - i__2 = i__ + j * b_dim1; - z__1.r = alpha->r * temp.r - alpha->i * temp.i, - z__1.i = alpha->r * temp.i + alpha->i * - temp.r; - b[i__2].r = z__1.r, b[i__2].i = z__1.i; -/* L110: */ - } -/* L120: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - temp.r = b[i__3].r, temp.i = b[i__3].i; - if (noconj) { - if (nounit) { - i__3 = i__ + i__ * a_dim1; - z__1.r = temp.r * a[i__3].r - temp.i * a[i__3] - .i, z__1.i = temp.r * a[i__3].i + - temp.i * a[i__3].r; - temp.r = z__1.r, temp.i = z__1.i; - } - i__3 = *m; - for (k = i__ + 1; k <= i__3; ++k) { - i__4 = k + i__ * a_dim1; - i__5 = k + j * b_dim1; - z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * - b[i__5].i, z__2.i = a[i__4].r * b[ - i__5].i + a[i__4].i * b[i__5].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L130: */ - } - } else { - if (nounit) { - d_cnjg(&z__2, &a[i__ + i__ * a_dim1]); - z__1.r = temp.r * z__2.r - temp.i * z__2.i, - z__1.i = temp.r * z__2.i + temp.i * - z__2.r; - temp.r = z__1.r, temp.i = z__1.i; - } - i__3 = *m; - for (k = i__ + 1; k <= i__3; ++k) { - d_cnjg(&z__3, &a[k + i__ * a_dim1]); - i__4 = k + j * b_dim1; - z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4] - .i, z__2.i = z__3.r * b[i__4].i + - z__3.i * b[i__4].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L140: */ - } - } - i__3 = i__ + j * b_dim1; - z__1.r = alpha->r * temp.r - alpha->i * temp.i, - z__1.i = alpha->r * temp.i + alpha->i * - temp.r; - b[i__3].r = z__1.r, b[i__3].i = z__1.i; -/* L150: */ - } -/* L160: */ - } - } - } - } else { - if (lsame_(transa, "N")) { - -/* Form B := alpha*B*A. */ - - if (upper) { - for (j = *n; j >= 1; --j) { - temp.r = alpha->r, temp.i = alpha->i; - if (nounit) { - i__1 = j + j * a_dim1; - z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, - z__1.i = temp.r * a[i__1].i + temp.i * a[i__1] - .r; - temp.r = z__1.r, temp.i = z__1.i; - } - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + j * b_dim1; - i__3 = i__ + j * b_dim1; - z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, - z__1.i = temp.r * b[i__3].i + temp.i * b[i__3] - .r; - b[i__2].r = z__1.r, b[i__2].i = z__1.i; -/* L170: */ - } - i__1 = j - 1; - for (k = 1; k <= i__1; ++k) { - i__2 = k + j * a_dim1; - if (a[i__2].r != 0. || a[i__2].i != 0.) { - i__2 = k + j * a_dim1; - z__1.r = alpha->r * a[i__2].r - alpha->i * a[i__2] - .i, z__1.i = alpha->r * a[i__2].i + - alpha->i * a[i__2].r; - temp.r = z__1.r, temp.i = z__1.i; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - i__5 = i__ + k * b_dim1; - z__2.r = temp.r * b[i__5].r - temp.i * b[i__5] - .i, z__2.i = temp.r * b[i__5].i + - temp.i * b[i__5].r; - z__1.r = b[i__4].r + z__2.r, z__1.i = b[i__4] - .i + z__2.i; - b[i__3].r = z__1.r, b[i__3].i = z__1.i; -/* L180: */ - } - } -/* L190: */ - } -/* L200: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp.r = alpha->r, temp.i = alpha->i; - if (nounit) { - i__2 = j + j * a_dim1; - z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, - z__1.i = temp.r * a[i__2].i + temp.i * a[i__2] - .r; - temp.r = z__1.r, temp.i = z__1.i; - } - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, - z__1.i = temp.r * b[i__4].i + temp.i * b[i__4] - .r; - b[i__3].r = z__1.r, b[i__3].i = z__1.i; -/* L210: */ - } - i__2 = *n; - for (k = j + 1; k <= i__2; ++k) { - i__3 = k + j * a_dim1; - if (a[i__3].r != 0. || a[i__3].i != 0.) { - i__3 = k + j * a_dim1; - z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3] - .i, z__1.i = alpha->r * a[i__3].i + - alpha->i * a[i__3].r; - temp.r = z__1.r, temp.i = z__1.i; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * b_dim1; - i__5 = i__ + j * b_dim1; - i__6 = i__ + k * b_dim1; - z__2.r = temp.r * b[i__6].r - temp.i * b[i__6] - .i, z__2.i = temp.r * b[i__6].i + - temp.i * b[i__6].r; - z__1.r = b[i__5].r + z__2.r, z__1.i = b[i__5] - .i + z__2.i; - b[i__4].r = z__1.r, b[i__4].i = z__1.i; -/* L220: */ - } - } -/* L230: */ - } -/* L240: */ - } - } - } else { - -/* Form B := alpha*B*A' or B := alpha*B*conjg( A' ). */ - - if (upper) { - i__1 = *n; - for (k = 1; k <= i__1; ++k) { - i__2 = k - 1; - for (j = 1; j <= i__2; ++j) { - i__3 = j + k * a_dim1; - if (a[i__3].r != 0. || a[i__3].i != 0.) { - if (noconj) { - i__3 = j + k * a_dim1; - z__1.r = alpha->r * a[i__3].r - alpha->i * a[ - i__3].i, z__1.i = alpha->r * a[i__3] - .i + alpha->i * a[i__3].r; - temp.r = z__1.r, temp.i = z__1.i; - } else { - d_cnjg(&z__2, &a[j + k * a_dim1]); - z__1.r = alpha->r * z__2.r - alpha->i * - z__2.i, z__1.i = alpha->r * z__2.i + - alpha->i * z__2.r; - temp.r = z__1.r, temp.i = z__1.i; - } - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * b_dim1; - i__5 = i__ + j * b_dim1; - i__6 = i__ + k * b_dim1; - z__2.r = temp.r * b[i__6].r - temp.i * b[i__6] - .i, z__2.i = temp.r * b[i__6].i + - temp.i * b[i__6].r; - z__1.r = b[i__5].r + z__2.r, z__1.i = b[i__5] - .i + z__2.i; - b[i__4].r = z__1.r, b[i__4].i = z__1.i; -/* L250: */ - } - } -/* L260: */ - } - temp.r = alpha->r, temp.i = alpha->i; - if (nounit) { - if (noconj) { - i__2 = k + k * a_dim1; - z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, - z__1.i = temp.r * a[i__2].i + temp.i * a[ - i__2].r; - temp.r = z__1.r, temp.i = z__1.i; - } else { - d_cnjg(&z__2, &a[k + k * a_dim1]); - z__1.r = temp.r * z__2.r - temp.i * z__2.i, - z__1.i = temp.r * z__2.i + temp.i * - z__2.r; - temp.r = z__1.r, temp.i = z__1.i; - } - } - if (temp.r != 1. || temp.i != 0.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + k * b_dim1; - i__4 = i__ + k * b_dim1; - z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, - z__1.i = temp.r * b[i__4].i + temp.i * b[ - i__4].r; - b[i__3].r = z__1.r, b[i__3].i = z__1.i; -/* L270: */ - } - } -/* L280: */ - } - } else { - for (k = *n; k >= 1; --k) { - i__1 = *n; - for (j = k + 1; j <= i__1; ++j) { - i__2 = j + k * a_dim1; - if (a[i__2].r != 0. || a[i__2].i != 0.) { - if (noconj) { - i__2 = j + k * a_dim1; - z__1.r = alpha->r * a[i__2].r - alpha->i * a[ - i__2].i, z__1.i = alpha->r * a[i__2] - .i + alpha->i * a[i__2].r; - temp.r = z__1.r, temp.i = z__1.i; - } else { - d_cnjg(&z__2, &a[j + k * a_dim1]); - z__1.r = alpha->r * z__2.r - alpha->i * - z__2.i, z__1.i = alpha->r * z__2.i + - alpha->i * z__2.r; - temp.r = z__1.r, temp.i = z__1.i; - } - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - i__5 = i__ + k * b_dim1; - z__2.r = temp.r * b[i__5].r - temp.i * b[i__5] - .i, z__2.i = temp.r * b[i__5].i + - temp.i * b[i__5].r; - z__1.r = b[i__4].r + z__2.r, z__1.i = b[i__4] - .i + z__2.i; - b[i__3].r = z__1.r, b[i__3].i = z__1.i; -/* L290: */ - } - } -/* L300: */ - } - temp.r = alpha->r, temp.i = alpha->i; - if (nounit) { - if (noconj) { - i__1 = k + k * a_dim1; - z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, - z__1.i = temp.r * a[i__1].i + temp.i * a[ - i__1].r; - temp.r = z__1.r, temp.i = z__1.i; - } else { - d_cnjg(&z__2, &a[k + k * a_dim1]); - z__1.r = temp.r * z__2.r - temp.i * z__2.i, - z__1.i = temp.r * z__2.i + temp.i * - z__2.r; - temp.r = z__1.r, temp.i = z__1.i; - } - } - if (temp.r != 1. || temp.i != 0.) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + k * b_dim1; - i__3 = i__ + k * b_dim1; - z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, - z__1.i = temp.r * b[i__3].i + temp.i * b[ - i__3].r; - b[i__2].r = z__1.r, b[i__2].i = z__1.i; -/* L310: */ - } - } -/* L320: */ - } - } - } - } - - return 0; - -/* End of ZTRMM . */ - -} /* ztrmm_ */ - -/* Subroutine */ int ztrmv_(char *uplo, char *trans, char *diag, integer *n, - doublecomplex *a, integer *lda, doublecomplex *x, integer *incx) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - doublecomplex z__1, z__2, z__3; - - /* Builtin functions */ - void d_cnjg(doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer i__, j, ix, jx, kx, info; - static doublecomplex temp; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical noconj, nounit; - - -/* - Purpose - ======= - - ZTRMV performs one of the matrix-vector operations - - x := A*x, or x := A'*x, or x := conjg( A' )*x, - - where x is an n element vector and A is an n by n unit, or non-unit, - upper or lower triangular matrix. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the matrix is an upper or - lower triangular matrix as follows: - - UPLO = 'U' or 'u' A is an upper triangular matrix. - - UPLO = 'L' or 'l' A is a lower triangular matrix. - - Unchanged on exit. - - TRANS - CHARACTER*1. - On entry, TRANS specifies the operation to be performed as - follows: - - TRANS = 'N' or 'n' x := A*x. - - TRANS = 'T' or 't' x := A'*x. - - TRANS = 'C' or 'c' x := conjg( A' )*x. - - Unchanged on exit. - - DIAG - CHARACTER*1. - On entry, DIAG specifies whether or not A is unit - triangular as follows: - - DIAG = 'U' or 'u' A is assumed to be unit triangular. - - DIAG = 'N' or 'n' A is not assumed to be unit - triangular. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix A. - N must be at least zero. - Unchanged on exit. - - A - COMPLEX*16 array of DIMENSION ( LDA, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array A must contain the upper - triangular matrix and the strictly lower triangular part of - A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array A must contain the lower - triangular matrix and the strictly upper triangular part of - A is not referenced. - Note that when DIAG = 'U' or 'u', the diagonal elements of - A are not referenced either, but are assumed to be unity. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, n ). - Unchanged on exit. - - X - COMPLEX*16 array of dimension at least - ( 1 + ( n - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the n - element vector x. On exit, X is overwritten with the - tranformed vector x. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - info = 1; - } else if (! lsame_(trans, "N") && ! lsame_(trans, - "T") && ! lsame_(trans, "C")) { - info = 2; - } else if (! lsame_(diag, "U") && ! lsame_(diag, - "N")) { - info = 3; - } else if (*n < 0) { - info = 4; - } else if (*lda < max(1,*n)) { - info = 6; - } else if (*incx == 0) { - info = 8; - } - if (info != 0) { - xerbla_("ZTRMV ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - - noconj = lsame_(trans, "T"); - nounit = lsame_(diag, "N"); - -/* - Set up the start point in X if the increment is not unity. This - will be ( N - 1 )*INCX too small for descending loops. -*/ - - if (*incx <= 0) { - kx = 1 - (*n - 1) * *incx; - } else if (*incx != 1) { - kx = 1; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through A. -*/ - - if (lsame_(trans, "N")) { - -/* Form x := A*x. */ - - if (lsame_(uplo, "U")) { - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - if (x[i__2].r != 0. || x[i__2].i != 0.) { - i__2 = j; - temp.r = x[i__2].r, temp.i = x[i__2].i; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__; - i__4 = i__; - i__5 = i__ + j * a_dim1; - z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - z__2.i = temp.r * a[i__5].i + temp.i * a[ - i__5].r; - z__1.r = x[i__4].r + z__2.r, z__1.i = x[i__4].i + - z__2.i; - x[i__3].r = z__1.r, x[i__3].i = z__1.i; -/* L10: */ - } - if (nounit) { - i__2 = j; - i__3 = j; - i__4 = j + j * a_dim1; - z__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[ - i__4].i, z__1.i = x[i__3].r * a[i__4].i + - x[i__3].i * a[i__4].r; - x[i__2].r = z__1.r, x[i__2].i = z__1.i; - } - } -/* L20: */ - } - } else { - jx = kx; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jx; - if (x[i__2].r != 0. || x[i__2].i != 0.) { - i__2 = jx; - temp.r = x[i__2].r, temp.i = x[i__2].i; - ix = kx; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = ix; - i__4 = ix; - i__5 = i__ + j * a_dim1; - z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - z__2.i = temp.r * a[i__5].i + temp.i * a[ - i__5].r; - z__1.r = x[i__4].r + z__2.r, z__1.i = x[i__4].i + - z__2.i; - x[i__3].r = z__1.r, x[i__3].i = z__1.i; - ix += *incx; -/* L30: */ - } - if (nounit) { - i__2 = jx; - i__3 = jx; - i__4 = j + j * a_dim1; - z__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[ - i__4].i, z__1.i = x[i__3].r * a[i__4].i + - x[i__3].i * a[i__4].r; - x[i__2].r = z__1.r, x[i__2].i = z__1.i; - } - } - jx += *incx; -/* L40: */ - } - } - } else { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - i__1 = j; - if (x[i__1].r != 0. || x[i__1].i != 0.) { - i__1 = j; - temp.r = x[i__1].r, temp.i = x[i__1].i; - i__1 = j + 1; - for (i__ = *n; i__ >= i__1; --i__) { - i__2 = i__; - i__3 = i__; - i__4 = i__ + j * a_dim1; - z__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, - z__2.i = temp.r * a[i__4].i + temp.i * a[ - i__4].r; - z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + - z__2.i; - x[i__2].r = z__1.r, x[i__2].i = z__1.i; -/* L50: */ - } - if (nounit) { - i__1 = j; - i__2 = j; - i__3 = j + j * a_dim1; - z__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[ - i__3].i, z__1.i = x[i__2].r * a[i__3].i + - x[i__2].i * a[i__3].r; - x[i__1].r = z__1.r, x[i__1].i = z__1.i; - } - } -/* L60: */ - } - } else { - kx += (*n - 1) * *incx; - jx = kx; - for (j = *n; j >= 1; --j) { - i__1 = jx; - if (x[i__1].r != 0. || x[i__1].i != 0.) { - i__1 = jx; - temp.r = x[i__1].r, temp.i = x[i__1].i; - ix = kx; - i__1 = j + 1; - for (i__ = *n; i__ >= i__1; --i__) { - i__2 = ix; - i__3 = ix; - i__4 = i__ + j * a_dim1; - z__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, - z__2.i = temp.r * a[i__4].i + temp.i * a[ - i__4].r; - z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + - z__2.i; - x[i__2].r = z__1.r, x[i__2].i = z__1.i; - ix -= *incx; -/* L70: */ - } - if (nounit) { - i__1 = jx; - i__2 = jx; - i__3 = j + j * a_dim1; - z__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[ - i__3].i, z__1.i = x[i__2].r * a[i__3].i + - x[i__2].i * a[i__3].r; - x[i__1].r = z__1.r, x[i__1].i = z__1.i; - } - } - jx -= *incx; -/* L80: */ - } - } - } - } else { - -/* Form x := A'*x or x := conjg( A' )*x. */ - - if (lsame_(uplo, "U")) { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - i__1 = j; - temp.r = x[i__1].r, temp.i = x[i__1].i; - if (noconj) { - if (nounit) { - i__1 = j + j * a_dim1; - z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, - z__1.i = temp.r * a[i__1].i + temp.i * a[ - i__1].r; - temp.r = z__1.r, temp.i = z__1.i; - } - for (i__ = j - 1; i__ >= 1; --i__) { - i__1 = i__ + j * a_dim1; - i__2 = i__; - z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[ - i__2].i, z__2.i = a[i__1].r * x[i__2].i + - a[i__1].i * x[i__2].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L90: */ - } - } else { - if (nounit) { - d_cnjg(&z__2, &a[j + j * a_dim1]); - z__1.r = temp.r * z__2.r - temp.i * z__2.i, - z__1.i = temp.r * z__2.i + temp.i * - z__2.r; - temp.r = z__1.r, temp.i = z__1.i; - } - for (i__ = j - 1; i__ >= 1; --i__) { - d_cnjg(&z__3, &a[i__ + j * a_dim1]); - i__1 = i__; - z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, - z__2.i = z__3.r * x[i__1].i + z__3.i * x[ - i__1].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L100: */ - } - } - i__1 = j; - x[i__1].r = temp.r, x[i__1].i = temp.i; -/* L110: */ - } - } else { - jx = kx + (*n - 1) * *incx; - for (j = *n; j >= 1; --j) { - i__1 = jx; - temp.r = x[i__1].r, temp.i = x[i__1].i; - ix = jx; - if (noconj) { - if (nounit) { - i__1 = j + j * a_dim1; - z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, - z__1.i = temp.r * a[i__1].i + temp.i * a[ - i__1].r; - temp.r = z__1.r, temp.i = z__1.i; - } - for (i__ = j - 1; i__ >= 1; --i__) { - ix -= *incx; - i__1 = i__ + j * a_dim1; - i__2 = ix; - z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[ - i__2].i, z__2.i = a[i__1].r * x[i__2].i + - a[i__1].i * x[i__2].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L120: */ - } - } else { - if (nounit) { - d_cnjg(&z__2, &a[j + j * a_dim1]); - z__1.r = temp.r * z__2.r - temp.i * z__2.i, - z__1.i = temp.r * z__2.i + temp.i * - z__2.r; - temp.r = z__1.r, temp.i = z__1.i; - } - for (i__ = j - 1; i__ >= 1; --i__) { - ix -= *incx; - d_cnjg(&z__3, &a[i__ + j * a_dim1]); - i__1 = ix; - z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, - z__2.i = z__3.r * x[i__1].i + z__3.i * x[ - i__1].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L130: */ - } - } - i__1 = jx; - x[i__1].r = temp.r, x[i__1].i = temp.i; - jx -= *incx; -/* L140: */ - } - } - } else { - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - temp.r = x[i__2].r, temp.i = x[i__2].i; - if (noconj) { - if (nounit) { - i__2 = j + j * a_dim1; - z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, - z__1.i = temp.r * a[i__2].i + temp.i * a[ - i__2].r; - temp.r = z__1.r, temp.i = z__1.i; - } - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__; - z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ - i__4].i, z__2.i = a[i__3].r * x[i__4].i + - a[i__3].i * x[i__4].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L150: */ - } - } else { - if (nounit) { - d_cnjg(&z__2, &a[j + j * a_dim1]); - z__1.r = temp.r * z__2.r - temp.i * z__2.i, - z__1.i = temp.r * z__2.i + temp.i * - z__2.r; - temp.r = z__1.r, temp.i = z__1.i; - } - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - d_cnjg(&z__3, &a[i__ + j * a_dim1]); - i__3 = i__; - z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, - z__2.i = z__3.r * x[i__3].i + z__3.i * x[ - i__3].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L160: */ - } - } - i__2 = j; - x[i__2].r = temp.r, x[i__2].i = temp.i; -/* L170: */ - } - } else { - jx = kx; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jx; - temp.r = x[i__2].r, temp.i = x[i__2].i; - ix = jx; - if (noconj) { - if (nounit) { - i__2 = j + j * a_dim1; - z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, - z__1.i = temp.r * a[i__2].i + temp.i * a[ - i__2].r; - temp.r = z__1.r, temp.i = z__1.i; - } - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - ix += *incx; - i__3 = i__ + j * a_dim1; - i__4 = ix; - z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ - i__4].i, z__2.i = a[i__3].r * x[i__4].i + - a[i__3].i * x[i__4].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L180: */ - } - } else { - if (nounit) { - d_cnjg(&z__2, &a[j + j * a_dim1]); - z__1.r = temp.r * z__2.r - temp.i * z__2.i, - z__1.i = temp.r * z__2.i + temp.i * - z__2.r; - temp.r = z__1.r, temp.i = z__1.i; - } - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - ix += *incx; - d_cnjg(&z__3, &a[i__ + j * a_dim1]); - i__3 = ix; - z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, - z__2.i = z__3.r * x[i__3].i + z__3.i * x[ - i__3].r; - z__1.r = temp.r + z__2.r, z__1.i = temp.i + - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L190: */ - } - } - i__2 = jx; - x[i__2].r = temp.r, x[i__2].i = temp.i; - jx += *incx; -/* L200: */ - } - } - } - } - - return 0; - -/* End of ZTRMV . */ - -} /* ztrmv_ */ - -/* Subroutine */ int ztrsm_(char *side, char *uplo, char *transa, char *diag, - integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, - integer *lda, doublecomplex *b, integer *ldb) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, - i__6, i__7; - doublecomplex z__1, z__2, z__3; - - /* Builtin functions */ - void z_div(doublecomplex *, doublecomplex *, doublecomplex *), d_cnjg( - doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer i__, j, k, info; - static doublecomplex temp; - static logical lside; - extern logical lsame_(char *, char *); - static integer nrowa; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical noconj, nounit; - - -/* - Purpose - ======= - - ZTRSM solves one of the matrix equations - - op( A )*X = alpha*B, or X*op( A ) = alpha*B, - - where alpha is a scalar, X and B are m by n matrices, A is a unit, or - non-unit, upper or lower triangular matrix and op( A ) is one of - - op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). - - The matrix X is overwritten on B. - - Parameters - ========== - - SIDE - CHARACTER*1. - On entry, SIDE specifies whether op( A ) appears on the left - or right of X as follows: - - SIDE = 'L' or 'l' op( A )*X = alpha*B. - - SIDE = 'R' or 'r' X*op( A ) = alpha*B. - - Unchanged on exit. - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the matrix A is an upper or - lower triangular matrix as follows: - - UPLO = 'U' or 'u' A is an upper triangular matrix. - - UPLO = 'L' or 'l' A is a lower triangular matrix. - - Unchanged on exit. - - TRANSA - CHARACTER*1. - On entry, TRANSA specifies the form of op( A ) to be used in - the matrix multiplication as follows: - - TRANSA = 'N' or 'n' op( A ) = A. - - TRANSA = 'T' or 't' op( A ) = A'. - - TRANSA = 'C' or 'c' op( A ) = conjg( A' ). - - Unchanged on exit. - - DIAG - CHARACTER*1. - On entry, DIAG specifies whether or not A is unit triangular - as follows: - - DIAG = 'U' or 'u' A is assumed to be unit triangular. - - DIAG = 'N' or 'n' A is not assumed to be unit - triangular. - - Unchanged on exit. - - M - INTEGER. - On entry, M specifies the number of rows of B. M must be at - least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of B. N must be - at least zero. - Unchanged on exit. - - ALPHA - COMPLEX*16 . - On entry, ALPHA specifies the scalar alpha. When alpha is - zero then A is not referenced and B need not be set before - entry. - Unchanged on exit. - - A - COMPLEX*16 array of DIMENSION ( LDA, k ), where k is m - when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. - Before entry with UPLO = 'U' or 'u', the leading k by k - upper triangular part of the array A must contain the upper - triangular matrix and the strictly lower triangular part of - A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading k by k - lower triangular part of the array A must contain the lower - triangular matrix and the strictly upper triangular part of - A is not referenced. - Note that when DIAG = 'U' or 'u', the diagonal elements of - A are not referenced either, but are assumed to be unity. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When SIDE = 'L' or 'l' then - LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' - then LDA must be at least max( 1, n ). - Unchanged on exit. - - B - COMPLEX*16 array of DIMENSION ( LDB, n ). - Before entry, the leading m by n part of the array B must - contain the right-hand side matrix B, and on exit is - overwritten by the solution matrix X. - - LDB - INTEGER. - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. LDB must be at least - max( 1, m ). - Unchanged on exit. - - - Level 3 Blas routine. - - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - lside = lsame_(side, "L"); - if (lside) { - nrowa = *m; - } else { - nrowa = *n; - } - noconj = lsame_(transa, "T"); - nounit = lsame_(diag, "N"); - upper = lsame_(uplo, "U"); - - info = 0; - if (! lside && ! lsame_(side, "R")) { - info = 1; - } else if (! upper && ! lsame_(uplo, "L")) { - info = 2; - } else if (! lsame_(transa, "N") && ! lsame_(transa, - "T") && ! lsame_(transa, "C")) { - info = 3; - } else if (! lsame_(diag, "U") && ! lsame_(diag, - "N")) { - info = 4; - } else if (*m < 0) { - info = 5; - } else if (*n < 0) { - info = 6; - } else if (*lda < max(1,nrowa)) { - info = 9; - } else if (*ldb < max(1,*m)) { - info = 11; - } - if (info != 0) { - xerbla_("ZTRSM ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - -/* And when alpha.eq.zero. */ - - if (alpha->r == 0. && alpha->i == 0.) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - b[i__3].r = 0., b[i__3].i = 0.; -/* L10: */ - } -/* L20: */ - } - return 0; - } - -/* Start the operations. */ - - if (lside) { - if (lsame_(transa, "N")) { - -/* Form B := alpha*inv( A )*B. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (alpha->r != 1. || alpha->i != 0.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] - .i, z__1.i = alpha->r * b[i__4].i + - alpha->i * b[i__4].r; - b[i__3].r = z__1.r, b[i__3].i = z__1.i; -/* L30: */ - } - } - for (k = *m; k >= 1; --k) { - i__2 = k + j * b_dim1; - if (b[i__2].r != 0. || b[i__2].i != 0.) { - if (nounit) { - i__2 = k + j * b_dim1; - z_div(&z__1, &b[k + j * b_dim1], &a[k + k * - a_dim1]); - b[i__2].r = z__1.r, b[i__2].i = z__1.i; - } - i__2 = k - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - i__5 = k + j * b_dim1; - i__6 = i__ + k * a_dim1; - z__2.r = b[i__5].r * a[i__6].r - b[i__5].i * - a[i__6].i, z__2.i = b[i__5].r * a[ - i__6].i + b[i__5].i * a[i__6].r; - z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4] - .i - z__2.i; - b[i__3].r = z__1.r, b[i__3].i = z__1.i; -/* L40: */ - } - } -/* L50: */ - } -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (alpha->r != 1. || alpha->i != 0.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] - .i, z__1.i = alpha->r * b[i__4].i + - alpha->i * b[i__4].r; - b[i__3].r = z__1.r, b[i__3].i = z__1.i; -/* L70: */ - } - } - i__2 = *m; - for (k = 1; k <= i__2; ++k) { - i__3 = k + j * b_dim1; - if (b[i__3].r != 0. || b[i__3].i != 0.) { - if (nounit) { - i__3 = k + j * b_dim1; - z_div(&z__1, &b[k + j * b_dim1], &a[k + k * - a_dim1]); - b[i__3].r = z__1.r, b[i__3].i = z__1.i; - } - i__3 = *m; - for (i__ = k + 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * b_dim1; - i__5 = i__ + j * b_dim1; - i__6 = k + j * b_dim1; - i__7 = i__ + k * a_dim1; - z__2.r = b[i__6].r * a[i__7].r - b[i__6].i * - a[i__7].i, z__2.i = b[i__6].r * a[ - i__7].i + b[i__6].i * a[i__7].r; - z__1.r = b[i__5].r - z__2.r, z__1.i = b[i__5] - .i - z__2.i; - b[i__4].r = z__1.r, b[i__4].i = z__1.i; -/* L80: */ - } - } -/* L90: */ - } -/* L100: */ - } - } - } else { - -/* - Form B := alpha*inv( A' )*B - or B := alpha*inv( conjg( A' ) )*B. -*/ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, - z__1.i = alpha->r * b[i__3].i + alpha->i * b[ - i__3].r; - temp.r = z__1.r, temp.i = z__1.i; - if (noconj) { - i__3 = i__ - 1; - for (k = 1; k <= i__3; ++k) { - i__4 = k + i__ * a_dim1; - i__5 = k + j * b_dim1; - z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * - b[i__5].i, z__2.i = a[i__4].r * b[ - i__5].i + a[i__4].i * b[i__5].r; - z__1.r = temp.r - z__2.r, z__1.i = temp.i - - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L110: */ - } - if (nounit) { - z_div(&z__1, &temp, &a[i__ + i__ * a_dim1]); - temp.r = z__1.r, temp.i = z__1.i; - } - } else { - i__3 = i__ - 1; - for (k = 1; k <= i__3; ++k) { - d_cnjg(&z__3, &a[k + i__ * a_dim1]); - i__4 = k + j * b_dim1; - z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4] - .i, z__2.i = z__3.r * b[i__4].i + - z__3.i * b[i__4].r; - z__1.r = temp.r - z__2.r, z__1.i = temp.i - - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L120: */ - } - if (nounit) { - d_cnjg(&z__2, &a[i__ + i__ * a_dim1]); - z_div(&z__1, &temp, &z__2); - temp.r = z__1.r, temp.i = z__1.i; - } - } - i__3 = i__ + j * b_dim1; - b[i__3].r = temp.r, b[i__3].i = temp.i; -/* L130: */ - } -/* L140: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - for (i__ = *m; i__ >= 1; --i__) { - i__2 = i__ + j * b_dim1; - z__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i, - z__1.i = alpha->r * b[i__2].i + alpha->i * b[ - i__2].r; - temp.r = z__1.r, temp.i = z__1.i; - if (noconj) { - i__2 = *m; - for (k = i__ + 1; k <= i__2; ++k) { - i__3 = k + i__ * a_dim1; - i__4 = k + j * b_dim1; - z__2.r = a[i__3].r * b[i__4].r - a[i__3].i * - b[i__4].i, z__2.i = a[i__3].r * b[ - i__4].i + a[i__3].i * b[i__4].r; - z__1.r = temp.r - z__2.r, z__1.i = temp.i - - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L150: */ - } - if (nounit) { - z_div(&z__1, &temp, &a[i__ + i__ * a_dim1]); - temp.r = z__1.r, temp.i = z__1.i; - } - } else { - i__2 = *m; - for (k = i__ + 1; k <= i__2; ++k) { - d_cnjg(&z__3, &a[k + i__ * a_dim1]); - i__3 = k + j * b_dim1; - z__2.r = z__3.r * b[i__3].r - z__3.i * b[i__3] - .i, z__2.i = z__3.r * b[i__3].i + - z__3.i * b[i__3].r; - z__1.r = temp.r - z__2.r, z__1.i = temp.i - - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L160: */ - } - if (nounit) { - d_cnjg(&z__2, &a[i__ + i__ * a_dim1]); - z_div(&z__1, &temp, &z__2); - temp.r = z__1.r, temp.i = z__1.i; - } - } - i__2 = i__ + j * b_dim1; - b[i__2].r = temp.r, b[i__2].i = temp.i; -/* L170: */ - } -/* L180: */ - } - } - } - } else { - if (lsame_(transa, "N")) { - -/* Form B := alpha*B*inv( A ). */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (alpha->r != 1. || alpha->i != 0.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] - .i, z__1.i = alpha->r * b[i__4].i + - alpha->i * b[i__4].r; - b[i__3].r = z__1.r, b[i__3].i = z__1.i; -/* L190: */ - } - } - i__2 = j - 1; - for (k = 1; k <= i__2; ++k) { - i__3 = k + j * a_dim1; - if (a[i__3].r != 0. || a[i__3].i != 0.) { - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * b_dim1; - i__5 = i__ + j * b_dim1; - i__6 = k + j * a_dim1; - i__7 = i__ + k * b_dim1; - z__2.r = a[i__6].r * b[i__7].r - a[i__6].i * - b[i__7].i, z__2.i = a[i__6].r * b[ - i__7].i + a[i__6].i * b[i__7].r; - z__1.r = b[i__5].r - z__2.r, z__1.i = b[i__5] - .i - z__2.i; - b[i__4].r = z__1.r, b[i__4].i = z__1.i; -/* L200: */ - } - } -/* L210: */ - } - if (nounit) { - z_div(&z__1, &c_b1071, &a[j + j * a_dim1]); - temp.r = z__1.r, temp.i = z__1.i; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, - z__1.i = temp.r * b[i__4].i + temp.i * b[ - i__4].r; - b[i__3].r = z__1.r, b[i__3].i = z__1.i; -/* L220: */ - } - } -/* L230: */ - } - } else { - for (j = *n; j >= 1; --j) { - if (alpha->r != 1. || alpha->i != 0.) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + j * b_dim1; - i__3 = i__ + j * b_dim1; - z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3] - .i, z__1.i = alpha->r * b[i__3].i + - alpha->i * b[i__3].r; - b[i__2].r = z__1.r, b[i__2].i = z__1.i; -/* L240: */ - } - } - i__1 = *n; - for (k = j + 1; k <= i__1; ++k) { - i__2 = k + j * a_dim1; - if (a[i__2].r != 0. || a[i__2].i != 0.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - i__5 = k + j * a_dim1; - i__6 = i__ + k * b_dim1; - z__2.r = a[i__5].r * b[i__6].r - a[i__5].i * - b[i__6].i, z__2.i = a[i__5].r * b[ - i__6].i + a[i__5].i * b[i__6].r; - z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4] - .i - z__2.i; - b[i__3].r = z__1.r, b[i__3].i = z__1.i; -/* L250: */ - } - } -/* L260: */ - } - if (nounit) { - z_div(&z__1, &c_b1071, &a[j + j * a_dim1]); - temp.r = z__1.r, temp.i = z__1.i; - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + j * b_dim1; - i__3 = i__ + j * b_dim1; - z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, - z__1.i = temp.r * b[i__3].i + temp.i * b[ - i__3].r; - b[i__2].r = z__1.r, b[i__2].i = z__1.i; -/* L270: */ - } - } -/* L280: */ - } - } - } else { - -/* - Form B := alpha*B*inv( A' ) - or B := alpha*B*inv( conjg( A' ) ). -*/ - - if (upper) { - for (k = *n; k >= 1; --k) { - if (nounit) { - if (noconj) { - z_div(&z__1, &c_b1071, &a[k + k * a_dim1]); - temp.r = z__1.r, temp.i = z__1.i; - } else { - d_cnjg(&z__2, &a[k + k * a_dim1]); - z_div(&z__1, &c_b1071, &z__2); - temp.r = z__1.r, temp.i = z__1.i; - } - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + k * b_dim1; - i__3 = i__ + k * b_dim1; - z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, - z__1.i = temp.r * b[i__3].i + temp.i * b[ - i__3].r; - b[i__2].r = z__1.r, b[i__2].i = z__1.i; -/* L290: */ - } - } - i__1 = k - 1; - for (j = 1; j <= i__1; ++j) { - i__2 = j + k * a_dim1; - if (a[i__2].r != 0. || a[i__2].i != 0.) { - if (noconj) { - i__2 = j + k * a_dim1; - temp.r = a[i__2].r, temp.i = a[i__2].i; - } else { - d_cnjg(&z__1, &a[j + k * a_dim1]); - temp.r = z__1.r, temp.i = z__1.i; - } - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - i__5 = i__ + k * b_dim1; - z__2.r = temp.r * b[i__5].r - temp.i * b[i__5] - .i, z__2.i = temp.r * b[i__5].i + - temp.i * b[i__5].r; - z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4] - .i - z__2.i; - b[i__3].r = z__1.r, b[i__3].i = z__1.i; -/* L300: */ - } - } -/* L310: */ - } - if (alpha->r != 1. || alpha->i != 0.) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + k * b_dim1; - i__3 = i__ + k * b_dim1; - z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3] - .i, z__1.i = alpha->r * b[i__3].i + - alpha->i * b[i__3].r; - b[i__2].r = z__1.r, b[i__2].i = z__1.i; -/* L320: */ - } - } -/* L330: */ - } - } else { - i__1 = *n; - for (k = 1; k <= i__1; ++k) { - if (nounit) { - if (noconj) { - z_div(&z__1, &c_b1071, &a[k + k * a_dim1]); - temp.r = z__1.r, temp.i = z__1.i; - } else { - d_cnjg(&z__2, &a[k + k * a_dim1]); - z_div(&z__1, &c_b1071, &z__2); - temp.r = z__1.r, temp.i = z__1.i; - } - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + k * b_dim1; - i__4 = i__ + k * b_dim1; - z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, - z__1.i = temp.r * b[i__4].i + temp.i * b[ - i__4].r; - b[i__3].r = z__1.r, b[i__3].i = z__1.i; -/* L340: */ - } - } - i__2 = *n; - for (j = k + 1; j <= i__2; ++j) { - i__3 = j + k * a_dim1; - if (a[i__3].r != 0. || a[i__3].i != 0.) { - if (noconj) { - i__3 = j + k * a_dim1; - temp.r = a[i__3].r, temp.i = a[i__3].i; - } else { - d_cnjg(&z__1, &a[j + k * a_dim1]); - temp.r = z__1.r, temp.i = z__1.i; - } - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * b_dim1; - i__5 = i__ + j * b_dim1; - i__6 = i__ + k * b_dim1; - z__2.r = temp.r * b[i__6].r - temp.i * b[i__6] - .i, z__2.i = temp.r * b[i__6].i + - temp.i * b[i__6].r; - z__1.r = b[i__5].r - z__2.r, z__1.i = b[i__5] - .i - z__2.i; - b[i__4].r = z__1.r, b[i__4].i = z__1.i; -/* L350: */ - } - } -/* L360: */ - } - if (alpha->r != 1. || alpha->i != 0.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + k * b_dim1; - i__4 = i__ + k * b_dim1; - z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] - .i, z__1.i = alpha->r * b[i__4].i + - alpha->i * b[i__4].r; - b[i__3].r = z__1.r, b[i__3].i = z__1.i; -/* L370: */ - } - } -/* L380: */ - } - } - } - } - - return 0; - -/* End of ZTRSM . */ - -} /* ztrsm_ */ - -/* Subroutine */ int ztrsv_(char *uplo, char *trans, char *diag, integer *n, - doublecomplex *a, integer *lda, doublecomplex *x, integer *incx) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - doublecomplex z__1, z__2, z__3; - - /* Builtin functions */ - void z_div(doublecomplex *, doublecomplex *, doublecomplex *), d_cnjg( - doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer i__, j, ix, jx, kx, info; - static doublecomplex temp; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical noconj, nounit; - - -/* - Purpose - ======= - - ZTRSV solves one of the systems of equations - - A*x = b, or A'*x = b, or conjg( A' )*x = b, - - where b and x are n element vectors and A is an n by n unit, or - non-unit, upper or lower triangular matrix. - - No test for singularity or near-singularity is included in this - routine. Such tests must be performed before calling this routine. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the matrix is an upper or - lower triangular matrix as follows: - - UPLO = 'U' or 'u' A is an upper triangular matrix. - - UPLO = 'L' or 'l' A is a lower triangular matrix. - - Unchanged on exit. - - TRANS - CHARACTER*1. - On entry, TRANS specifies the equations to be solved as - follows: - - TRANS = 'N' or 'n' A*x = b. - - TRANS = 'T' or 't' A'*x = b. - - TRANS = 'C' or 'c' conjg( A' )*x = b. - - Unchanged on exit. - - DIAG - CHARACTER*1. - On entry, DIAG specifies whether or not A is unit - triangular as follows: - - DIAG = 'U' or 'u' A is assumed to be unit triangular. - - DIAG = 'N' or 'n' A is not assumed to be unit - triangular. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix A. - N must be at least zero. - Unchanged on exit. - - A - COMPLEX*16 array of DIMENSION ( LDA, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array A must contain the upper - triangular matrix and the strictly lower triangular part of - A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array A must contain the lower - triangular matrix and the strictly upper triangular part of - A is not referenced. - Note that when DIAG = 'U' or 'u', the diagonal elements of - A are not referenced either, but are assumed to be unity. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, n ). - Unchanged on exit. - - X - COMPLEX*16 array of dimension at least - ( 1 + ( n - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the n - element right-hand side vector b. On exit, X is overwritten - with the solution vector x. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - info = 1; - } else if (! lsame_(trans, "N") && ! lsame_(trans, - "T") && ! lsame_(trans, "C")) { - info = 2; - } else if (! lsame_(diag, "U") && ! lsame_(diag, - "N")) { - info = 3; - } else if (*n < 0) { - info = 4; - } else if (*lda < max(1,*n)) { - info = 6; - } else if (*incx == 0) { - info = 8; - } - if (info != 0) { - xerbla_("ZTRSV ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - - noconj = lsame_(trans, "T"); - nounit = lsame_(diag, "N"); - -/* - Set up the start point in X if the increment is not unity. This - will be ( N - 1 )*INCX too small for descending loops. -*/ - - if (*incx <= 0) { - kx = 1 - (*n - 1) * *incx; - } else if (*incx != 1) { - kx = 1; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through A. -*/ - - if (lsame_(trans, "N")) { - -/* Form x := inv( A )*x. */ - - if (lsame_(uplo, "U")) { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - i__1 = j; - if (x[i__1].r != 0. || x[i__1].i != 0.) { - if (nounit) { - i__1 = j; - z_div(&z__1, &x[j], &a[j + j * a_dim1]); - x[i__1].r = z__1.r, x[i__1].i = z__1.i; - } - i__1 = j; - temp.r = x[i__1].r, temp.i = x[i__1].i; - for (i__ = j - 1; i__ >= 1; --i__) { - i__1 = i__; - i__2 = i__; - i__3 = i__ + j * a_dim1; - z__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, - z__2.i = temp.r * a[i__3].i + temp.i * a[ - i__3].r; - z__1.r = x[i__2].r - z__2.r, z__1.i = x[i__2].i - - z__2.i; - x[i__1].r = z__1.r, x[i__1].i = z__1.i; -/* L10: */ - } - } -/* L20: */ - } - } else { - jx = kx + (*n - 1) * *incx; - for (j = *n; j >= 1; --j) { - i__1 = jx; - if (x[i__1].r != 0. || x[i__1].i != 0.) { - if (nounit) { - i__1 = jx; - z_div(&z__1, &x[jx], &a[j + j * a_dim1]); - x[i__1].r = z__1.r, x[i__1].i = z__1.i; - } - i__1 = jx; - temp.r = x[i__1].r, temp.i = x[i__1].i; - ix = jx; - for (i__ = j - 1; i__ >= 1; --i__) { - ix -= *incx; - i__1 = ix; - i__2 = ix; - i__3 = i__ + j * a_dim1; - z__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, - z__2.i = temp.r * a[i__3].i + temp.i * a[ - i__3].r; - z__1.r = x[i__2].r - z__2.r, z__1.i = x[i__2].i - - z__2.i; - x[i__1].r = z__1.r, x[i__1].i = z__1.i; -/* L30: */ - } - } - jx -= *incx; -/* L40: */ - } - } - } else { - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - if (x[i__2].r != 0. || x[i__2].i != 0.) { - if (nounit) { - i__2 = j; - z_div(&z__1, &x[j], &a[j + j * a_dim1]); - x[i__2].r = z__1.r, x[i__2].i = z__1.i; - } - i__2 = j; - temp.r = x[i__2].r, temp.i = x[i__2].i; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__; - i__4 = i__; - i__5 = i__ + j * a_dim1; - z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - z__2.i = temp.r * a[i__5].i + temp.i * a[ - i__5].r; - z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i - - z__2.i; - x[i__3].r = z__1.r, x[i__3].i = z__1.i; -/* L50: */ - } - } -/* L60: */ - } - } else { - jx = kx; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jx; - if (x[i__2].r != 0. || x[i__2].i != 0.) { - if (nounit) { - i__2 = jx; - z_div(&z__1, &x[jx], &a[j + j * a_dim1]); - x[i__2].r = z__1.r, x[i__2].i = z__1.i; - } - i__2 = jx; - temp.r = x[i__2].r, temp.i = x[i__2].i; - ix = jx; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - ix += *incx; - i__3 = ix; - i__4 = ix; - i__5 = i__ + j * a_dim1; - z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - z__2.i = temp.r * a[i__5].i + temp.i * a[ - i__5].r; - z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i - - z__2.i; - x[i__3].r = z__1.r, x[i__3].i = z__1.i; -/* L70: */ - } - } - jx += *incx; -/* L80: */ - } - } - } - } else { - -/* Form x := inv( A' )*x or x := inv( conjg( A' ) )*x. */ - - if (lsame_(uplo, "U")) { - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - temp.r = x[i__2].r, temp.i = x[i__2].i; - if (noconj) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__; - z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ - i__4].i, z__2.i = a[i__3].r * x[i__4].i + - a[i__3].i * x[i__4].r; - z__1.r = temp.r - z__2.r, z__1.i = temp.i - - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L90: */ - } - if (nounit) { - z_div(&z__1, &temp, &a[j + j * a_dim1]); - temp.r = z__1.r, temp.i = z__1.i; - } - } else { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - d_cnjg(&z__3, &a[i__ + j * a_dim1]); - i__3 = i__; - z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, - z__2.i = z__3.r * x[i__3].i + z__3.i * x[ - i__3].r; - z__1.r = temp.r - z__2.r, z__1.i = temp.i - - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L100: */ - } - if (nounit) { - d_cnjg(&z__2, &a[j + j * a_dim1]); - z_div(&z__1, &temp, &z__2); - temp.r = z__1.r, temp.i = z__1.i; - } - } - i__2 = j; - x[i__2].r = temp.r, x[i__2].i = temp.i; -/* L110: */ - } - } else { - jx = kx; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - ix = kx; - i__2 = jx; - temp.r = x[i__2].r, temp.i = x[i__2].i; - if (noconj) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = ix; - z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ - i__4].i, z__2.i = a[i__3].r * x[i__4].i + - a[i__3].i * x[i__4].r; - z__1.r = temp.r - z__2.r, z__1.i = temp.i - - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; - ix += *incx; -/* L120: */ - } - if (nounit) { - z_div(&z__1, &temp, &a[j + j * a_dim1]); - temp.r = z__1.r, temp.i = z__1.i; - } - } else { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - d_cnjg(&z__3, &a[i__ + j * a_dim1]); - i__3 = ix; - z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, - z__2.i = z__3.r * x[i__3].i + z__3.i * x[ - i__3].r; - z__1.r = temp.r - z__2.r, z__1.i = temp.i - - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; - ix += *incx; -/* L130: */ - } - if (nounit) { - d_cnjg(&z__2, &a[j + j * a_dim1]); - z_div(&z__1, &temp, &z__2); - temp.r = z__1.r, temp.i = z__1.i; - } - } - i__2 = jx; - x[i__2].r = temp.r, x[i__2].i = temp.i; - jx += *incx; -/* L140: */ - } - } - } else { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - i__1 = j; - temp.r = x[i__1].r, temp.i = x[i__1].i; - if (noconj) { - i__1 = j + 1; - for (i__ = *n; i__ >= i__1; --i__) { - i__2 = i__ + j * a_dim1; - i__3 = i__; - z__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[ - i__3].i, z__2.i = a[i__2].r * x[i__3].i + - a[i__2].i * x[i__3].r; - z__1.r = temp.r - z__2.r, z__1.i = temp.i - - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L150: */ - } - if (nounit) { - z_div(&z__1, &temp, &a[j + j * a_dim1]); - temp.r = z__1.r, temp.i = z__1.i; - } - } else { - i__1 = j + 1; - for (i__ = *n; i__ >= i__1; --i__) { - d_cnjg(&z__3, &a[i__ + j * a_dim1]); - i__2 = i__; - z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, - z__2.i = z__3.r * x[i__2].i + z__3.i * x[ - i__2].r; - z__1.r = temp.r - z__2.r, z__1.i = temp.i - - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; -/* L160: */ - } - if (nounit) { - d_cnjg(&z__2, &a[j + j * a_dim1]); - z_div(&z__1, &temp, &z__2); - temp.r = z__1.r, temp.i = z__1.i; - } - } - i__1 = j; - x[i__1].r = temp.r, x[i__1].i = temp.i; -/* L170: */ - } - } else { - kx += (*n - 1) * *incx; - jx = kx; - for (j = *n; j >= 1; --j) { - ix = kx; - i__1 = jx; - temp.r = x[i__1].r, temp.i = x[i__1].i; - if (noconj) { - i__1 = j + 1; - for (i__ = *n; i__ >= i__1; --i__) { - i__2 = i__ + j * a_dim1; - i__3 = ix; - z__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[ - i__3].i, z__2.i = a[i__2].r * x[i__3].i + - a[i__2].i * x[i__3].r; - z__1.r = temp.r - z__2.r, z__1.i = temp.i - - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; - ix -= *incx; -/* L180: */ - } - if (nounit) { - z_div(&z__1, &temp, &a[j + j * a_dim1]); - temp.r = z__1.r, temp.i = z__1.i; - } - } else { - i__1 = j + 1; - for (i__ = *n; i__ >= i__1; --i__) { - d_cnjg(&z__3, &a[i__ + j * a_dim1]); - i__2 = ix; - z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, - z__2.i = z__3.r * x[i__2].i + z__3.i * x[ - i__2].r; - z__1.r = temp.r - z__2.r, z__1.i = temp.i - - z__2.i; - temp.r = z__1.r, temp.i = z__1.i; - ix -= *incx; -/* L190: */ - } - if (nounit) { - d_cnjg(&z__2, &a[j + j * a_dim1]); - z_div(&z__1, &temp, &z__2); - temp.r = z__1.r, temp.i = z__1.i; - } - } - i__1 = jx; - x[i__1].r = temp.r, x[i__1].i = temp.i; - jx -= *incx; -/* L200: */ - } - } - } - } - - return 0; - -/* End of ZTRSV . */ - -} /* ztrsv_ */ - diff --git a/numpy/linalg/lapack_lite/dlapack_lite.c b/numpy/linalg/lapack_lite/dlapack_lite.c deleted file mode 100644 index 116aa6ceb..000000000 --- a/numpy/linalg/lapack_lite/dlapack_lite.c +++ /dev/null @@ -1,96068 +0,0 @@ -/* -NOTE: This is generated code. Look in Misc/lapack_lite for information on - remaking this file. -*/ -#include "f2c.h" - -#ifdef HAVE_CONFIG -#include "config.h" -#else -extern doublereal dlamch_(char *); -#define EPSILON dlamch_("Epsilon") -#define SAFEMINIMUM dlamch_("Safe minimum") -#define PRECISION dlamch_("Precision") -#define BASE dlamch_("Base") -#endif - -extern doublereal dlapy2_(doublereal *x, doublereal *y); - -/* -f2c knows the exact rules for precedence, and so omits parentheses where not -strictly necessary. Since this is generated code, we don't really care if -it's readable, and we know what is written is correct. So don't warn about -them. -*/ -#if defined(__GNUC__) -#pragma GCC diagnostic ignored "-Wparentheses" -#endif - - -/* Table of constant values */ - -static integer c__1 = 1; -static complex c_b55 = {0.f,0.f}; -static complex c_b56 = {1.f,0.f}; -static integer c_n1 = -1; -static integer c__3 = 3; -static integer c__2 = 2; -static integer c__0 = 0; -static integer c__8 = 8; -static integer c__4 = 4; -static integer c__65 = 65; -static real c_b871 = 1.f; -static integer c__15 = 15; -static logical c_false = FALSE_; -static real c_b1101 = 0.f; -static integer c__9 = 9; -static real c_b1150 = -1.f; -static real c_b1794 = .5f; -static doublereal c_b2453 = 1.; -static doublereal c_b2467 = 0.; -static doublereal c_b2532 = -.125; -static doublereal c_b2589 = -1.; -static integer c__6 = 6; -static integer c__10 = 10; -static integer c__11 = 11; -static doublereal c_b5242 = 2.; -static logical c_true = TRUE_; -static real c_b8920 = 2.f; - -/* Subroutine */ int cgebak_(char *job, char *side, integer *n, integer *ilo, - integer *ihi, real *scale, integer *m, complex *v, integer *ldv, - integer *info) -{ - /* System generated locals */ - integer v_dim1, v_offset, i__1; - - /* Local variables */ - static integer i__, k; - static real s; - static integer ii; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int cswap_(integer *, complex *, integer *, - complex *, integer *); - static logical leftv; - extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer - *), xerbla_(char *, integer *); - static logical rightv; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CGEBAK forms the right or left eigenvectors of a complex general - matrix by backward transformation on the computed eigenvectors of the - balanced matrix output by CGEBAL. - - Arguments - ========= - - JOB (input) CHARACTER*1 - Specifies the type of backward transformation required: - = 'N', do nothing, return immediately; - = 'P', do backward transformation for permutation only; - = 'S', do backward transformation for scaling only; - = 'B', do backward transformations for both permutation and - scaling. - JOB must be the same as the argument JOB supplied to CGEBAL. - - SIDE (input) CHARACTER*1 - = 'R': V contains right eigenvectors; - = 'L': V contains left eigenvectors. - - N (input) INTEGER - The number of rows of the matrix V. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - The integers ILO and IHI determined by CGEBAL. - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - - SCALE (input) REAL array, dimension (N) - Details of the permutation and scaling factors, as returned - by CGEBAL. - - M (input) INTEGER - The number of columns of the matrix V. M >= 0. - - V (input/output) COMPLEX array, dimension (LDV,M) - On entry, the matrix of right or left eigenvectors to be - transformed, as returned by CHSEIN or CTREVC. - On exit, V is overwritten by the transformed eigenvectors. - - LDV (input) INTEGER - The leading dimension of the array V. LDV >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - - ===================================================================== - - - Decode and Test the input parameters -*/ - - /* Parameter adjustments */ - --scale; - v_dim1 = *ldv; - v_offset = 1 + v_dim1; - v -= v_offset; - - /* Function Body */ - rightv = lsame_(side, "R"); - leftv = lsame_(side, "L"); - - *info = 0; - if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") - && ! lsame_(job, "B")) { - *info = -1; - } else if (! rightv && ! leftv) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*ilo < 1 || *ilo > max(1,*n)) { - *info = -4; - } else if (*ihi < min(*ilo,*n) || *ihi > *n) { - *info = -5; - } else if (*m < 0) { - *info = -7; - } else if (*ldv < max(1,*n)) { - *info = -9; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGEBAK", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - if (*m == 0) { - return 0; - } - if (lsame_(job, "N")) { - return 0; - } - - if (*ilo == *ihi) { - goto L30; - } - -/* Backward balance */ - - if (lsame_(job, "S") || lsame_(job, "B")) { - - if (rightv) { - i__1 = *ihi; - for (i__ = *ilo; i__ <= i__1; ++i__) { - s = scale[i__]; - csscal_(m, &s, &v[i__ + v_dim1], ldv); -/* L10: */ - } - } - - if (leftv) { - i__1 = *ihi; - for (i__ = *ilo; i__ <= i__1; ++i__) { - s = 1.f / scale[i__]; - csscal_(m, &s, &v[i__ + v_dim1], ldv); -/* L20: */ - } - } - - } - -/* - Backward permutation - - For I = ILO-1 step -1 until 1, - IHI+1 step 1 until N do -- -*/ - -L30: - if (lsame_(job, "P") || lsame_(job, "B")) { - if (rightv) { - i__1 = *n; - for (ii = 1; ii <= i__1; ++ii) { - i__ = ii; - if (i__ >= *ilo && i__ <= *ihi) { - goto L40; - } - if (i__ < *ilo) { - i__ = *ilo - ii; - } - k = scale[i__]; - if (k == i__) { - goto L40; - } - cswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); -L40: - ; - } - } - - if (leftv) { - i__1 = *n; - for (ii = 1; ii <= i__1; ++ii) { - i__ = ii; - if (i__ >= *ilo && i__ <= *ihi) { - goto L50; - } - if (i__ < *ilo) { - i__ = *ilo - ii; - } - k = scale[i__]; - if (k == i__) { - goto L50; - } - cswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); -L50: - ; - } - } - } - - return 0; - -/* End of CGEBAK */ - -} /* cgebak_ */ - -/* Subroutine */ int cgebal_(char *job, integer *n, complex *a, integer *lda, - integer *ilo, integer *ihi, real *scale, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - real r__1, r__2; - - /* Builtin functions */ - double r_imag(complex *), c_abs(complex *); - - /* Local variables */ - static real c__, f, g; - static integer i__, j, k, l, m; - static real r__, s, ca, ra; - static integer ica, ira, iexc; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int cswap_(integer *, complex *, integer *, - complex *, integer *); - static real sfmin1, sfmin2, sfmax1, sfmax2; - extern integer icamax_(integer *, complex *, integer *); - extern doublereal slamch_(char *); - extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer - *), xerbla_(char *, integer *); - static logical noconv; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CGEBAL balances a general complex matrix A. This involves, first, - permuting A by a similarity transformation to isolate eigenvalues - in the first 1 to ILO-1 and last IHI+1 to N elements on the - diagonal; and second, applying a diagonal similarity transformation - to rows and columns ILO to IHI to make the rows and columns as - close in norm as possible. Both steps are optional. - - Balancing may reduce the 1-norm of the matrix, and improve the - accuracy of the computed eigenvalues and/or eigenvectors. - - Arguments - ========= - - JOB (input) CHARACTER*1 - Specifies the operations to be performed on A: - = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 - for i = 1,...,N; - = 'P': permute only; - = 'S': scale only; - = 'B': both permute and scale. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the input matrix A. - On exit, A is overwritten by the balanced matrix. - If JOB = 'N', A is not referenced. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - ILO (output) INTEGER - IHI (output) INTEGER - ILO and IHI are set to integers such that on exit - A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. - If JOB = 'N' or 'S', ILO = 1 and IHI = N. - - SCALE (output) REAL array, dimension (N) - Details of the permutations and scaling factors applied to - A. If P(j) is the index of the row and column interchanged - with row and column j and D(j) is the scaling factor - applied to row and column j, then - SCALE(j) = P(j) for j = 1,...,ILO-1 - = D(j) for j = ILO,...,IHI - = P(j) for j = IHI+1,...,N. - The order in which the interchanges are made is N to IHI+1, - then 1 to ILO-1. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - The permutations consist of row and column interchanges which put - the matrix in the form - - ( T1 X Y ) - P A P = ( 0 B Z ) - ( 0 0 T2 ) - - where T1 and T2 are upper triangular matrices whose eigenvalues lie - along the diagonal. The column indices ILO and IHI mark the starting - and ending columns of the submatrix B. Balancing consists of applying - a diagonal similarity transformation inv(D) * B * D to make the - 1-norms of each row of B and its corresponding column nearly equal. - The output matrix is - - ( T1 X*D Y ) - ( 0 inv(D)*B*D inv(D)*Z ). - ( 0 0 T2 ) - - Information about the permutations P and the diagonal matrix D is - returned in the vector SCALE. - - This subroutine is based on the EISPACK routine CBAL. - - Modified by Tzu-Yi Chen, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --scale; - - /* Function Body */ - *info = 0; - if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") - && ! lsame_(job, "B")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGEBAL", &i__1); - return 0; - } - - k = 1; - l = *n; - - if (*n == 0) { - goto L210; - } - - if (lsame_(job, "N")) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - scale[i__] = 1.f; -/* L10: */ - } - goto L210; - } - - if (lsame_(job, "S")) { - goto L120; - } - -/* Permutation to isolate eigenvalues if possible */ - - goto L50; - -/* Row and column exchange. */ - -L20: - scale[m] = (real) j; - if (j == m) { - goto L30; - } - - cswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); - i__1 = *n - k + 1; - cswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); - -L30: - switch (iexc) { - case 1: goto L40; - case 2: goto L80; - } - -/* Search for rows isolating an eigenvalue and push them down. */ - -L40: - if (l == 1) { - goto L210; - } - --l; - -L50: - for (j = l; j >= 1; --j) { - - i__1 = l; - for (i__ = 1; i__ <= i__1; ++i__) { - if (i__ == j) { - goto L60; - } - i__2 = j + i__ * a_dim1; - if (a[i__2].r != 0.f || r_imag(&a[j + i__ * a_dim1]) != 0.f) { - goto L70; - } -L60: - ; - } - - m = l; - iexc = 1; - goto L20; -L70: - ; - } - - goto L90; - -/* Search for columns isolating an eigenvalue and push them left. */ - -L80: - ++k; - -L90: - i__1 = l; - for (j = k; j <= i__1; ++j) { - - i__2 = l; - for (i__ = k; i__ <= i__2; ++i__) { - if (i__ == j) { - goto L100; - } - i__3 = i__ + j * a_dim1; - if (a[i__3].r != 0.f || r_imag(&a[i__ + j * a_dim1]) != 0.f) { - goto L110; - } -L100: - ; - } - - m = k; - iexc = 2; - goto L20; -L110: - ; - } - -L120: - i__1 = l; - for (i__ = k; i__ <= i__1; ++i__) { - scale[i__] = 1.f; -/* L130: */ - } - - if (lsame_(job, "P")) { - goto L210; - } - -/* - Balance the submatrix in rows K to L. - - Iterative loop for norm reduction -*/ - - sfmin1 = slamch_("S") / slamch_("P"); - sfmax1 = 1.f / sfmin1; - sfmin2 = sfmin1 * 8.f; - sfmax2 = 1.f / sfmin2; -L140: - noconv = FALSE_; - - i__1 = l; - for (i__ = k; i__ <= i__1; ++i__) { - c__ = 0.f; - r__ = 0.f; - - i__2 = l; - for (j = k; j <= i__2; ++j) { - if (j == i__) { - goto L150; - } - i__3 = j + i__ * a_dim1; - c__ += (r__1 = a[i__3].r, dabs(r__1)) + (r__2 = r_imag(&a[j + i__ - * a_dim1]), dabs(r__2)); - i__3 = i__ + j * a_dim1; - r__ += (r__1 = a[i__3].r, dabs(r__1)) + (r__2 = r_imag(&a[i__ + j - * a_dim1]), dabs(r__2)); -L150: - ; - } - ica = icamax_(&l, &a[i__ * a_dim1 + 1], &c__1); - ca = c_abs(&a[ica + i__ * a_dim1]); - i__2 = *n - k + 1; - ira = icamax_(&i__2, &a[i__ + k * a_dim1], lda); - ra = c_abs(&a[i__ + (ira + k - 1) * a_dim1]); - -/* Guard against zero C or R due to underflow. */ - - if (c__ == 0.f || r__ == 0.f) { - goto L200; - } - g = r__ / 8.f; - f = 1.f; - s = c__ + r__; -L160: -/* Computing MAX */ - r__1 = max(f,c__); -/* Computing MIN */ - r__2 = min(r__,g); - if (c__ >= g || dmax(r__1,ca) >= sfmax2 || dmin(r__2,ra) <= sfmin2) { - goto L170; - } - f *= 8.f; - c__ *= 8.f; - ca *= 8.f; - r__ /= 8.f; - g /= 8.f; - ra /= 8.f; - goto L160; - -L170: - g = c__ / 8.f; -L180: -/* Computing MIN */ - r__1 = min(f,c__), r__1 = min(r__1,g); - if (g < r__ || dmax(r__,ra) >= sfmax2 || dmin(r__1,ca) <= sfmin2) { - goto L190; - } - f /= 8.f; - c__ /= 8.f; - g /= 8.f; - ca /= 8.f; - r__ *= 8.f; - ra *= 8.f; - goto L180; - -/* Now balance. */ - -L190: - if (c__ + r__ >= s * .95f) { - goto L200; - } - if (f < 1.f && scale[i__] < 1.f) { - if (f * scale[i__] <= sfmin1) { - goto L200; - } - } - if (f > 1.f && scale[i__] > 1.f) { - if (scale[i__] >= sfmax1 / f) { - goto L200; - } - } - g = 1.f / f; - scale[i__] *= f; - noconv = TRUE_; - - i__2 = *n - k + 1; - csscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); - csscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); - -L200: - ; - } - - if (noconv) { - goto L140; - } - -L210: - *ilo = k; - *ihi = l; - - return 0; - -/* End of CGEBAL */ - -} /* cgebal_ */ - -/* Subroutine */ int cgebd2_(integer *m, integer *n, complex *a, integer *lda, - real *d__, real *e, complex *tauq, complex *taup, complex *work, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - complex q__1; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__; - static complex alpha; - extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * - , integer *, complex *, complex *, integer *, complex *), - clarfg_(integer *, complex *, complex *, integer *, complex *), - clacgv_(integer *, complex *, integer *), xerbla_(char *, integer - *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CGEBD2 reduces a complex general m by n matrix A to upper or lower - real bidiagonal form B by a unitary transformation: Q' * A * P = B. - - If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. - - Arguments - ========= - - M (input) INTEGER - The number of rows in the matrix A. M >= 0. - - N (input) INTEGER - The number of columns in the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the m by n general matrix to be reduced. - On exit, - if m >= n, the diagonal and the first superdiagonal are - overwritten with the upper bidiagonal matrix B; the - elements below the diagonal, with the array TAUQ, represent - the unitary matrix Q as a product of elementary - reflectors, and the elements above the first superdiagonal, - with the array TAUP, represent the unitary matrix P as - a product of elementary reflectors; - if m < n, the diagonal and the first subdiagonal are - overwritten with the lower bidiagonal matrix B; the - elements below the first subdiagonal, with the array TAUQ, - represent the unitary matrix Q as a product of - elementary reflectors, and the elements above the diagonal, - with the array TAUP, represent the unitary matrix P as - a product of elementary reflectors. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - D (output) REAL array, dimension (min(M,N)) - The diagonal elements of the bidiagonal matrix B: - D(i) = A(i,i). - - E (output) REAL array, dimension (min(M,N)-1) - The off-diagonal elements of the bidiagonal matrix B: - if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; - if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. - - TAUQ (output) COMPLEX array dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the unitary matrix Q. See Further Details. - - TAUP (output) COMPLEX array, dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the unitary matrix P. See Further Details. - - WORK (workspace) COMPLEX array, dimension (max(M,N)) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - The matrices Q and P are represented as products of elementary - reflectors: - - If m >= n, - - Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are complex scalars, and v and u are complex - vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in - A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in - A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i). - - If m < n, - - Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are complex scalars, v and u are complex vectors; - v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); - u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); - tauq is stored in TAUQ(i) and taup in TAUP(i). - - The contents of A on exit are illustrated by the following examples: - - m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): - - ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) - ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) - ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) - ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) - ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) - ( v1 v2 v3 v4 v5 ) - - where d and e denote diagonal and off-diagonal elements of B, vi - denotes an element of the vector defining H(i), and ui an element of - the vector defining G(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tauq; - --taup; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info < 0) { - i__1 = -(*info); - xerbla_("CGEBD2", &i__1); - return 0; - } - - if (*m >= *n) { - -/* Reduce to upper bidiagonal form */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ - - i__2 = i__ + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *m - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - clarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, & - tauq[i__]); - i__2 = i__; - d__[i__2] = alpha.r; - i__2 = i__ + i__ * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - -/* Apply H(i)' to A(i:m,i+1:n) from the left */ - - i__2 = *m - i__ + 1; - i__3 = *n - i__; - r_cnjg(&q__1, &tauq[i__]); - clarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &q__1, - &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); - i__2 = i__ + i__ * a_dim1; - i__3 = i__; - a[i__2].r = d__[i__3], a[i__2].i = 0.f; - - if (i__ < *n) { - -/* - Generate elementary reflector G(i) to annihilate - A(i,i+2:n) -*/ - - i__2 = *n - i__; - clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); - i__2 = i__ + (i__ + 1) * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *n - i__; -/* Computing MIN */ - i__3 = i__ + 2; - clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & - taup[i__]); - i__2 = i__; - e[i__2] = alpha.r; - i__2 = i__ + (i__ + 1) * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - -/* Apply G(i) to A(i+1:m,i+1:n) from the right */ - - i__2 = *m - i__; - i__3 = *n - i__; - clarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], - lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], - lda, &work[1]); - i__2 = *n - i__; - clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); - i__2 = i__ + (i__ + 1) * a_dim1; - i__3 = i__; - a[i__2].r = e[i__3], a[i__2].i = 0.f; - } else { - i__2 = i__; - taup[i__2].r = 0.f, taup[i__2].i = 0.f; - } -/* L10: */ - } - } else { - -/* Reduce to lower bidiagonal form */ - - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Generate elementary reflector G(i) to annihilate A(i,i+1:n) */ - - i__2 = *n - i__ + 1; - clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); - i__2 = i__ + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *n - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & - taup[i__]); - i__2 = i__; - d__[i__2] = alpha.r; - i__2 = i__ + i__ * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - -/* Apply G(i) to A(i+1:m,i:n) from the right */ - - i__2 = *m - i__; - i__3 = *n - i__ + 1; -/* Computing MIN */ - i__4 = i__ + 1; - clarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[ - i__], &a[min(i__4,*m) + i__ * a_dim1], lda, &work[1]); - i__2 = *n - i__ + 1; - clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); - i__2 = i__ + i__ * a_dim1; - i__3 = i__; - a[i__2].r = d__[i__3], a[i__2].i = 0.f; - - if (i__ < *m) { - -/* - Generate elementary reflector H(i) to annihilate - A(i+2:m,i) -*/ - - i__2 = i__ + 1 + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *m - i__; -/* Computing MIN */ - i__3 = i__ + 2; - clarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, - &tauq[i__]); - i__2 = i__; - e[i__2] = alpha.r; - i__2 = i__ + 1 + i__ * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - -/* Apply H(i)' to A(i+1:m,i+1:n) from the left */ - - i__2 = *m - i__; - i__3 = *n - i__; - r_cnjg(&q__1, &tauq[i__]); - clarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], & - c__1, &q__1, &a[i__ + 1 + (i__ + 1) * a_dim1], lda, & - work[1]); - i__2 = i__ + 1 + i__ * a_dim1; - i__3 = i__; - a[i__2].r = e[i__3], a[i__2].i = 0.f; - } else { - i__2 = i__; - tauq[i__2].r = 0.f, tauq[i__2].i = 0.f; - } -/* L20: */ - } - } - return 0; - -/* End of CGEBD2 */ - -} /* cgebd2_ */ - -/* Subroutine */ int cgebrd_(integer *m, integer *n, complex *a, integer *lda, - real *d__, real *e, complex *tauq, complex *taup, complex *work, - integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - real r__1; - complex q__1; - - /* Local variables */ - static integer i__, j, nb, nx; - static real ws; - extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, - integer *, complex *, complex *, integer *, complex *, integer *, - complex *, complex *, integer *); - static integer nbmin, iinfo, minmn; - extern /* Subroutine */ int cgebd2_(integer *, integer *, complex *, - integer *, real *, real *, complex *, complex *, complex *, - integer *), clabrd_(integer *, integer *, integer *, complex *, - integer *, real *, real *, complex *, complex *, complex *, - integer *, complex *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwrkx, ldwrky, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CGEBRD reduces a general complex M-by-N matrix A to upper or lower - bidiagonal form B by a unitary transformation: Q**H * A * P = B. - - If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. - - Arguments - ========= - - M (input) INTEGER - The number of rows in the matrix A. M >= 0. - - N (input) INTEGER - The number of columns in the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the M-by-N general matrix to be reduced. - On exit, - if m >= n, the diagonal and the first superdiagonal are - overwritten with the upper bidiagonal matrix B; the - elements below the diagonal, with the array TAUQ, represent - the unitary matrix Q as a product of elementary - reflectors, and the elements above the first superdiagonal, - with the array TAUP, represent the unitary matrix P as - a product of elementary reflectors; - if m < n, the diagonal and the first subdiagonal are - overwritten with the lower bidiagonal matrix B; the - elements below the first subdiagonal, with the array TAUQ, - represent the unitary matrix Q as a product of - elementary reflectors, and the elements above the diagonal, - with the array TAUP, represent the unitary matrix P as - a product of elementary reflectors. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - D (output) REAL array, dimension (min(M,N)) - The diagonal elements of the bidiagonal matrix B: - D(i) = A(i,i). - - E (output) REAL array, dimension (min(M,N)-1) - The off-diagonal elements of the bidiagonal matrix B: - if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; - if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. - - TAUQ (output) COMPLEX array dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the unitary matrix Q. See Further Details. - - TAUP (output) COMPLEX array, dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the unitary matrix P. See Further Details. - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The length of the array WORK. LWORK >= max(1,M,N). - For optimum performance LWORK >= (M+N)*NB, where NB - is the optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - The matrices Q and P are represented as products of elementary - reflectors: - - If m >= n, - - Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are complex scalars, and v and u are complex - vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in - A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in - A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i). - - If m < n, - - Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are complex scalars, and v and u are complex - vectors; v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in - A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in - A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). - - The contents of A on exit are illustrated by the following examples: - - m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): - - ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) - ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) - ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) - ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) - ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) - ( v1 v2 v3 v4 v5 ) - - where d and e denote diagonal and off-diagonal elements of B, vi - denotes an element of the vector defining H(i), and ui an element of - the vector defining G(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tauq; - --taup; - --work; - - /* Function Body */ - *info = 0; -/* Computing MAX */ - i__1 = 1, i__2 = ilaenv_(&c__1, "CGEBRD", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - nb = max(i__1,i__2); - lwkopt = (*m + *n) * nb; - r__1 = (real) lwkopt; - work[1].r = r__1, work[1].i = 0.f; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } else /* if(complicated condition) */ { -/* Computing MAX */ - i__1 = max(1,*m); - if (*lwork < max(i__1,*n) && ! lquery) { - *info = -10; - } - } - if (*info < 0) { - i__1 = -(*info); - xerbla_("CGEBRD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - minmn = min(*m,*n); - if (minmn == 0) { - work[1].r = 1.f, work[1].i = 0.f; - return 0; - } - - ws = (real) max(*m,*n); - ldwrkx = *m; - ldwrky = *n; - - if (nb > 1 && nb < minmn) { - -/* - Set the crossover point NX. - - Computing MAX -*/ - i__1 = nb, i__2 = ilaenv_(&c__3, "CGEBRD", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - -/* Determine when to switch from blocked to unblocked code. */ - - if (nx < minmn) { - ws = (real) ((*m + *n) * nb); - if ((real) (*lwork) < ws) { - -/* - Not enough work space for the optimal NB, consider using - a smaller block size. -*/ - - nbmin = ilaenv_(&c__2, "CGEBRD", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - if (*lwork >= (*m + *n) * nbmin) { - nb = *lwork / (*m + *n); - } else { - nb = 1; - nx = minmn; - } - } - } - } else { - nx = minmn; - } - - i__1 = minmn - nx; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - -/* - Reduce rows and columns i:i+ib-1 to bidiagonal form and return - the matrices X and Y which are needed to update the unreduced - part of the matrix -*/ - - i__3 = *m - i__ + 1; - i__4 = *n - i__ + 1; - clabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[ - i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx - * nb + 1], &ldwrky); - -/* - Update the trailing submatrix A(i+ib:m,i+ib:n), using - an update of the form A := A - V*Y' - X*U' -*/ - - i__3 = *m - i__ - nb + 1; - i__4 = *n - i__ - nb + 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "Conjugate transpose", &i__3, &i__4, &nb, & - q__1, &a[i__ + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + - nb + 1], &ldwrky, &c_b56, &a[i__ + nb + (i__ + nb) * a_dim1], - lda); - i__3 = *m - i__ - nb + 1; - i__4 = *n - i__ - nb + 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &q__1, & - work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, & - c_b56, &a[i__ + nb + (i__ + nb) * a_dim1], lda); - -/* Copy diagonal and off-diagonal elements of B back into A */ - - if (*m >= *n) { - i__3 = i__ + nb - 1; - for (j = i__; j <= i__3; ++j) { - i__4 = j + j * a_dim1; - i__5 = j; - a[i__4].r = d__[i__5], a[i__4].i = 0.f; - i__4 = j + (j + 1) * a_dim1; - i__5 = j; - a[i__4].r = e[i__5], a[i__4].i = 0.f; -/* L10: */ - } - } else { - i__3 = i__ + nb - 1; - for (j = i__; j <= i__3; ++j) { - i__4 = j + j * a_dim1; - i__5 = j; - a[i__4].r = d__[i__5], a[i__4].i = 0.f; - i__4 = j + 1 + j * a_dim1; - i__5 = j; - a[i__4].r = e[i__5], a[i__4].i = 0.f; -/* L20: */ - } - } -/* L30: */ - } - -/* Use unblocked code to reduce the remainder of the matrix */ - - i__2 = *m - i__ + 1; - i__1 = *n - i__ + 1; - cgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], & - tauq[i__], &taup[i__], &work[1], &iinfo); - work[1].r = ws, work[1].i = 0.f; - return 0; - -/* End of CGEBRD */ - -} /* cgebrd_ */ - -/* Subroutine */ int cgeev_(char *jobvl, char *jobvr, integer *n, complex *a, - integer *lda, complex *w, complex *vl, integer *ldvl, complex *vr, - integer *ldvr, complex *work, integer *lwork, real *rwork, integer * - info) -{ - /* System generated locals */ - integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, - i__2, i__3, i__4; - real r__1, r__2; - complex q__1, q__2; - - /* Builtin functions */ - double sqrt(doublereal), r_imag(complex *); - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, k, ihi; - static real scl; - static integer ilo; - static real dum[1], eps; - static complex tmp; - static integer ibal; - static char side[1]; - static integer maxb; - static real anrm; - static integer ierr, itau, iwrk, nout; - extern /* Subroutine */ int cscal_(integer *, complex *, complex *, - integer *); - extern logical lsame_(char *, char *); - extern doublereal scnrm2_(integer *, complex *, integer *); - extern /* Subroutine */ int cgebak_(char *, char *, integer *, integer *, - integer *, real *, integer *, complex *, integer *, integer *), cgebal_(char *, integer *, complex *, integer *, - integer *, integer *, real *, integer *), slabad_(real *, - real *); - static logical scalea; - extern doublereal clange_(char *, integer *, integer *, complex *, - integer *, real *); - static real cscale; - extern /* Subroutine */ int cgehrd_(integer *, integer *, integer *, - complex *, integer *, complex *, complex *, integer *, integer *), - clascl_(char *, integer *, integer *, real *, real *, integer *, - integer *, complex *, integer *, integer *); - extern doublereal slamch_(char *); - extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer - *), clacpy_(char *, integer *, integer *, complex *, integer *, - complex *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static logical select[1]; - static real bignum; - extern integer isamax_(integer *, real *, integer *); - extern /* Subroutine */ int chseqr_(char *, char *, integer *, integer *, - integer *, complex *, integer *, complex *, complex *, integer *, - complex *, integer *, integer *), ctrevc_(char *, - char *, logical *, integer *, complex *, integer *, complex *, - integer *, complex *, integer *, integer *, integer *, complex *, - real *, integer *), cunghr_(integer *, integer *, - integer *, complex *, integer *, complex *, complex *, integer *, - integer *); - static integer minwrk, maxwrk; - static logical wantvl; - static real smlnum; - static integer hswork, irwork; - static logical lquery, wantvr; - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CGEEV computes for an N-by-N complex nonsymmetric matrix A, the - eigenvalues and, optionally, the left and/or right eigenvectors. - - The right eigenvector v(j) of A satisfies - A * v(j) = lambda(j) * v(j) - where lambda(j) is its eigenvalue. - The left eigenvector u(j) of A satisfies - u(j)**H * A = lambda(j) * u(j)**H - where u(j)**H denotes the conjugate transpose of u(j). - - The computed eigenvectors are normalized to have Euclidean norm - equal to 1 and largest component real. - - Arguments - ========= - - JOBVL (input) CHARACTER*1 - = 'N': left eigenvectors of A are not computed; - = 'V': left eigenvectors of are computed. - - JOBVR (input) CHARACTER*1 - = 'N': right eigenvectors of A are not computed; - = 'V': right eigenvectors of A are computed. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the N-by-N matrix A. - On exit, A has been overwritten. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - W (output) COMPLEX array, dimension (N) - W contains the computed eigenvalues. - - VL (output) COMPLEX array, dimension (LDVL,N) - If JOBVL = 'V', the left eigenvectors u(j) are stored one - after another in the columns of VL, in the same order - as their eigenvalues. - If JOBVL = 'N', VL is not referenced. - u(j) = VL(:,j), the j-th column of VL. - - LDVL (input) INTEGER - The leading dimension of the array VL. LDVL >= 1; if - JOBVL = 'V', LDVL >= N. - - VR (output) COMPLEX array, dimension (LDVR,N) - If JOBVR = 'V', the right eigenvectors v(j) are stored one - after another in the columns of VR, in the same order - as their eigenvalues. - If JOBVR = 'N', VR is not referenced. - v(j) = VR(:,j), the j-th column of VR. - - LDVR (input) INTEGER - The leading dimension of the array VR. LDVR >= 1; if - JOBVR = 'V', LDVR >= N. - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,2*N). - For good performance, LWORK must generally be larger. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - RWORK (workspace) REAL array, dimension (2*N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = i, the QR algorithm failed to compute all the - eigenvalues, and no eigenvectors have been computed; - elements and i+1:N of W contain eigenvalues which have - converged. - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --w; - vl_dim1 = *ldvl; - vl_offset = 1 + vl_dim1; - vl -= vl_offset; - vr_dim1 = *ldvr; - vr_offset = 1 + vr_dim1; - vr -= vr_offset; - --work; - --rwork; - - /* Function Body */ - *info = 0; - lquery = *lwork == -1; - wantvl = lsame_(jobvl, "V"); - wantvr = lsame_(jobvr, "V"); - if (! wantvl && ! lsame_(jobvl, "N")) { - *info = -1; - } else if (! wantvr && ! lsame_(jobvr, "N")) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*ldvl < 1 || wantvl && *ldvl < *n) { - *info = -8; - } else if (*ldvr < 1 || wantvr && *ldvr < *n) { - *info = -10; - } - -/* - Compute workspace - (Note: Comments in the code beginning "Workspace:" describe the - minimal amount of workspace needed at that point in the code, - as well as the preferred amount for good performance. - CWorkspace refers to complex workspace, and RWorkspace to real - workspace. NB refers to the optimal block size for the - immediately following subroutine, as returned by ILAENV. - HSWORK refers to the workspace preferred by CHSEQR, as - calculated below. HSWORK is computed assuming ILO=1 and IHI=N, - the worst case.) -*/ - - minwrk = 1; - if (*info == 0 && (*lwork >= 1 || lquery)) { - maxwrk = *n + *n * ilaenv_(&c__1, "CGEHRD", " ", n, &c__1, n, &c__0, ( - ftnlen)6, (ftnlen)1); - if (! wantvl && ! wantvr) { -/* Computing MAX */ - i__1 = 1, i__2 = *n << 1; - minwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = ilaenv_(&c__8, "CHSEQR", "EN", n, &c__1, n, &c_n1, (ftnlen) - 6, (ftnlen)2); - maxb = max(i__1,2); -/* - Computing MIN - Computing MAX -*/ - i__3 = 2, i__4 = ilaenv_(&c__4, "CHSEQR", "EN", n, &c__1, n, & - c_n1, (ftnlen)6, (ftnlen)2); - i__1 = min(maxb,*n), i__2 = max(i__3,i__4); - k = min(i__1,i__2); -/* Computing MAX */ - i__1 = k * (k + 2), i__2 = *n << 1; - hswork = max(i__1,i__2); - maxwrk = max(maxwrk,hswork); - } else { -/* Computing MAX */ - i__1 = 1, i__2 = *n << 1; - minwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n + (*n - 1) * ilaenv_(&c__1, "CUNGHR", - " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = ilaenv_(&c__8, "CHSEQR", "SV", n, &c__1, n, &c_n1, (ftnlen) - 6, (ftnlen)2); - maxb = max(i__1,2); -/* - Computing MIN - Computing MAX -*/ - i__3 = 2, i__4 = ilaenv_(&c__4, "CHSEQR", "SV", n, &c__1, n, & - c_n1, (ftnlen)6, (ftnlen)2); - i__1 = min(maxb,*n), i__2 = max(i__3,i__4); - k = min(i__1,i__2); -/* Computing MAX */ - i__1 = k * (k + 2), i__2 = *n << 1; - hswork = max(i__1,i__2); -/* Computing MAX */ - i__1 = max(maxwrk,hswork), i__2 = *n << 1; - maxwrk = max(i__1,i__2); - } - work[1].r = (real) maxwrk, work[1].i = 0.f; - } - if (*lwork < minwrk && ! lquery) { - *info = -12; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGEEV ", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Get machine constants */ - - eps = slamch_("P"); - smlnum = slamch_("S"); - bignum = 1.f / smlnum; - slabad_(&smlnum, &bignum); - smlnum = sqrt(smlnum) / eps; - bignum = 1.f / smlnum; - -/* Scale A if max element outside range [SMLNUM,BIGNUM] */ - - anrm = clange_("M", n, n, &a[a_offset], lda, dum); - scalea = FALSE_; - if (anrm > 0.f && anrm < smlnum) { - scalea = TRUE_; - cscale = smlnum; - } else if (anrm > bignum) { - scalea = TRUE_; - cscale = bignum; - } - if (scalea) { - clascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & - ierr); - } - -/* - Balance the matrix - (CWorkspace: none) - (RWorkspace: need N) -*/ - - ibal = 1; - cgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &rwork[ibal], &ierr); - -/* - Reduce to upper Hessenberg form - (CWorkspace: need 2*N, prefer N+N*NB) - (RWorkspace: none) -*/ - - itau = 1; - iwrk = itau + *n; - i__1 = *lwork - iwrk + 1; - cgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, - &ierr); - - if (wantvl) { - -/* - Want left eigenvectors - Copy Householder vectors to VL -*/ - - *(unsigned char *)side = 'L'; - clacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) - ; - -/* - Generate unitary matrix in VL - (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) - (RWorkspace: none) -*/ - - i__1 = *lwork - iwrk + 1; - cunghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], - &i__1, &ierr); - -/* - Perform QR iteration, accumulating Schur vectors in VL - (CWorkspace: need 1, prefer HSWORK (see comments) ) - (RWorkspace: none) -*/ - - iwrk = itau; - i__1 = *lwork - iwrk + 1; - chseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vl[ - vl_offset], ldvl, &work[iwrk], &i__1, info); - - if (wantvr) { - -/* - Want left and right eigenvectors - Copy Schur vectors to VR -*/ - - *(unsigned char *)side = 'B'; - clacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); - } - - } else if (wantvr) { - -/* - Want right eigenvectors - Copy Householder vectors to VR -*/ - - *(unsigned char *)side = 'R'; - clacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) - ; - -/* - Generate unitary matrix in VR - (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) - (RWorkspace: none) -*/ - - i__1 = *lwork - iwrk + 1; - cunghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], - &i__1, &ierr); - -/* - Perform QR iteration, accumulating Schur vectors in VR - (CWorkspace: need 1, prefer HSWORK (see comments) ) - (RWorkspace: none) -*/ - - iwrk = itau; - i__1 = *lwork - iwrk + 1; - chseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vr[ - vr_offset], ldvr, &work[iwrk], &i__1, info); - - } else { - -/* - Compute eigenvalues only - (CWorkspace: need 1, prefer HSWORK (see comments) ) - (RWorkspace: none) -*/ - - iwrk = itau; - i__1 = *lwork - iwrk + 1; - chseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vr[ - vr_offset], ldvr, &work[iwrk], &i__1, info); - } - -/* If INFO > 0 from CHSEQR, then quit */ - - if (*info > 0) { - goto L50; - } - - if (wantvl || wantvr) { - -/* - Compute left and/or right eigenvectors - (CWorkspace: need 2*N) - (RWorkspace: need 2*N) -*/ - - irwork = ibal + *n; - ctrevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, - &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &rwork[irwork], - &ierr); - } - - if (wantvl) { - -/* - Undo balancing of left eigenvectors - (CWorkspace: none) - (RWorkspace: need N) -*/ - - cgebak_("B", "L", n, &ilo, &ihi, &rwork[ibal], n, &vl[vl_offset], - ldvl, &ierr); - -/* Normalize left eigenvectors and make largest component real */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - scl = 1.f / scnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); - csscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); - i__2 = *n; - for (k = 1; k <= i__2; ++k) { - i__3 = k + i__ * vl_dim1; -/* Computing 2nd power */ - r__1 = vl[i__3].r; -/* Computing 2nd power */ - r__2 = r_imag(&vl[k + i__ * vl_dim1]); - rwork[irwork + k - 1] = r__1 * r__1 + r__2 * r__2; -/* L10: */ - } - k = isamax_(n, &rwork[irwork], &c__1); - r_cnjg(&q__2, &vl[k + i__ * vl_dim1]); - r__1 = sqrt(rwork[irwork + k - 1]); - q__1.r = q__2.r / r__1, q__1.i = q__2.i / r__1; - tmp.r = q__1.r, tmp.i = q__1.i; - cscal_(n, &tmp, &vl[i__ * vl_dim1 + 1], &c__1); - i__2 = k + i__ * vl_dim1; - i__3 = k + i__ * vl_dim1; - r__1 = vl[i__3].r; - q__1.r = r__1, q__1.i = 0.f; - vl[i__2].r = q__1.r, vl[i__2].i = q__1.i; -/* L20: */ - } - } - - if (wantvr) { - -/* - Undo balancing of right eigenvectors - (CWorkspace: none) - (RWorkspace: need N) -*/ - - cgebak_("B", "R", n, &ilo, &ihi, &rwork[ibal], n, &vr[vr_offset], - ldvr, &ierr); - -/* Normalize right eigenvectors and make largest component real */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - scl = 1.f / scnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); - csscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); - i__2 = *n; - for (k = 1; k <= i__2; ++k) { - i__3 = k + i__ * vr_dim1; -/* Computing 2nd power */ - r__1 = vr[i__3].r; -/* Computing 2nd power */ - r__2 = r_imag(&vr[k + i__ * vr_dim1]); - rwork[irwork + k - 1] = r__1 * r__1 + r__2 * r__2; -/* L30: */ - } - k = isamax_(n, &rwork[irwork], &c__1); - r_cnjg(&q__2, &vr[k + i__ * vr_dim1]); - r__1 = sqrt(rwork[irwork + k - 1]); - q__1.r = q__2.r / r__1, q__1.i = q__2.i / r__1; - tmp.r = q__1.r, tmp.i = q__1.i; - cscal_(n, &tmp, &vr[i__ * vr_dim1 + 1], &c__1); - i__2 = k + i__ * vr_dim1; - i__3 = k + i__ * vr_dim1; - r__1 = vr[i__3].r; - q__1.r = r__1, q__1.i = 0.f; - vr[i__2].r = q__1.r, vr[i__2].i = q__1.i; -/* L40: */ - } - } - -/* Undo scaling if necessary */ - -L50: - if (scalea) { - i__1 = *n - *info; -/* Computing MAX */ - i__3 = *n - *info; - i__2 = max(i__3,1); - clascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &w[*info + 1] - , &i__2, &ierr); - if (*info > 0) { - i__1 = ilo - 1; - clascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &w[1], n, - &ierr); - } - } - - work[1].r = (real) maxwrk, work[1].i = 0.f; - return 0; - -/* End of CGEEV */ - -} /* cgeev_ */ - -/* Subroutine */ int cgehd2_(integer *n, integer *ilo, integer *ihi, complex * - a, integer *lda, complex *tau, complex *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - complex q__1; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__; - static complex alpha; - extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * - , integer *, complex *, complex *, integer *, complex *), - clarfg_(integer *, complex *, complex *, integer *, complex *), - xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CGEHD2 reduces a complex general matrix A to upper Hessenberg form H - by a unitary similarity transformation: Q' * A * Q = H . - - Arguments - ========= - - N (input) INTEGER - The order of the matrix A. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that A is already upper triangular in rows - and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally - set by a previous call to CGEBAL; otherwise they should be - set to 1 and N respectively. See Further Details. - 1 <= ILO <= IHI <= max(1,N). - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the n by n general matrix to be reduced. - On exit, the upper triangle and the first subdiagonal of A - are overwritten with the upper Hessenberg matrix H, and the - elements below the first subdiagonal, with the array TAU, - represent the unitary matrix Q as a product of elementary - reflectors. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - TAU (output) COMPLEX array, dimension (N-1) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace) COMPLEX array, dimension (N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - The matrix Q is represented as a product of (ihi-ilo) elementary - reflectors - - Q = H(ilo) H(ilo+1) . . . H(ihi-1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on - exit in A(i+2:ihi,i), and tau in TAU(i). - - The contents of A are illustrated by the following example, with - n = 7, ilo = 2 and ihi = 6: - - on entry, on exit, - - ( a a a a a a a ) ( a a h h h h a ) - ( a a a a a a ) ( a h h h h a ) - ( a a a a a a ) ( h h h h h h ) - ( a a a a a a ) ( v2 h h h h h ) - ( a a a a a a ) ( v2 v3 h h h h ) - ( a a a a a a ) ( v2 v3 v4 h h h ) - ( a ) ( a ) - - where a denotes an element of the original matrix A, h denotes a - modified element of the upper Hessenberg matrix H, and vi denotes an - element of the vector defining H(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*n < 0) { - *info = -1; - } else if (*ilo < 1 || *ilo > max(1,*n)) { - *info = -2; - } else if (*ihi < min(*ilo,*n) || *ihi > *n) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGEHD2", &i__1); - return 0; - } - - i__1 = *ihi - 1; - for (i__ = *ilo; i__ <= i__1; ++i__) { - -/* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */ - - i__2 = i__ + 1 + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *ihi - i__; -/* Computing MIN */ - i__3 = i__ + 2; - clarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, &tau[ - i__]); - i__2 = i__ + 1 + i__ * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - -/* Apply H(i) to A(1:ihi,i+1:ihi) from the right */ - - i__2 = *ihi - i__; - clarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ - i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]); - -/* Apply H(i)' to A(i+1:ihi,i+1:n) from the left */ - - i__2 = *ihi - i__; - i__3 = *n - i__; - r_cnjg(&q__1, &tau[i__]); - clarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &q__1, - &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]); - - i__2 = i__ + 1 + i__ * a_dim1; - a[i__2].r = alpha.r, a[i__2].i = alpha.i; -/* L10: */ - } - - return 0; - -/* End of CGEHD2 */ - -} /* cgehd2_ */ - -/* Subroutine */ int cgehrd_(integer *n, integer *ilo, integer *ihi, complex * - a, integer *lda, complex *tau, complex *work, integer *lwork, integer - *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - complex q__1; - - /* Local variables */ - static integer i__; - static complex t[4160] /* was [65][64] */; - static integer ib; - static complex ei; - static integer nb, nh, nx, iws; - extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, - integer *, complex *, complex *, integer *, complex *, integer *, - complex *, complex *, integer *); - static integer nbmin, iinfo; - extern /* Subroutine */ int cgehd2_(integer *, integer *, integer *, - complex *, integer *, complex *, complex *, integer *), clarfb_( - char *, char *, char *, char *, integer *, integer *, integer *, - complex *, integer *, complex *, integer *, complex *, integer *, - complex *, integer *), clahrd_( - integer *, integer *, integer *, complex *, integer *, complex *, - complex *, integer *, complex *, integer *), xerbla_(char *, - integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CGEHRD reduces a complex general matrix A to upper Hessenberg form H - by a unitary similarity transformation: Q' * A * Q = H . - - Arguments - ========= - - N (input) INTEGER - The order of the matrix A. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that A is already upper triangular in rows - and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally - set by a previous call to CGEBAL; otherwise they should be - set to 1 and N respectively. See Further Details. - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the N-by-N general matrix to be reduced. - On exit, the upper triangle and the first subdiagonal of A - are overwritten with the upper Hessenberg matrix H, and the - elements below the first subdiagonal, with the array TAU, - represent the unitary matrix Q as a product of elementary - reflectors. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - TAU (output) COMPLEX array, dimension (N-1) - The scalar factors of the elementary reflectors (see Further - Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to - zero. - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The length of the array WORK. LWORK >= max(1,N). - For optimum performance LWORK >= N*NB, where NB is the - optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - The matrix Q is represented as a product of (ihi-ilo) elementary - reflectors - - Q = H(ilo) H(ilo+1) . . . H(ihi-1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on - exit in A(i+2:ihi,i), and tau in TAU(i). - - The contents of A are illustrated by the following example, with - n = 7, ilo = 2 and ihi = 6: - - on entry, on exit, - - ( a a a a a a a ) ( a a h h h h a ) - ( a a a a a a ) ( a h h h h a ) - ( a a a a a a ) ( h h h h h h ) - ( a a a a a a ) ( v2 h h h h h ) - ( a a a a a a ) ( v2 v3 h h h h ) - ( a a a a a a ) ( v2 v3 v4 h h h ) - ( a ) ( a ) - - where a denotes an element of the original matrix A, h denotes a - modified element of the upper Hessenberg matrix H, and vi denotes an - element of the vector defining H(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; -/* Computing MIN */ - i__1 = 64, i__2 = ilaenv_(&c__1, "CGEHRD", " ", n, ilo, ihi, &c_n1, ( - ftnlen)6, (ftnlen)1); - nb = min(i__1,i__2); - lwkopt = *n * nb; - work[1].r = (real) lwkopt, work[1].i = 0.f; - lquery = *lwork == -1; - if (*n < 0) { - *info = -1; - } else if (*ilo < 1 || *ilo > max(1,*n)) { - *info = -2; - } else if (*ihi < min(*ilo,*n) || *ihi > *n) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*lwork < max(1,*n) && ! lquery) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGEHRD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */ - - i__1 = *ilo - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - tau[i__2].r = 0.f, tau[i__2].i = 0.f; -/* L10: */ - } - i__1 = *n - 1; - for (i__ = max(1,*ihi); i__ <= i__1; ++i__) { - i__2 = i__; - tau[i__2].r = 0.f, tau[i__2].i = 0.f; -/* L20: */ - } - -/* Quick return if possible */ - - nh = *ihi - *ilo + 1; - if (nh <= 1) { - work[1].r = 1.f, work[1].i = 0.f; - return 0; - } - - nbmin = 2; - iws = 1; - if (nb > 1 && nb < nh) { - -/* - Determine when to cross over from blocked to unblocked code - (last block is always handled by unblocked code). - - Computing MAX -*/ - i__1 = nb, i__2 = ilaenv_(&c__3, "CGEHRD", " ", n, ilo, ihi, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < nh) { - -/* Determine if workspace is large enough for blocked code. */ - - iws = *n * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: determine the - minimum value of NB, and reduce NB or force use of - unblocked code. - - Computing MAX -*/ - i__1 = 2, i__2 = ilaenv_(&c__2, "CGEHRD", " ", n, ilo, ihi, & - c_n1, (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - if (*lwork >= *n * nbmin) { - nb = *lwork / *n; - } else { - nb = 1; - } - } - } - } - ldwork = *n; - - if (nb < nbmin || nb >= nh) { - -/* Use unblocked code below */ - - i__ = *ilo; - - } else { - -/* Use blocked code */ - - i__1 = *ihi - 1 - nx; - i__2 = nb; - for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = nb, i__4 = *ihi - i__; - ib = min(i__3,i__4); - -/* - Reduce columns i:i+ib-1 to Hessenberg form, returning the - matrices V and T of the block reflector H = I - V*T*V' - which performs the reduction, and also the matrix Y = A*V*T -*/ - - clahrd_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & - c__65, &work[1], &ldwork); - -/* - Apply the block reflector H to A(1:ihi,i+ib:ihi) from the - right, computing A := A - Y * V'. V(i+ib,ib-1) must be set - to 1. -*/ - - i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; - ei.r = a[i__3].r, ei.i = a[i__3].i; - i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; - a[i__3].r = 1.f, a[i__3].i = 0.f; - i__3 = *ihi - i__ - ib + 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "Conjugate transpose", ihi, &i__3, &ib, & - q__1, &work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, - &c_b56, &a[(i__ + ib) * a_dim1 + 1], lda); - i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; - a[i__3].r = ei.r, a[i__3].i = ei.i; - -/* - Apply the block reflector H to A(i+1:ihi,i+ib:n) from the - left -*/ - - i__3 = *ihi - i__; - i__4 = *n - i__ - ib + 1; - clarfb_("Left", "Conjugate transpose", "Forward", "Columnwise", & - i__3, &i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, & - c__65, &a[i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], & - ldwork); -/* L30: */ - } - } - -/* Use unblocked code to reduce the rest of the matrix */ - - cgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); - work[1].r = (real) iws, work[1].i = 0.f; - - return 0; - -/* End of CGEHRD */ - -} /* cgehrd_ */ - -/* Subroutine */ int cgelq2_(integer *m, integer *n, complex *a, integer *lda, - complex *tau, complex *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, k; - static complex alpha; - extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * - , integer *, complex *, complex *, integer *, complex *), - clarfg_(integer *, complex *, complex *, integer *, complex *), - clacgv_(integer *, complex *, integer *), xerbla_(char *, integer - *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CGELQ2 computes an LQ factorization of a complex m by n matrix A: - A = L * Q. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the m by n matrix A. - On exit, the elements on and below the diagonal of the array - contain the m by min(m,n) lower trapezoidal matrix L (L is - lower triangular if m <= n); the elements above the diagonal, - with the array TAU, represent the unitary matrix Q as a - product of elementary reflectors (see Further Details). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (output) COMPLEX array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace) COMPLEX array, dimension (M) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The matrix Q is represented as a product of elementary reflectors - - Q = H(k)' . . . H(2)' H(1)', where k = min(m,n). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in - A(i,i+1:n), and tau in TAU(i). - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGELQ2", &i__1); - return 0; - } - - k = min(*m,*n); - - i__1 = k; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Generate elementary reflector H(i) to annihilate A(i,i+1:n) */ - - i__2 = *n - i__ + 1; - clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); - i__2 = i__ + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *n - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, &tau[i__] - ); - if (i__ < *m) { - -/* Apply H(i) to A(i+1:m,i:n) from the right */ - - i__2 = i__ + i__ * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - i__2 = *m - i__; - i__3 = *n - i__ + 1; - clarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[ - i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); - } - i__2 = i__ + i__ * a_dim1; - a[i__2].r = alpha.r, a[i__2].i = alpha.i; - i__2 = *n - i__ + 1; - clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); -/* L10: */ - } - return 0; - -/* End of CGELQ2 */ - -} /* cgelq2_ */ - -/* Subroutine */ int cgelqf_(integer *m, integer *n, complex *a, integer *lda, - complex *tau, complex *work, integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int cgelq2_(integer *, integer *, complex *, - integer *, complex *, complex *, integer *), clarfb_(char *, char - *, char *, char *, integer *, integer *, integer *, complex *, - integer *, complex *, integer *, complex *, integer *, complex *, - integer *), clarft_(char *, char * - , integer *, integer *, complex *, integer *, complex *, complex * - , integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CGELQF computes an LQ factorization of a complex M-by-N matrix A: - A = L * Q. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the M-by-N matrix A. - On exit, the elements on and below the diagonal of the array - contain the m-by-min(m,n) lower trapezoidal matrix L (L is - lower triangular if m <= n); the elements above the diagonal, - with the array TAU, represent the unitary matrix Q as a - product of elementary reflectors (see Further Details). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (output) COMPLEX array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,M). - For optimum performance LWORK >= M*NB, where NB is the - optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The matrix Q is represented as a product of elementary reflectors - - Q = H(k)' . . . H(2)' H(1)', where k = min(m,n). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in - A(i,i+1:n), and tau in TAU(i). - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nb = ilaenv_(&c__1, "CGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) - 1); - lwkopt = *m * nb; - work[1].r = (real) lwkopt, work[1].i = 0.f; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } else if (*lwork < max(1,*m) && ! lquery) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGELQF", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - k = min(*m,*n); - if (k == 0) { - work[1].r = 1.f, work[1].i = 0.f; - return 0; - } - - nbmin = 2; - nx = 0; - iws = *m; - if (nb > 1 && nb < k) { - -/* - Determine when to cross over from blocked to unblocked code. - - Computing MAX -*/ - i__1 = 0, i__2 = ilaenv_(&c__3, "CGELQF", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < k) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *m; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: reduce NB and - determine the minimum value of NB. -*/ - - nb = *lwork / ldwork; -/* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "CGELQF", " ", m, n, &c_n1, & - c_n1, (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - } - } - } - - if (nb >= nbmin && nb < k && nx < k) { - -/* Use blocked code initially */ - - i__1 = k - nx; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = k - i__ + 1; - ib = min(i__3,nb); - -/* - Compute the LQ factorization of the current block - A(i:i+ib-1,i:n) -*/ - - i__3 = *n - i__ + 1; - cgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ - 1], &iinfo); - if (i__ + ib <= *m) { - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__3 = *n - i__ + 1; - clarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], &work[1], &ldwork); - -/* Apply H to A(i+ib:m,i:n) from the right */ - - i__3 = *m - i__ - ib + 1; - i__4 = *n - i__ + 1; - clarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, - &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & - ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + - 1], &ldwork); - } -/* L10: */ - } - } else { - i__ = 1; - } - -/* Use unblocked code to factor the last or only block. */ - - if (i__ <= k) { - i__2 = *m - i__ + 1; - i__1 = *n - i__ + 1; - cgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] - , &iinfo); - } - - work[1].r = (real) iws, work[1].i = 0.f; - return 0; - -/* End of CGELQF */ - -} /* cgelqf_ */ - -/* Subroutine */ int cgeqr2_(integer *m, integer *n, complex *a, integer *lda, - complex *tau, complex *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - complex q__1; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, k; - static complex alpha; - extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * - , integer *, complex *, complex *, integer *, complex *), - clarfg_(integer *, complex *, complex *, integer *, complex *), - xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CGEQR2 computes a QR factorization of a complex m by n matrix A: - A = Q * R. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the m by n matrix A. - On exit, the elements on and above the diagonal of the array - contain the min(m,n) by n upper trapezoidal matrix R (R is - upper triangular if m >= n); the elements below the diagonal, - with the array TAU, represent the unitary matrix Q as a - product of elementary reflectors (see Further Details). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (output) COMPLEX array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace) COMPLEX array, dimension (N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The matrix Q is represented as a product of elementary reflectors - - Q = H(1) H(2) . . . H(k), where k = min(m,n). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), - and tau in TAU(i). - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGEQR2", &i__1); - return 0; - } - - k = min(*m,*n); - - i__1 = k; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ - - i__2 = *m - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - clarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1] - , &c__1, &tau[i__]); - if (i__ < *n) { - -/* Apply H(i)' to A(i:m,i+1:n) from the left */ - - i__2 = i__ + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = i__ + i__ * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - i__2 = *m - i__ + 1; - i__3 = *n - i__; - r_cnjg(&q__1, &tau[i__]); - clarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &q__1, - &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); - i__2 = i__ + i__ * a_dim1; - a[i__2].r = alpha.r, a[i__2].i = alpha.i; - } -/* L10: */ - } - return 0; - -/* End of CGEQR2 */ - -} /* cgeqr2_ */ - -/* Subroutine */ int cgeqrf_(integer *m, integer *n, complex *a, integer *lda, - complex *tau, complex *work, integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int cgeqr2_(integer *, integer *, complex *, - integer *, complex *, complex *, integer *), clarfb_(char *, char - *, char *, char *, integer *, integer *, integer *, complex *, - integer *, complex *, integer *, complex *, integer *, complex *, - integer *), clarft_(char *, char * - , integer *, integer *, complex *, integer *, complex *, complex * - , integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CGEQRF computes a QR factorization of a complex M-by-N matrix A: - A = Q * R. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the M-by-N matrix A. - On exit, the elements on and above the diagonal of the array - contain the min(M,N)-by-N upper trapezoidal matrix R (R is - upper triangular if m >= n); the elements below the diagonal, - with the array TAU, represent the unitary matrix Q as a - product of min(m,n) elementary reflectors (see Further - Details). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (output) COMPLEX array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,N). - For optimum performance LWORK >= N*NB, where NB is - the optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The matrix Q is represented as a product of elementary reflectors - - Q = H(1) H(2) . . . H(k), where k = min(m,n). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), - and tau in TAU(i). - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nb = ilaenv_(&c__1, "CGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) - 1); - lwkopt = *n * nb; - work[1].r = (real) lwkopt, work[1].i = 0.f; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } else if (*lwork < max(1,*n) && ! lquery) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGEQRF", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - k = min(*m,*n); - if (k == 0) { - work[1].r = 1.f, work[1].i = 0.f; - return 0; - } - - nbmin = 2; - nx = 0; - iws = *n; - if (nb > 1 && nb < k) { - -/* - Determine when to cross over from blocked to unblocked code. - - Computing MAX -*/ - i__1 = 0, i__2 = ilaenv_(&c__3, "CGEQRF", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < k) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *n; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: reduce NB and - determine the minimum value of NB. -*/ - - nb = *lwork / ldwork; -/* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "CGEQRF", " ", m, n, &c_n1, & - c_n1, (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - } - } - } - - if (nb >= nbmin && nb < k && nx < k) { - -/* Use blocked code initially */ - - i__1 = k - nx; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = k - i__ + 1; - ib = min(i__3,nb); - -/* - Compute the QR factorization of the current block - A(i:m,i:i+ib-1) -*/ - - i__3 = *m - i__ + 1; - cgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ - 1], &iinfo); - if (i__ + ib <= *n) { - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__3 = *m - i__ + 1; - clarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], &work[1], &ldwork); - -/* Apply H' to A(i:m,i+ib:n) from the left */ - - i__3 = *m - i__ + 1; - i__4 = *n - i__ - ib + 1; - clarfb_("Left", "Conjugate transpose", "Forward", "Columnwise" - , &i__3, &i__4, &ib, &a[i__ + i__ * a_dim1], lda, & - work[1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, - &work[ib + 1], &ldwork); - } -/* L10: */ - } - } else { - i__ = 1; - } - -/* Use unblocked code to factor the last or only block. */ - - if (i__ <= k) { - i__2 = *m - i__ + 1; - i__1 = *n - i__ + 1; - cgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] - , &iinfo); - } - - work[1].r = (real) iws, work[1].i = 0.f; - return 0; - -/* End of CGEQRF */ - -} /* cgeqrf_ */ - -/* Subroutine */ int cgesdd_(char *jobz, integer *m, integer *n, complex *a, - integer *lda, real *s, complex *u, integer *ldu, complex *vt, integer - *ldvt, complex *work, integer *lwork, real *rwork, integer *iwork, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, - i__2, i__3; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__, ie, il, ir, iu, blk; - static real dum[1], eps; - static integer iru, ivt, iscl; - static real anrm; - static integer idum[1], ierr, itau, irvt; - extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, - integer *, complex *, complex *, integer *, complex *, integer *, - complex *, complex *, integer *); - extern logical lsame_(char *, char *); - static integer chunk, minmn, wrkbl, itaup, itauq; - static logical wntqa; - static integer nwork; - extern /* Subroutine */ int clacp2_(char *, integer *, integer *, real *, - integer *, complex *, integer *); - static logical wntqn, wntqo, wntqs; - static integer mnthr1, mnthr2; - extern /* Subroutine */ int cgebrd_(integer *, integer *, complex *, - integer *, real *, real *, complex *, complex *, complex *, - integer *, integer *); - extern doublereal clange_(char *, integer *, integer *, complex *, - integer *, real *); - extern /* Subroutine */ int cgelqf_(integer *, integer *, complex *, - integer *, complex *, complex *, integer *, integer *), clacrm_( - integer *, integer *, complex *, integer *, real *, integer *, - complex *, integer *, real *), clarcm_(integer *, integer *, real - *, integer *, complex *, integer *, complex *, integer *, real *), - clascl_(char *, integer *, integer *, real *, real *, integer *, - integer *, complex *, integer *, integer *), sbdsdc_(char - *, char *, integer *, real *, real *, real *, integer *, real *, - integer *, real *, integer *, real *, integer *, integer *), cgeqrf_(integer *, integer *, complex *, integer - *, complex *, complex *, integer *, integer *); - extern doublereal slamch_(char *); - extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex - *, integer *, complex *, integer *), claset_(char *, - integer *, integer *, complex *, complex *, complex *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int cungbr_(char *, integer *, integer *, integer - *, complex *, integer *, complex *, complex *, integer *, integer - *); - static real bignum; - extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, - real *, integer *, integer *, real *, integer *, integer *), cunmbr_(char *, char *, char *, integer *, integer *, - integer *, complex *, integer *, complex *, complex *, integer *, - complex *, integer *, integer *), cunglq_( - integer *, integer *, integer *, complex *, integer *, complex *, - complex *, integer *, integer *); - static integer ldwrkl; - extern /* Subroutine */ int cungqr_(integer *, integer *, integer *, - complex *, integer *, complex *, complex *, integer *, integer *); - static integer ldwrkr, minwrk, ldwrku, maxwrk, ldwkvt; - static real smlnum; - static logical wntqas, lquery; - static integer nrwork; - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - CGESDD computes the singular value decomposition (SVD) of a complex - M-by-N matrix A, optionally computing the left and/or right singular - vectors, by using divide-and-conquer method. The SVD is written - - A = U * SIGMA * conjugate-transpose(V) - - where SIGMA is an M-by-N matrix which is zero except for its - min(m,n) diagonal elements, U is an M-by-M unitary matrix, and - V is an N-by-N unitary matrix. The diagonal elements of SIGMA - are the singular values of A; they are real and non-negative, and - are returned in descending order. The first min(m,n) columns of - U and V are the left and right singular vectors of A. - - Note that the routine returns VT = V**H, not V. - - The divide and conquer algorithm makes very mild assumptions about - floating point arithmetic. It will work on machines with a guard - digit in add/subtract, or on those binary machines without guard - digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or - Cray-2. It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. - - Arguments - ========= - - JOBZ (input) CHARACTER*1 - Specifies options for computing all or part of the matrix U: - = 'A': all M columns of U and all N rows of V**H are - returned in the arrays U and VT; - = 'S': the first min(M,N) columns of U and the first - min(M,N) rows of V**H are returned in the arrays U - and VT; - = 'O': If M >= N, the first N columns of U are overwritten - on the array A and all rows of V**H are returned in - the array VT; - otherwise, all columns of U are returned in the - array U and the first M rows of V**H are overwritten - in the array VT; - = 'N': no columns of U or rows of V**H are computed. - - M (input) INTEGER - The number of rows of the input matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the input matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the M-by-N matrix A. - On exit, - if JOBZ = 'O', A is overwritten with the first N columns - of U (the left singular vectors, stored - columnwise) if M >= N; - A is overwritten with the first M rows - of V**H (the right singular vectors, stored - rowwise) otherwise. - if JOBZ .ne. 'O', the contents of A are destroyed. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - S (output) REAL array, dimension (min(M,N)) - The singular values of A, sorted so that S(i) >= S(i+1). - - U (output) COMPLEX array, dimension (LDU,UCOL) - UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; - UCOL = min(M,N) if JOBZ = 'S'. - If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M - unitary matrix U; - if JOBZ = 'S', U contains the first min(M,N) columns of U - (the left singular vectors, stored columnwise); - if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. - - LDU (input) INTEGER - The leading dimension of the array U. LDU >= 1; if - JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. - - VT (output) COMPLEX array, dimension (LDVT,N) - If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the - N-by-N unitary matrix V**H; - if JOBZ = 'S', VT contains the first min(M,N) rows of - V**H (the right singular vectors, stored rowwise); - if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. - - LDVT (input) INTEGER - The leading dimension of the array VT. LDVT >= 1; if - JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; - if JOBZ = 'S', LDVT >= min(M,N). - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= 1. - if JOBZ = 'N', LWORK >= 2*min(M,N)+max(M,N). - if JOBZ = 'O', - LWORK >= 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N). - if JOBZ = 'S' or 'A', - LWORK >= min(M,N)*min(M,N)+2*min(M,N)+max(M,N). - For good performance, LWORK should generally be larger. - If LWORK < 0 but other input arguments are legal, WORK(1) - returns the optimal LWORK. - - RWORK (workspace) REAL array, dimension (LRWORK) - If JOBZ = 'N', LRWORK >= 7*min(M,N). - Otherwise, LRWORK >= 5*min(M,N)*min(M,N) + 5*min(M,N) - - IWORK (workspace) INTEGER array, dimension (8*min(M,N)) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The updating process of SBDSDC did not converge. - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --s; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - --work; - --rwork; - --iwork; - - /* Function Body */ - *info = 0; - minmn = min(*m,*n); - mnthr1 = (integer) (minmn * 17.f / 9.f); - mnthr2 = (integer) (minmn * 5.f / 3.f); - wntqa = lsame_(jobz, "A"); - wntqs = lsame_(jobz, "S"); - wntqas = wntqa || wntqs; - wntqo = lsame_(jobz, "O"); - wntqn = lsame_(jobz, "N"); - minwrk = 1; - maxwrk = 1; - lquery = *lwork == -1; - - if (! (wntqa || wntqs || wntqo || wntqn)) { - *info = -1; - } else if (*m < 0) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < * - m) { - *info = -8; - } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || - wntqo && *m >= *n && *ldvt < *n) { - *info = -10; - } - -/* - Compute workspace - (Note: Comments in the code beginning "Workspace:" describe the - minimal amount of workspace needed at that point in the code, - as well as the preferred amount for good performance. - CWorkspace refers to complex workspace, and RWorkspace to - real workspace. NB refers to the optimal block size for the - immediately following subroutine, as returned by ILAENV.) -*/ - - if (*info == 0 && *m > 0 && *n > 0) { - if (*m >= *n) { - -/* - There is no complex work space needed for bidiagonal SVD - The real work space needed for bidiagonal SVD is BDSPAC, - BDSPAC = 3*N*N + 4*N -*/ - - if (*m >= mnthr1) { - if (wntqn) { - -/* Path 1 (M much larger than N, JOBZ='N') */ - - wrkbl = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& - c__1, "CGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); - wrkbl = max(i__1,i__2); - maxwrk = wrkbl; - minwrk = *n * 3; - } else if (wntqo) { - -/* Path 2 (M much larger than N, JOBZ='O') */ - - wrkbl = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "CUNGQR", - " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& - c__1, "CGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "CUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); - maxwrk = *m * *n + *n * *n + wrkbl; - minwrk = (*n << 1) * *n + *n * 3; - } else if (wntqs) { - -/* Path 3 (M much larger than N, JOBZ='S') */ - - wrkbl = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "CUNGQR", - " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& - c__1, "CGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "CUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); - maxwrk = *n * *n + wrkbl; - minwrk = *n * *n + *n * 3; - } else if (wntqa) { - -/* Path 4 (M much larger than N, JOBZ='A') */ - - wrkbl = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n + *m * ilaenv_(&c__1, "CUNGQR", - " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& - c__1, "CGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "CUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); - maxwrk = *n * *n + wrkbl; - minwrk = *n * *n + (*n << 1) + *m; - } - } else if (*m >= mnthr2) { - -/* Path 5 (M much larger than N, but not as much as MNTHR1) */ - - maxwrk = (*n << 1) + (*m + *n) * ilaenv_(&c__1, "CGEBRD", - " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - minwrk = (*n << 1) + *m; - if (wntqo) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "CUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "CUNGBR", "Q", m, n, n, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); - maxwrk += *m * *n; - minwrk += *n * *n; - } else if (wntqs) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "CUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "CUNGBR", "Q", m, n, n, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); - } else if (wntqa) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "CUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *m * ilaenv_(&c__1, - "CUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); - } - } else { - -/* Path 6 (M at least N, but not much larger) */ - - maxwrk = (*n << 1) + (*m + *n) * ilaenv_(&c__1, "CGEBRD", - " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - minwrk = (*n << 1) + *m; - if (wntqo) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "CUNMBR", "QLN", m, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); - maxwrk += *m * *n; - minwrk += *n * *n; - } else if (wntqs) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "CUNMBR", "QLN", m, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); - } else if (wntqa) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "CUNGBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *m * ilaenv_(&c__1, - "CUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); - } - } - } else { - -/* - There is no complex work space needed for bidiagonal SVD - The real work space needed for bidiagonal SVD is BDSPAC, - BDSPAC = 3*M*M + 4*M -*/ - - if (*n >= mnthr1) { - if (wntqn) { - -/* Path 1t (N much larger than M, JOBZ='N') */ - - maxwrk = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& - c__1, "CGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); - maxwrk = max(i__1,i__2); - minwrk = *m * 3; - } else if (wntqo) { - -/* Path 2t (N much larger than M, JOBZ='O') */ - - wrkbl = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "CUNGLQ", - " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& - c__1, "CGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "CUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "CUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); - maxwrk = *m * *n + *m * *m + wrkbl; - minwrk = (*m << 1) * *m + *m * 3; - } else if (wntqs) { - -/* Path 3t (N much larger than M, JOBZ='S') */ - - wrkbl = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "CUNGLQ", - " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& - c__1, "CGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "CUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "CUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); - maxwrk = *m * *m + wrkbl; - minwrk = *m * *m + *m * 3; - } else if (wntqa) { - -/* Path 4t (N much larger than M, JOBZ='A') */ - - wrkbl = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m + *n * ilaenv_(&c__1, "CUNGLQ", - " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& - c__1, "CGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "CUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "CUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); - maxwrk = *m * *m + wrkbl; - minwrk = *m * *m + (*m << 1) + *n; - } - } else if (*n >= mnthr2) { - -/* Path 5t (N much larger than M, but not as much as MNTHR1) */ - - maxwrk = (*m << 1) + (*m + *n) * ilaenv_(&c__1, "CGEBRD", - " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - minwrk = (*m << 1) + *n; - if (wntqo) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "CUNGBR", "P", m, n, m, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "CUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); - maxwrk += *m * *n; - minwrk += *m * *m; - } else if (wntqs) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "CUNGBR", "P", m, n, m, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "CUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); - } else if (wntqa) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *n * ilaenv_(&c__1, - "CUNGBR", "P", n, n, m, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "CUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); - } - } else { - -/* Path 6t (N greater than M, but not much larger) */ - - maxwrk = (*m << 1) + (*m + *n) * ilaenv_(&c__1, "CGEBRD", - " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - minwrk = (*m << 1) + *n; - if (wntqo) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "CUNMBR", "PRC", m, n, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "CUNMBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); - maxwrk += *m * *n; - minwrk += *m * *m; - } else if (wntqs) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "CUNGBR", "PRC", m, n, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "CUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); - } else if (wntqa) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *n * ilaenv_(&c__1, - "CUNGBR", "PRC", n, n, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "CUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); - } - } - } - maxwrk = max(maxwrk,minwrk); - work[1].r = (real) maxwrk, work[1].i = 0.f; - } - - if (*lwork < minwrk && ! lquery) { - *info = -13; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGESDD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0) { - if (*lwork >= 1) { - work[1].r = 1.f, work[1].i = 0.f; - } - return 0; - } - -/* Get machine constants */ - - eps = slamch_("P"); - smlnum = sqrt(slamch_("S")) / eps; - bignum = 1.f / smlnum; - -/* Scale A if max element outside range [SMLNUM,BIGNUM] */ - - anrm = clange_("M", m, n, &a[a_offset], lda, dum); - iscl = 0; - if (anrm > 0.f && anrm < smlnum) { - iscl = 1; - clascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & - ierr); - } else if (anrm > bignum) { - iscl = 1; - clascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & - ierr); - } - - if (*m >= *n) { - -/* - A has at least as many rows as columns. If A has sufficiently - more rows than columns, first reduce using the QR - decomposition (if sufficient workspace available) -*/ - - if (*m >= mnthr1) { - - if (wntqn) { - -/* - Path 1 (M much larger than N, JOBZ='N') - No singular vectors to be computed -*/ - - itau = 1; - nwork = itau + *n; - -/* - Compute A=Q*R - (CWorkspace: need 2*N, prefer N+N*NB) - (RWorkspace: need 0) -*/ - - i__1 = *lwork - nwork + 1; - cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); - -/* Zero out below R */ - - i__1 = *n - 1; - i__2 = *n - 1; - claset_("L", &i__1, &i__2, &c_b55, &c_b55, &a[a_dim1 + 2], - lda); - ie = 1; - itauq = 1; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize R in A - (CWorkspace: need 3*N, prefer 2*N+2*N*NB) - (RWorkspace: need N) -*/ - - i__1 = *lwork - nwork + 1; - cgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); - nrwork = ie + *n; - -/* - Perform bidiagonal SVD, compute singular values only - (CWorkspace: 0) - (RWorkspace: need BDSPAC) -*/ - - sbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & - c__1, dum, idum, &rwork[nrwork], &iwork[1], info); - - } else if (wntqo) { - -/* - Path 2 (M much larger than N, JOBZ='O') - N left singular vectors to be overwritten on A and - N right singular vectors to be computed in VT -*/ - - iu = 1; - -/* WORK(IU) is N by N */ - - ldwrku = *n; - ir = iu + ldwrku * *n; - if (*lwork >= *m * *n + *n * *n + *n * 3) { - -/* WORK(IR) is M by N */ - - ldwrkr = *m; - } else { - ldwrkr = (*lwork - *n * *n - *n * 3) / *n; - } - itau = ir + ldwrkr * *n; - nwork = itau + *n; - -/* - Compute A=Q*R - (CWorkspace: need N*N+2*N, prefer M*N+N+N*NB) - (RWorkspace: 0) -*/ - - i__1 = *lwork - nwork + 1; - cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); - -/* Copy R to WORK( IR ), zeroing out below it */ - - clacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); - i__1 = *n - 1; - i__2 = *n - 1; - claset_("L", &i__1, &i__2, &c_b55, &c_b55, &work[ir + 1], & - ldwrkr); - -/* - Generate Q in A - (CWorkspace: need 2*N, prefer N+N*NB) - (RWorkspace: 0) -*/ - - i__1 = *lwork - nwork + 1; - cungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], - &i__1, &ierr); - ie = 1; - itauq = itau; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize R in WORK(IR) - (CWorkspace: need N*N+3*N, prefer M*N+2*N+2*N*NB) - (RWorkspace: need N) -*/ - - i__1 = *lwork - nwork + 1; - cgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of R in WORK(IRU) and computing right singular vectors - of R in WORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - iru = ie + *n; - irvt = iru + *n * *n; - nrwork = irvt + *n * *n; - sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & - rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix WORK(IU) - Overwrite WORK(IU) by the left singular vectors of R - (CWorkspace: need 2*N*N+3*N, prefer M*N+N*N+2*N+N*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); - i__1 = *lwork - nwork + 1; - cunmbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ - itauq], &work[iu], &ldwrku, &work[nwork], &i__1, & - ierr); - -/* - Copy real matrix RWORK(IRVT) to complex matrix VT - Overwrite VT by the right singular vectors of R - (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); - i__1 = *lwork - nwork + 1; - cunmbr_("P", "R", "C", n, n, n, &work[ir], &ldwrkr, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - -/* - Multiply Q in A by left singular vectors of R in - WORK(IU), storing result in WORK(IR) and copying to A - (CWorkspace: need 2*N*N, prefer N*N+M*N) - (RWorkspace: 0) -*/ - - i__1 = *m; - i__2 = ldwrkr; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += - i__2) { -/* Computing MIN */ - i__3 = *m - i__ + 1; - chunk = min(i__3,ldwrkr); - cgemm_("N", "N", &chunk, n, n, &c_b56, &a[i__ + a_dim1], - lda, &work[iu], &ldwrku, &c_b55, &work[ir], & - ldwrkr); - clacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + - a_dim1], lda); -/* L10: */ - } - - } else if (wntqs) { - -/* - Path 3 (M much larger than N, JOBZ='S') - N left singular vectors to be computed in U and - N right singular vectors to be computed in VT -*/ - - ir = 1; - -/* WORK(IR) is N by N */ - - ldwrkr = *n; - itau = ir + ldwrkr * *n; - nwork = itau + *n; - -/* - Compute A=Q*R - (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) - (RWorkspace: 0) -*/ - - i__2 = *lwork - nwork + 1; - cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); - -/* Copy R to WORK(IR), zeroing out below it */ - - clacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); - i__2 = *n - 1; - i__1 = *n - 1; - claset_("L", &i__2, &i__1, &c_b55, &c_b55, &work[ir + 1], & - ldwrkr); - -/* - Generate Q in A - (CWorkspace: need 2*N, prefer N+N*NB) - (RWorkspace: 0) -*/ - - i__2 = *lwork - nwork + 1; - cungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], - &i__2, &ierr); - ie = 1; - itauq = itau; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize R in WORK(IR) - (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) - (RWorkspace: need N) -*/ - - i__2 = *lwork - nwork + 1; - cgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - iru = ie + *n; - irvt = iru + *n * *n; - nrwork = irvt + *n * *n; - sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & - rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix U - Overwrite U by left singular vectors of R - (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); - i__2 = *lwork - nwork + 1; - cunmbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - -/* - Copy real matrix RWORK(IRVT) to complex matrix VT - Overwrite VT by right singular vectors of R - (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); - i__2 = *lwork - nwork + 1; - cunmbr_("P", "R", "C", n, n, n, &work[ir], &ldwrkr, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); - -/* - Multiply Q in A by left singular vectors of R in - WORK(IR), storing result in U - (CWorkspace: need N*N) - (RWorkspace: 0) -*/ - - clacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr); - cgemm_("N", "N", m, n, n, &c_b56, &a[a_offset], lda, &work[ir] - , &ldwrkr, &c_b55, &u[u_offset], ldu); - - } else if (wntqa) { - -/* - Path 4 (M much larger than N, JOBZ='A') - M left singular vectors to be computed in U and - N right singular vectors to be computed in VT -*/ - - iu = 1; - -/* WORK(IU) is N by N */ - - ldwrku = *n; - itau = iu + ldwrku * *n; - nwork = itau + *n; - -/* - Compute A=Q*R, copying result to U - (CWorkspace: need 2*N, prefer N+N*NB) - (RWorkspace: 0) -*/ - - i__2 = *lwork - nwork + 1; - cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); - clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); - -/* - Generate Q in U - (CWorkspace: need N+M, prefer N+M*NB) - (RWorkspace: 0) -*/ - - i__2 = *lwork - nwork + 1; - cungqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork], - &i__2, &ierr); - -/* Produce R in A, zeroing out below it */ - - i__2 = *n - 1; - i__1 = *n - 1; - claset_("L", &i__2, &i__1, &c_b55, &c_b55, &a[a_dim1 + 2], - lda); - ie = 1; - itauq = itau; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize R in A - (CWorkspace: need 3*N, prefer 2*N+2*N*NB) - (RWorkspace: need N) -*/ - - i__2 = *lwork - nwork + 1; - cgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); - iru = ie + *n; - irvt = iru + *n * *n; - nrwork = irvt + *n * *n; - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & - rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix WORK(IU) - Overwrite WORK(IU) by left singular vectors of R - (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); - i__2 = *lwork - nwork + 1; - cunmbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[ - itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & - ierr); - -/* - Copy real matrix RWORK(IRVT) to complex matrix VT - Overwrite VT by right singular vectors of R - (CWorkspace: need 3*N, prefer 2*N+N*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); - i__2 = *lwork - nwork + 1; - cunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); - -/* - Multiply Q in U by left singular vectors of R in - WORK(IU), storing result in A - (CWorkspace: need N*N) - (RWorkspace: 0) -*/ - - cgemm_("N", "N", m, n, n, &c_b56, &u[u_offset], ldu, &work[iu] - , &ldwrku, &c_b55, &a[a_offset], lda); - -/* Copy left singular vectors of A from A to U */ - - clacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); - - } - - } else if (*m >= mnthr2) { - -/* - MNTHR2 <= M < MNTHR1 - - Path 5 (M much larger than N, but not as much as MNTHR1) - Reduce to bidiagonal form without QR decomposition, use - CUNGBR and matrix multiplication to compute singular vectors -*/ - - ie = 1; - nrwork = ie + *n; - itauq = 1; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize A - (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) - (RWorkspace: need N) -*/ - - i__2 = *lwork - nwork + 1; - cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], - &work[itaup], &work[nwork], &i__2, &ierr); - if (wntqn) { - -/* - Compute singular values only - (Cworkspace: 0) - (Rworkspace: need BDSPAC) -*/ - - sbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & - c__1, dum, idum, &rwork[nrwork], &iwork[1], info); - } else if (wntqo) { - iu = nwork; - iru = nrwork; - irvt = iru + *n * *n; - nrwork = irvt + *n * *n; - -/* - Copy A to VT, generate P**H - (Cworkspace: need 2*N, prefer N+N*NB) - (Rworkspace: 0) -*/ - - clacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - i__2 = *lwork - nwork + 1; - cungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & - work[nwork], &i__2, &ierr); - -/* - Generate Q in A - (CWorkspace: need 2*N, prefer N+N*NB) - (RWorkspace: 0) -*/ - - i__2 = *lwork - nwork + 1; - cungbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[ - nwork], &i__2, &ierr); - - if (*lwork >= *m * *n + *n * 3) { - -/* WORK( IU ) is M by N */ - - ldwrku = *m; - } else { - -/* WORK(IU) is LDWRKU by N */ - - ldwrku = (*lwork - *n * 3) / *n; - } - nwork = iu + ldwrku * *n; - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & - rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Multiply real matrix RWORK(IRVT) by P**H in VT, - storing the result in WORK(IU), copying to VT - (Cworkspace: need 0) - (Rworkspace: need 3*N*N) -*/ - - clarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &work[iu] - , &ldwrku, &rwork[nrwork]); - clacpy_("F", n, n, &work[iu], &ldwrku, &vt[vt_offset], ldvt); - -/* - Multiply Q in A by real matrix RWORK(IRU), storing the - result in WORK(IU), copying to A - (CWorkspace: need N*N, prefer M*N) - (Rworkspace: need 3*N*N, prefer N*N+2*M*N) -*/ - - nrwork = irvt; - i__2 = *m; - i__1 = ldwrku; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += - i__1) { -/* Computing MIN */ - i__3 = *m - i__ + 1; - chunk = min(i__3,ldwrku); - clacrm_(&chunk, n, &a[i__ + a_dim1], lda, &rwork[iru], n, - &work[iu], &ldwrku, &rwork[nrwork]); - clacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + - a_dim1], lda); -/* L20: */ - } - - } else if (wntqs) { - -/* - Copy A to VT, generate P**H - (Cworkspace: need 2*N, prefer N+N*NB) - (Rworkspace: 0) -*/ - - clacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - i__1 = *lwork - nwork + 1; - cungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & - work[nwork], &i__1, &ierr); - -/* - Copy A to U, generate Q - (Cworkspace: need 2*N, prefer N+N*NB) - (Rworkspace: 0) -*/ - - clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); - i__1 = *lwork - nwork + 1; - cungbr_("Q", m, n, n, &u[u_offset], ldu, &work[itauq], &work[ - nwork], &i__1, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - iru = nrwork; - irvt = iru + *n * *n; - nrwork = irvt + *n * *n; - sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & - rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Multiply real matrix RWORK(IRVT) by P**H in VT, - storing the result in A, copying to VT - (Cworkspace: need 0) - (Rworkspace: need 3*N*N) -*/ - - clarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &a[ - a_offset], lda, &rwork[nrwork]); - clacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - -/* - Multiply Q in U by real matrix RWORK(IRU), storing the - result in A, copying to U - (CWorkspace: need 0) - (Rworkspace: need N*N+2*M*N) -*/ - - nrwork = irvt; - clacrm_(m, n, &u[u_offset], ldu, &rwork[iru], n, &a[a_offset], - lda, &rwork[nrwork]); - clacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); - } else { - -/* - Copy A to VT, generate P**H - (Cworkspace: need 2*N, prefer N+N*NB) - (Rworkspace: 0) -*/ - - clacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - i__1 = *lwork - nwork + 1; - cungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & - work[nwork], &i__1, &ierr); - -/* - Copy A to U, generate Q - (Cworkspace: need 2*N, prefer N+N*NB) - (Rworkspace: 0) -*/ - - clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); - i__1 = *lwork - nwork + 1; - cungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ - nwork], &i__1, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - iru = nrwork; - irvt = iru + *n * *n; - nrwork = irvt + *n * *n; - sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & - rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Multiply real matrix RWORK(IRVT) by P**H in VT, - storing the result in A, copying to VT - (Cworkspace: need 0) - (Rworkspace: need 3*N*N) -*/ - - clarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &a[ - a_offset], lda, &rwork[nrwork]); - clacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - -/* - Multiply Q in U by real matrix RWORK(IRU), storing the - result in A, copying to U - (CWorkspace: 0) - (Rworkspace: need 3*N*N) -*/ - - nrwork = irvt; - clacrm_(m, n, &u[u_offset], ldu, &rwork[iru], n, &a[a_offset], - lda, &rwork[nrwork]); - clacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); - } - - } else { - -/* - M .LT. MNTHR2 - - Path 6 (M at least N, but not much larger) - Reduce to bidiagonal form without QR decomposition - Use CUNMBR to compute singular vectors -*/ - - ie = 1; - nrwork = ie + *n; - itauq = 1; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize A - (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) - (RWorkspace: need N) -*/ - - i__1 = *lwork - nwork + 1; - cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], - &work[itaup], &work[nwork], &i__1, &ierr); - if (wntqn) { - -/* - Compute singular values only - (Cworkspace: 0) - (Rworkspace: need BDSPAC) -*/ - - sbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & - c__1, dum, idum, &rwork[nrwork], &iwork[1], info); - } else if (wntqo) { - iu = nwork; - iru = nrwork; - irvt = iru + *n * *n; - nrwork = irvt + *n * *n; - if (*lwork >= *m * *n + *n * 3) { - -/* WORK( IU ) is M by N */ - - ldwrku = *m; - } else { - -/* WORK( IU ) is LDWRKU by N */ - - ldwrku = (*lwork - *n * 3) / *n; - } - nwork = iu + ldwrku * *n; - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & - rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRVT) to complex matrix VT - Overwrite VT by right singular vectors of A - (Cworkspace: need 2*N, prefer N+N*NB) - (Rworkspace: need 0) -*/ - - clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); - i__1 = *lwork - nwork + 1; - cunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - - if (*lwork >= *m * *n + *n * 3) { - -/* - Copy real matrix RWORK(IRU) to complex matrix WORK(IU) - Overwrite WORK(IU) by left singular vectors of A, copying - to A - (Cworkspace: need M*N+2*N, prefer M*N+N+N*NB) - (Rworkspace: need 0) -*/ - - claset_("F", m, n, &c_b55, &c_b55, &work[iu], &ldwrku); - clacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); - i__1 = *lwork - nwork + 1; - cunmbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ - itauq], &work[iu], &ldwrku, &work[nwork], &i__1, & - ierr); - clacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda); - } else { - -/* - Generate Q in A - (Cworkspace: need 2*N, prefer N+N*NB) - (Rworkspace: need 0) -*/ - - i__1 = *lwork - nwork + 1; - cungbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & - work[nwork], &i__1, &ierr); - -/* - Multiply Q in A by real matrix RWORK(IRU), storing the - result in WORK(IU), copying to A - (CWorkspace: need N*N, prefer M*N) - (Rworkspace: need 3*N*N, prefer N*N+2*M*N) -*/ - - nrwork = irvt; - i__1 = *m; - i__2 = ldwrku; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += - i__2) { -/* Computing MIN */ - i__3 = *m - i__ + 1; - chunk = min(i__3,ldwrku); - clacrm_(&chunk, n, &a[i__ + a_dim1], lda, &rwork[iru], - n, &work[iu], &ldwrku, &rwork[nrwork]); - clacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + - a_dim1], lda); -/* L30: */ - } - } - - } else if (wntqs) { - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - iru = nrwork; - irvt = iru + *n * *n; - nrwork = irvt + *n * *n; - sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & - rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix U - Overwrite U by left singular vectors of A - (CWorkspace: need 3*N, prefer 2*N+N*NB) - (RWorkspace: 0) -*/ - - claset_("F", m, n, &c_b55, &c_b55, &u[u_offset], ldu); - clacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); - i__2 = *lwork - nwork + 1; - cunmbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - -/* - Copy real matrix RWORK(IRVT) to complex matrix VT - Overwrite VT by right singular vectors of A - (CWorkspace: need 3*N, prefer 2*N+N*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); - i__2 = *lwork - nwork + 1; - cunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); - } else { - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - iru = nrwork; - irvt = iru + *n * *n; - nrwork = irvt + *n * *n; - sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & - rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* Set the right corner of U to identity matrix */ - - claset_("F", m, m, &c_b55, &c_b55, &u[u_offset], ldu); - i__2 = *m - *n; - i__1 = *m - *n; - claset_("F", &i__2, &i__1, &c_b55, &c_b56, &u[*n + 1 + (*n + - 1) * u_dim1], ldu); - -/* - Copy real matrix RWORK(IRU) to complex matrix U - Overwrite U by left singular vectors of A - (CWorkspace: need 2*N+M, prefer 2*N+M*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); - i__2 = *lwork - nwork + 1; - cunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - -/* - Copy real matrix RWORK(IRVT) to complex matrix VT - Overwrite VT by right singular vectors of A - (CWorkspace: need 3*N, prefer 2*N+N*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); - i__2 = *lwork - nwork + 1; - cunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); - } - - } - - } else { - -/* - A has more columns than rows. If A has sufficiently more - columns than rows, first reduce using the LQ decomposition - (if sufficient workspace available) -*/ - - if (*n >= mnthr1) { - - if (wntqn) { - -/* - Path 1t (N much larger than M, JOBZ='N') - No singular vectors to be computed -*/ - - itau = 1; - nwork = itau + *m; - -/* - Compute A=L*Q - (CWorkspace: need 2*M, prefer M+M*NB) - (RWorkspace: 0) -*/ - - i__2 = *lwork - nwork + 1; - cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); - -/* Zero out above L */ - - i__2 = *m - 1; - i__1 = *m - 1; - claset_("U", &i__2, &i__1, &c_b55, &c_b55, &a[(a_dim1 << 1) + - 1], lda); - ie = 1; - itauq = 1; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize L in A - (CWorkspace: need 3*M, prefer 2*M+2*M*NB) - (RWorkspace: need M) -*/ - - i__2 = *lwork - nwork + 1; - cgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); - nrwork = ie + *m; - -/* - Perform bidiagonal SVD, compute singular values only - (CWorkspace: 0) - (RWorkspace: need BDSPAC) -*/ - - sbdsdc_("U", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & - c__1, dum, idum, &rwork[nrwork], &iwork[1], info); - - } else if (wntqo) { - -/* - Path 2t (N much larger than M, JOBZ='O') - M right singular vectors to be overwritten on A and - M left singular vectors to be computed in U -*/ - - ivt = 1; - ldwkvt = *m; - -/* WORK(IVT) is M by M */ - - il = ivt + ldwkvt * *m; - if (*lwork >= *m * *n + *m * *m + *m * 3) { - -/* WORK(IL) M by N */ - - ldwrkl = *m; - chunk = *n; - } else { - -/* WORK(IL) is M by CHUNK */ - - ldwrkl = *m; - chunk = (*lwork - *m * *m - *m * 3) / *m; - } - itau = il + ldwrkl * chunk; - nwork = itau + *m; - -/* - Compute A=L*Q - (CWorkspace: need 2*M, prefer M+M*NB) - (RWorkspace: 0) -*/ - - i__2 = *lwork - nwork + 1; - cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); - -/* Copy L to WORK(IL), zeroing about above it */ - - clacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); - i__2 = *m - 1; - i__1 = *m - 1; - claset_("U", &i__2, &i__1, &c_b55, &c_b55, &work[il + ldwrkl], - &ldwrkl); - -/* - Generate Q in A - (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) - (RWorkspace: 0) -*/ - - i__2 = *lwork - nwork + 1; - cunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], - &i__2, &ierr); - ie = 1; - itauq = itau; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize L in WORK(IL) - (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) - (RWorkspace: need M) -*/ - - i__2 = *lwork - nwork + 1; - cgebrd_(m, m, &work[il], &ldwrkl, &s[1], &rwork[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - iru = ie + *m; - irvt = iru + *m * *m; - nrwork = irvt + *m * *m; - sbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & - rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix WORK(IU) - Overwrite WORK(IU) by the left singular vectors of L - (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); - i__2 = *lwork - nwork + 1; - cunmbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - -/* - Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) - Overwrite WORK(IVT) by the right singular vectors of L - (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt); - i__2 = *lwork - nwork + 1; - cunmbr_("P", "R", "C", m, m, m, &work[il], &ldwrkl, &work[ - itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, & - ierr); - -/* - Multiply right singular vectors of L in WORK(IL) by Q - in A, storing result in WORK(IL) and copying to A - (CWorkspace: need 2*M*M, prefer M*M+M*N)) - (RWorkspace: 0) -*/ - - i__2 = *n; - i__1 = chunk; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += - i__1) { -/* Computing MIN */ - i__3 = *n - i__ + 1; - blk = min(i__3,chunk); - cgemm_("N", "N", m, &blk, m, &c_b56, &work[ivt], m, &a[ - i__ * a_dim1 + 1], lda, &c_b55, &work[il], & - ldwrkl); - clacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 - + 1], lda); -/* L40: */ - } - - } else if (wntqs) { - -/* - Path 3t (N much larger than M, JOBZ='S') - M right singular vectors to be computed in VT and - M left singular vectors to be computed in U -*/ - - il = 1; - -/* WORK(IL) is M by M */ - - ldwrkl = *m; - itau = il + ldwrkl * *m; - nwork = itau + *m; - -/* - Compute A=L*Q - (CWorkspace: need 2*M, prefer M+M*NB) - (RWorkspace: 0) -*/ - - i__1 = *lwork - nwork + 1; - cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); - -/* Copy L to WORK(IL), zeroing out above it */ - - clacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); - i__1 = *m - 1; - i__2 = *m - 1; - claset_("U", &i__1, &i__2, &c_b55, &c_b55, &work[il + ldwrkl], - &ldwrkl); - -/* - Generate Q in A - (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) - (RWorkspace: 0) -*/ - - i__1 = *lwork - nwork + 1; - cunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], - &i__1, &ierr); - ie = 1; - itauq = itau; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize L in WORK(IL) - (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) - (RWorkspace: need M) -*/ - - i__1 = *lwork - nwork + 1; - cgebrd_(m, m, &work[il], &ldwrkl, &s[1], &rwork[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - iru = ie + *m; - irvt = iru + *m * *m; - nrwork = irvt + *m * *m; - sbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & - rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix U - Overwrite U by left singular vectors of L - (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); - i__1 = *lwork - nwork + 1; - cunmbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - -/* - Copy real matrix RWORK(IRVT) to complex matrix VT - Overwrite VT by left singular vectors of L - (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); - i__1 = *lwork - nwork + 1; - cunmbr_("P", "R", "C", m, m, m, &work[il], &ldwrkl, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - -/* - Copy VT to WORK(IL), multiply right singular vectors of L - in WORK(IL) by Q in A, storing result in VT - (CWorkspace: need M*M) - (RWorkspace: 0) -*/ - - clacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl); - cgemm_("N", "N", m, n, m, &c_b56, &work[il], &ldwrkl, &a[ - a_offset], lda, &c_b55, &vt[vt_offset], ldvt); - - } else if (wntqa) { - -/* - Path 9t (N much larger than M, JOBZ='A') - N right singular vectors to be computed in VT and - M left singular vectors to be computed in U -*/ - - ivt = 1; - -/* WORK(IVT) is M by M */ - - ldwkvt = *m; - itau = ivt + ldwkvt * *m; - nwork = itau + *m; - -/* - Compute A=L*Q, copying result to VT - (CWorkspace: need 2*M, prefer M+M*NB) - (RWorkspace: 0) -*/ - - i__1 = *lwork - nwork + 1; - cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); - clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - -/* - Generate Q in VT - (CWorkspace: need M+N, prefer M+N*NB) - (RWorkspace: 0) -*/ - - i__1 = *lwork - nwork + 1; - cunglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[ - nwork], &i__1, &ierr); - -/* Produce L in A, zeroing out above it */ - - i__1 = *m - 1; - i__2 = *m - 1; - claset_("U", &i__1, &i__2, &c_b55, &c_b55, &a[(a_dim1 << 1) + - 1], lda); - ie = 1; - itauq = itau; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize L in A - (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) - (RWorkspace: need M) -*/ - - i__1 = *lwork - nwork + 1; - cgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - iru = ie + *m; - irvt = iru + *m * *m; - nrwork = irvt + *m * *m; - sbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & - rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix U - Overwrite U by left singular vectors of L - (CWorkspace: need 3*M, prefer 2*M+M*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); - i__1 = *lwork - nwork + 1; - cunmbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - -/* - Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) - Overwrite WORK(IVT) by right singular vectors of L - (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt); - i__1 = *lwork - nwork + 1; - cunmbr_("P", "R", "C", m, m, m, &a[a_offset], lda, &work[ - itaup], &work[ivt], &ldwkvt, &work[nwork], &i__1, & - ierr); - -/* - Multiply right singular vectors of L in WORK(IVT) by - Q in VT, storing result in A - (CWorkspace: need M*M) - (RWorkspace: 0) -*/ - - cgemm_("N", "N", m, n, m, &c_b56, &work[ivt], &ldwkvt, &vt[ - vt_offset], ldvt, &c_b55, &a[a_offset], lda); - -/* Copy right singular vectors of A from A to VT */ - - clacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - - } - - } else if (*n >= mnthr2) { - -/* - MNTHR2 <= N < MNTHR1 - - Path 5t (N much larger than M, but not as much as MNTHR1) - Reduce to bidiagonal form without QR decomposition, use - CUNGBR and matrix multiplication to compute singular vectors -*/ - - - ie = 1; - nrwork = ie + *m; - itauq = 1; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize A - (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) - (RWorkspace: M) -*/ - - i__1 = *lwork - nwork + 1; - cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], - &work[itaup], &work[nwork], &i__1, &ierr); - - if (wntqn) { - -/* - Compute singular values only - (Cworkspace: 0) - (Rworkspace: need BDSPAC) -*/ - - sbdsdc_("L", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & - c__1, dum, idum, &rwork[nrwork], &iwork[1], info); - } else if (wntqo) { - irvt = nrwork; - iru = irvt + *m * *m; - nrwork = iru + *m * *m; - ivt = nwork; - -/* - Copy A to U, generate Q - (Cworkspace: need 2*M, prefer M+M*NB) - (Rworkspace: 0) -*/ - - clacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); - i__1 = *lwork - nwork + 1; - cungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ - nwork], &i__1, &ierr); - -/* - Generate P**H in A - (Cworkspace: need 2*M, prefer M+M*NB) - (Rworkspace: 0) -*/ - - i__1 = *lwork - nwork + 1; - cungbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[ - nwork], &i__1, &ierr); - - ldwkvt = *m; - if (*lwork >= *m * *n + *m * 3) { - -/* WORK( IVT ) is M by N */ - - nwork = ivt + ldwkvt * *n; - chunk = *n; - } else { - -/* WORK( IVT ) is M by CHUNK */ - - chunk = (*lwork - *m * 3) / *m; - nwork = ivt + ldwkvt * chunk; - } - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & - rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Multiply Q in U by real matrix RWORK(IRVT) - storing the result in WORK(IVT), copying to U - (Cworkspace: need 0) - (Rworkspace: need 2*M*M) -*/ - - clacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &work[ivt], & - ldwkvt, &rwork[nrwork]); - clacpy_("F", m, m, &work[ivt], &ldwkvt, &u[u_offset], ldu); - -/* - Multiply RWORK(IRVT) by P**H in A, storing the - result in WORK(IVT), copying to A - (CWorkspace: need M*M, prefer M*N) - (Rworkspace: need 2*M*M, prefer 2*M*N) -*/ - - nrwork = iru; - i__1 = *n; - i__2 = chunk; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += - i__2) { -/* Computing MIN */ - i__3 = *n - i__ + 1; - blk = min(i__3,chunk); - clarcm_(m, &blk, &rwork[irvt], m, &a[i__ * a_dim1 + 1], - lda, &work[ivt], &ldwkvt, &rwork[nrwork]); - clacpy_("F", m, &blk, &work[ivt], &ldwkvt, &a[i__ * - a_dim1 + 1], lda); -/* L50: */ - } - } else if (wntqs) { - -/* - Copy A to U, generate Q - (Cworkspace: need 2*M, prefer M+M*NB) - (Rworkspace: 0) -*/ - - clacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); - i__2 = *lwork - nwork + 1; - cungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ - nwork], &i__2, &ierr); - -/* - Copy A to VT, generate P**H - (Cworkspace: need 2*M, prefer M+M*NB) - (Rworkspace: 0) -*/ - - clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - i__2 = *lwork - nwork + 1; - cungbr_("P", m, n, m, &vt[vt_offset], ldvt, &work[itaup], & - work[nwork], &i__2, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - irvt = nrwork; - iru = irvt + *m * *m; - nrwork = iru + *m * *m; - sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & - rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Multiply Q in U by real matrix RWORK(IRU), storing the - result in A, copying to U - (CWorkspace: need 0) - (Rworkspace: need 3*M*M) -*/ - - clacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &a[a_offset], - lda, &rwork[nrwork]); - clacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu); - -/* - Multiply real matrix RWORK(IRVT) by P**H in VT, - storing the result in A, copying to VT - (Cworkspace: need 0) - (Rworkspace: need M*M+2*M*N) -*/ - - nrwork = iru; - clarcm_(m, n, &rwork[irvt], m, &vt[vt_offset], ldvt, &a[ - a_offset], lda, &rwork[nrwork]); - clacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - } else { - -/* - Copy A to U, generate Q - (Cworkspace: need 2*M, prefer M+M*NB) - (Rworkspace: 0) -*/ - - clacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); - i__2 = *lwork - nwork + 1; - cungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ - nwork], &i__2, &ierr); - -/* - Copy A to VT, generate P**H - (Cworkspace: need 2*M, prefer M+M*NB) - (Rworkspace: 0) -*/ - - clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - i__2 = *lwork - nwork + 1; - cungbr_("P", n, n, m, &vt[vt_offset], ldvt, &work[itaup], & - work[nwork], &i__2, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - irvt = nrwork; - iru = irvt + *m * *m; - nrwork = iru + *m * *m; - sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & - rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Multiply Q in U by real matrix RWORK(IRU), storing the - result in A, copying to U - (CWorkspace: need 0) - (Rworkspace: need 3*M*M) -*/ - - clacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &a[a_offset], - lda, &rwork[nrwork]); - clacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu); - -/* - Multiply real matrix RWORK(IRVT) by P**H in VT, - storing the result in A, copying to VT - (Cworkspace: need 0) - (Rworkspace: need M*M+2*M*N) -*/ - - clarcm_(m, n, &rwork[irvt], m, &vt[vt_offset], ldvt, &a[ - a_offset], lda, &rwork[nrwork]); - clacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - } - - } else { - -/* - N .LT. MNTHR2 - - Path 6t (N greater than M, but not much larger) - Reduce to bidiagonal form without LQ decomposition - Use CUNMBR to compute singular vectors -*/ - - ie = 1; - nrwork = ie + *m; - itauq = 1; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize A - (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) - (RWorkspace: M) -*/ - - i__2 = *lwork - nwork + 1; - cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], - &work[itaup], &work[nwork], &i__2, &ierr); - if (wntqn) { - -/* - Compute singular values only - (Cworkspace: 0) - (Rworkspace: need BDSPAC) -*/ - - sbdsdc_("L", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & - c__1, dum, idum, &rwork[nrwork], &iwork[1], info); - } else if (wntqo) { - ldwkvt = *m; - ivt = nwork; - if (*lwork >= *m * *n + *m * 3) { - -/* WORK( IVT ) is M by N */ - - claset_("F", m, n, &c_b55, &c_b55, &work[ivt], &ldwkvt); - nwork = ivt + ldwkvt * *n; - } else { - -/* WORK( IVT ) is M by CHUNK */ - - chunk = (*lwork - *m * 3) / *m; - nwork = ivt + ldwkvt * chunk; - } - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - irvt = nrwork; - iru = irvt + *m * *m; - nrwork = iru + *m * *m; - sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & - rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix U - Overwrite U by left singular vectors of A - (Cworkspace: need 2*M, prefer M+M*NB) - (Rworkspace: need 0) -*/ - - clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); - i__2 = *lwork - nwork + 1; - cunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - - if (*lwork >= *m * *n + *m * 3) { - -/* - Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) - Overwrite WORK(IVT) by right singular vectors of A, - copying to A - (Cworkspace: need M*N+2*M, prefer M*N+M+M*NB) - (Rworkspace: need 0) -*/ - - clacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt); - i__2 = *lwork - nwork + 1; - cunmbr_("P", "R", "C", m, n, m, &a[a_offset], lda, &work[ - itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, - &ierr); - clacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda); - } else { - -/* - Generate P**H in A - (Cworkspace: need 2*M, prefer M+M*NB) - (Rworkspace: need 0) -*/ - - i__2 = *lwork - nwork + 1; - cungbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & - work[nwork], &i__2, &ierr); - -/* - Multiply Q in A by real matrix RWORK(IRU), storing the - result in WORK(IU), copying to A - (CWorkspace: need M*M, prefer M*N) - (Rworkspace: need 3*M*M, prefer M*M+2*M*N) -*/ - - nrwork = iru; - i__2 = *n; - i__1 = chunk; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += - i__1) { -/* Computing MIN */ - i__3 = *n - i__ + 1; - blk = min(i__3,chunk); - clarcm_(m, &blk, &rwork[irvt], m, &a[i__ * a_dim1 + 1] - , lda, &work[ivt], &ldwkvt, &rwork[nrwork]); - clacpy_("F", m, &blk, &work[ivt], &ldwkvt, &a[i__ * - a_dim1 + 1], lda); -/* L60: */ - } - } - } else if (wntqs) { - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - irvt = nrwork; - iru = irvt + *m * *m; - nrwork = iru + *m * *m; - sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & - rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix U - Overwrite U by left singular vectors of A - (CWorkspace: need 3*M, prefer 2*M+M*NB) - (RWorkspace: M*M) -*/ - - clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); - i__1 = *lwork - nwork + 1; - cunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - -/* - Copy real matrix RWORK(IRVT) to complex matrix VT - Overwrite VT by right singular vectors of A - (CWorkspace: need 3*M, prefer 2*M+M*NB) - (RWorkspace: M*M) -*/ - - claset_("F", m, n, &c_b55, &c_b55, &vt[vt_offset], ldvt); - clacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); - i__1 = *lwork - nwork + 1; - cunmbr_("P", "R", "C", m, n, m, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - } else { - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - irvt = nrwork; - iru = irvt + *m * *m; - nrwork = iru + *m * *m; - - sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & - rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix U - Overwrite U by left singular vectors of A - (CWorkspace: need 3*M, prefer 2*M+M*NB) - (RWorkspace: M*M) -*/ - - clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); - i__1 = *lwork - nwork + 1; - cunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - -/* Set the right corner of VT to identity matrix */ - - i__1 = *n - *m; - i__2 = *n - *m; - claset_("F", &i__1, &i__2, &c_b55, &c_b56, &vt[*m + 1 + (*m + - 1) * vt_dim1], ldvt); - -/* - Copy real matrix RWORK(IRVT) to complex matrix VT - Overwrite VT by right singular vectors of A - (CWorkspace: need 2*M+N, prefer 2*M+N*NB) - (RWorkspace: M*M) -*/ - - claset_("F", n, n, &c_b55, &c_b55, &vt[vt_offset], ldvt); - clacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); - i__1 = *lwork - nwork + 1; - cunmbr_("P", "R", "C", n, n, m, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - } - - } - - } - -/* Undo scaling if necessary */ - - if (iscl == 1) { - if (anrm > bignum) { - slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & - minmn, &ierr); - } - if (anrm < smlnum) { - slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & - minmn, &ierr); - } - } - -/* Return optimal workspace in WORK(1) */ - - work[1].r = (real) maxwrk, work[1].i = 0.f; - - return 0; - -/* End of CGESDD */ - -} /* cgesdd_ */ - -/* Subroutine */ int cgesv_(integer *n, integer *nrhs, complex *a, integer * - lda, integer *ipiv, complex *b, integer *ldb, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1; - - /* Local variables */ - extern /* Subroutine */ int cgetrf_(integer *, integer *, complex *, - integer *, integer *, integer *), xerbla_(char *, integer *), cgetrs_(char *, integer *, integer *, complex *, integer - *, integer *, complex *, integer *, integer *); - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - CGESV computes the solution to a complex system of linear equations - A * X = B, - where A is an N-by-N matrix and X and B are N-by-NRHS matrices. - - The LU decomposition with partial pivoting and row interchanges is - used to factor A as - A = P * L * U, - where P is a permutation matrix, L is unit lower triangular, and U is - upper triangular. The factored form of A is then used to solve the - system of equations A * X = B. - - Arguments - ========= - - N (input) INTEGER - The number of linear equations, i.e., the order of the - matrix A. N >= 0. - - NRHS (input) INTEGER - The number of right hand sides, i.e., the number of columns - of the matrix B. NRHS >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the N-by-N coefficient matrix A. - On exit, the factors L and U from the factorization - A = P*L*U; the unit diagonal elements of L are not stored. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - IPIV (output) INTEGER array, dimension (N) - The pivot indices that define the permutation matrix P; - row i of the matrix was interchanged with row IPIV(i). - - B (input/output) COMPLEX array, dimension (LDB,NRHS) - On entry, the N-by-NRHS matrix of right hand side matrix B. - On exit, if INFO = 0, the N-by-NRHS solution matrix X. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, U(i,i) is exactly zero. The factorization - has been completed, but the factor U is exactly - singular, so the solution could not be computed. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - *info = 0; - if (*n < 0) { - *info = -1; - } else if (*nrhs < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } else if (*ldb < max(1,*n)) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGESV ", &i__1); - return 0; - } - -/* Compute the LU factorization of A. */ - - cgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); - if (*info == 0) { - -/* Solve the system A*X = B, overwriting B with X. */ - - cgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[ - b_offset], ldb, info); - } - return 0; - -/* End of CGESV */ - -} /* cgesv_ */ - -/* Subroutine */ int cgetf2_(integer *m, integer *n, complex *a, integer *lda, - integer *ipiv, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - complex q__1; - - /* Builtin functions */ - void c_div(complex *, complex *, complex *); - - /* Local variables */ - static integer j, jp; - extern /* Subroutine */ int cscal_(integer *, complex *, complex *, - integer *), cgeru_(integer *, integer *, complex *, complex *, - integer *, complex *, integer *, complex *, integer *), cswap_( - integer *, complex *, integer *, complex *, integer *); - extern integer icamax_(integer *, complex *, integer *); - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CGETF2 computes an LU factorization of a general m-by-n matrix A - using partial pivoting with row interchanges. - - The factorization has the form - A = P * L * U - where P is a permutation matrix, L is lower triangular with unit - diagonal elements (lower trapezoidal if m > n), and U is upper - triangular (upper trapezoidal if m < n). - - This is the right-looking Level 2 BLAS version of the algorithm. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the m by n matrix to be factored. - On exit, the factors L and U from the factorization - A = P*L*U; the unit diagonal elements of L are not stored. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - IPIV (output) INTEGER array, dimension (min(M,N)) - The pivot indices; for 1 <= i <= min(M,N), row i of the - matrix was interchanged with row IPIV(i). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - > 0: if INFO = k, U(k,k) is exactly zero. The factorization - has been completed, but the factor U is exactly - singular, and division by zero will occur if it is used - to solve a system of equations. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGETF2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0) { - return 0; - } - - i__1 = min(*m,*n); - for (j = 1; j <= i__1; ++j) { - -/* Find pivot and test for singularity. */ - - i__2 = *m - j + 1; - jp = j - 1 + icamax_(&i__2, &a[j + j * a_dim1], &c__1); - ipiv[j] = jp; - i__2 = jp + j * a_dim1; - if (a[i__2].r != 0.f || a[i__2].i != 0.f) { - -/* Apply the interchange to columns 1:N. */ - - if (jp != j) { - cswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda); - } - -/* Compute elements J+1:M of J-th column. */ - - if (j < *m) { - i__2 = *m - j; - c_div(&q__1, &c_b56, &a[j + j * a_dim1]); - cscal_(&i__2, &q__1, &a[j + 1 + j * a_dim1], &c__1); - } - - } else if (*info == 0) { - - *info = j; - } - - if (j < min(*m,*n)) { - -/* Update trailing submatrix. */ - - i__2 = *m - j; - i__3 = *n - j; - q__1.r = -1.f, q__1.i = -0.f; - cgeru_(&i__2, &i__3, &q__1, &a[j + 1 + j * a_dim1], &c__1, &a[j + - (j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], lda) - ; - } -/* L10: */ - } - return 0; - -/* End of CGETF2 */ - -} /* cgetf2_ */ - -/* Subroutine */ int cgetrf_(integer *m, integer *n, complex *a, integer *lda, - integer *ipiv, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - complex q__1; - - /* Local variables */ - static integer i__, j, jb, nb; - extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, - integer *, complex *, complex *, integer *, complex *, integer *, - complex *, complex *, integer *); - static integer iinfo; - extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, - integer *, integer *, complex *, complex *, integer *, complex *, - integer *), cgetf2_(integer *, - integer *, complex *, integer *, integer *, integer *), xerbla_( - char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int claswp_(integer *, complex *, integer *, - integer *, integer *, integer *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CGETRF computes an LU factorization of a general M-by-N matrix A - using partial pivoting with row interchanges. - - The factorization has the form - A = P * L * U - where P is a permutation matrix, L is lower triangular with unit - diagonal elements (lower trapezoidal if m > n), and U is upper - triangular (upper trapezoidal if m < n). - - This is the right-looking Level 3 BLAS version of the algorithm. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the M-by-N matrix to be factored. - On exit, the factors L and U from the factorization - A = P*L*U; the unit diagonal elements of L are not stored. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - IPIV (output) INTEGER array, dimension (min(M,N)) - The pivot indices; for 1 <= i <= min(M,N), row i of the - matrix was interchanged with row IPIV(i). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, U(i,i) is exactly zero. The factorization - has been completed, but the factor U is exactly - singular, and division by zero will occur if it is used - to solve a system of equations. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGETRF", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0) { - return 0; - } - -/* Determine the block size for this environment. */ - - nb = ilaenv_(&c__1, "CGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) - 1); - if (nb <= 1 || nb >= min(*m,*n)) { - -/* Use unblocked code. */ - - cgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); - } else { - -/* Use blocked code. */ - - i__1 = min(*m,*n); - i__2 = nb; - for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { -/* Computing MIN */ - i__3 = min(*m,*n) - j + 1; - jb = min(i__3,nb); - -/* - Factor diagonal and subdiagonal blocks and test for exact - singularity. -*/ - - i__3 = *m - j + 1; - cgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); - -/* Adjust INFO and the pivot indices. */ - - if (*info == 0 && iinfo > 0) { - *info = iinfo + j - 1; - } -/* Computing MIN */ - i__4 = *m, i__5 = j + jb - 1; - i__3 = min(i__4,i__5); - for (i__ = j; i__ <= i__3; ++i__) { - ipiv[i__] = j - 1 + ipiv[i__]; -/* L10: */ - } - -/* Apply interchanges to columns 1:J-1. */ - - i__3 = j - 1; - i__4 = j + jb - 1; - claswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); - - if (j + jb <= *n) { - -/* Apply interchanges to columns J+JB:N. */ - - i__3 = *n - j - jb + 1; - i__4 = j + jb - 1; - claswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & - ipiv[1], &c__1); - -/* Compute block row of U. */ - - i__3 = *n - j - jb + 1; - ctrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & - c_b56, &a[j + j * a_dim1], lda, &a[j + (j + jb) * - a_dim1], lda); - if (j + jb <= *m) { - -/* Update trailing submatrix. */ - - i__3 = *m - j - jb + 1; - i__4 = *n - j - jb + 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, - &q__1, &a[j + jb + j * a_dim1], lda, &a[j + (j + - jb) * a_dim1], lda, &c_b56, &a[j + jb + (j + jb) * - a_dim1], lda); - } - } -/* L20: */ - } - } - return 0; - -/* End of CGETRF */ - -} /* cgetrf_ */ - -/* Subroutine */ int cgetrs_(char *trans, integer *n, integer *nrhs, complex * - a, integer *lda, integer *ipiv, complex *b, integer *ldb, integer * - info) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1; - - /* Local variables */ - extern logical lsame_(char *, char *); - extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, - integer *, integer *, complex *, complex *, integer *, complex *, - integer *), xerbla_(char *, - integer *), claswp_(integer *, complex *, integer *, - integer *, integer *, integer *, integer *); - static logical notran; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CGETRS solves a system of linear equations - A * X = B, A**T * X = B, or A**H * X = B - with a general N-by-N matrix A using the LU factorization computed - by CGETRF. - - Arguments - ========= - - TRANS (input) CHARACTER*1 - Specifies the form of the system of equations: - = 'N': A * X = B (No transpose) - = 'T': A**T * X = B (Transpose) - = 'C': A**H * X = B (Conjugate transpose) - - N (input) INTEGER - The order of the matrix A. N >= 0. - - NRHS (input) INTEGER - The number of right hand sides, i.e., the number of columns - of the matrix B. NRHS >= 0. - - A (input) COMPLEX array, dimension (LDA,N) - The factors L and U from the factorization A = P*L*U - as computed by CGETRF. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - IPIV (input) INTEGER array, dimension (N) - The pivot indices from CGETRF; for 1<=i<=N, row i of the - matrix was interchanged with row IPIV(i). - - B (input/output) COMPLEX array, dimension (LDB,NRHS) - On entry, the right hand side matrix B. - On exit, the solution matrix X. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - *info = 0; - notran = lsame_(trans, "N"); - if (! notran && ! lsame_(trans, "T") && ! lsame_( - trans, "C")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*nrhs < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*ldb < max(1,*n)) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGETRS", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0 || *nrhs == 0) { - return 0; - } - - if (notran) { - -/* - Solve A * X = B. - - Apply row interchanges to the right hand sides. -*/ - - claswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); - -/* Solve L*X = B, overwriting B with X. */ - - ctrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b56, &a[ - a_offset], lda, &b[b_offset], ldb); - -/* Solve U*X = B, overwriting B with X. */ - - ctrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b56, & - a[a_offset], lda, &b[b_offset], ldb); - } else { - -/* - Solve A**T * X = B or A**H * X = B. - - Solve U'*X = B, overwriting B with X. -*/ - - ctrsm_("Left", "Upper", trans, "Non-unit", n, nrhs, &c_b56, &a[ - a_offset], lda, &b[b_offset], ldb); - -/* Solve L'*X = B, overwriting B with X. */ - - ctrsm_("Left", "Lower", trans, "Unit", n, nrhs, &c_b56, &a[a_offset], - lda, &b[b_offset], ldb); - -/* Apply row interchanges to the solution vectors. */ - - claswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); - } - - return 0; - -/* End of CGETRS */ - -} /* cgetrs_ */ - -/* Subroutine */ int cheevd_(char *jobz, char *uplo, integer *n, complex *a, - integer *lda, real *w, complex *work, integer *lwork, real *rwork, - integer *lrwork, integer *iwork, integer *liwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - real r__1, r__2; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static real eps; - static integer inde; - static real anrm; - static integer imax; - static real rmin, rmax; - static integer lopt; - static real sigma; - extern logical lsame_(char *, char *); - static integer iinfo; - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); - static integer lwmin, liopt; - static logical lower; - static integer llrwk, lropt; - static logical wantz; - static integer indwk2, llwrk2; - extern doublereal clanhe_(char *, char *, integer *, complex *, integer *, - real *); - static integer iscale; - extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, - real *, integer *, integer *, complex *, integer *, integer *), cstedc_(char *, integer *, real *, real *, complex *, - integer *, complex *, integer *, real *, integer *, integer *, - integer *, integer *); - extern doublereal slamch_(char *); - extern /* Subroutine */ int chetrd_(char *, integer *, complex *, integer - *, real *, real *, complex *, complex *, integer *, integer *), clacpy_(char *, integer *, integer *, complex *, integer - *, complex *, integer *); - static real safmin; - extern /* Subroutine */ int xerbla_(char *, integer *); - static real bignum; - static integer indtau, indrwk, indwrk, liwmin; - extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *); - static integer lrwmin; - extern /* Subroutine */ int cunmtr_(char *, char *, char *, integer *, - integer *, complex *, integer *, complex *, complex *, integer *, - complex *, integer *, integer *); - static integer llwork; - static real smlnum; - static logical lquery; - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CHEEVD computes all eigenvalues and, optionally, eigenvectors of a - complex Hermitian matrix A. If eigenvectors are desired, it uses a - divide and conquer algorithm. - - The divide and conquer algorithm makes very mild assumptions about - floating point arithmetic. It will work on machines with a guard - digit in add/subtract, or on those binary machines without guard - digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or - Cray-2. It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. - - Arguments - ========= - - JOBZ (input) CHARACTER*1 - = 'N': Compute eigenvalues only; - = 'V': Compute eigenvalues and eigenvectors. - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA, N) - On entry, the Hermitian matrix A. If UPLO = 'U', the - leading N-by-N upper triangular part of A contains the - upper triangular part of the matrix A. If UPLO = 'L', - the leading N-by-N lower triangular part of A contains - the lower triangular part of the matrix A. - On exit, if JOBZ = 'V', then if INFO = 0, A contains the - orthonormal eigenvectors of the matrix A. - If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') - or the upper triangle (if UPLO='U') of A, including the - diagonal, is destroyed. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - W (output) REAL array, dimension (N) - If INFO = 0, the eigenvalues in ascending order. - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The length of the array WORK. - If N <= 1, LWORK must be at least 1. - If JOBZ = 'N' and N > 1, LWORK must be at least N + 1. - If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - RWORK (workspace/output) REAL array, - dimension (LRWORK) - On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. - - LRWORK (input) INTEGER - The dimension of the array RWORK. - If N <= 1, LRWORK must be at least 1. - If JOBZ = 'N' and N > 1, LRWORK must be at least N. - If JOBZ = 'V' and N > 1, LRWORK must be at least - 1 + 5*N + 2*N**2. - - If LRWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the RWORK array, - returns this value as the first entry of the RWORK array, and - no error message related to LRWORK is issued by XERBLA. - - IWORK (workspace/output) INTEGER array, dimension (LIWORK) - On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. - - LIWORK (input) INTEGER - The dimension of the array IWORK. - If N <= 1, LIWORK must be at least 1. - If JOBZ = 'N' and N > 1, LIWORK must be at least 1. - If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. - - If LIWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the IWORK array, - returns this value as the first entry of the IWORK array, and - no error message related to LIWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the algorithm failed to converge; i - off-diagonal elements of an intermediate tridiagonal - form did not converge to zero. - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --w; - --work; - --rwork; - --iwork; - - /* Function Body */ - wantz = lsame_(jobz, "V"); - lower = lsame_(uplo, "L"); - lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; - - *info = 0; - if (*n <= 1) { - lwmin = 1; - lrwmin = 1; - liwmin = 1; - lopt = lwmin; - lropt = lrwmin; - liopt = liwmin; - } else { - if (wantz) { - lwmin = (*n << 1) + *n * *n; -/* Computing 2nd power */ - i__1 = *n; - lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1); - liwmin = *n * 5 + 3; - } else { - lwmin = *n + 1; - lrwmin = *n; - liwmin = 1; - } - lopt = lwmin; - lropt = lrwmin; - liopt = liwmin; - } - if (! (wantz || lsame_(jobz, "N"))) { - *info = -1; - } else if (! (lower || lsame_(uplo, "U"))) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*lwork < lwmin && ! lquery) { - *info = -8; - } else if (*lrwork < lrwmin && ! lquery) { - *info = -10; - } else if (*liwork < liwmin && ! lquery) { - *info = -12; - } - - if (*info == 0) { - work[1].r = (real) lopt, work[1].i = 0.f; - rwork[1] = (real) lropt; - iwork[1] = liopt; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("CHEEVD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - if (*n == 1) { - i__1 = a_dim1 + 1; - w[1] = a[i__1].r; - if (wantz) { - i__1 = a_dim1 + 1; - a[i__1].r = 1.f, a[i__1].i = 0.f; - } - return 0; - } - -/* Get machine constants. */ - - safmin = slamch_("Safe minimum"); - eps = slamch_("Precision"); - smlnum = safmin / eps; - bignum = 1.f / smlnum; - rmin = sqrt(smlnum); - rmax = sqrt(bignum); - -/* Scale matrix to allowable range, if necessary. */ - - anrm = clanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]); - iscale = 0; - if (anrm > 0.f && anrm < rmin) { - iscale = 1; - sigma = rmin / anrm; - } else if (anrm > rmax) { - iscale = 1; - sigma = rmax / anrm; - } - if (iscale == 1) { - clascl_(uplo, &c__0, &c__0, &c_b871, &sigma, n, n, &a[a_offset], lda, - info); - } - -/* Call CHETRD to reduce Hermitian matrix to tridiagonal form. */ - - inde = 1; - indtau = 1; - indwrk = indtau + *n; - indrwk = inde + *n; - indwk2 = indwrk + *n * *n; - llwork = *lwork - indwrk + 1; - llwrk2 = *lwork - indwk2 + 1; - llrwk = *lrwork - indrwk + 1; - chetrd_(uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], &work[indtau], & - work[indwrk], &llwork, &iinfo); -/* Computing MAX */ - i__1 = indwrk; - r__1 = (real) lopt, r__2 = (real) (*n) + work[i__1].r; - lopt = dmax(r__1,r__2); - -/* - For eigenvalues only, call SSTERF. For eigenvectors, first call - CSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the - tridiagonal matrix, then call CUNMTR to multiply it to the - Householder transformations represented as Householder vectors in - A. -*/ - - if (! wantz) { - ssterf_(n, &w[1], &rwork[inde], info); - } else { - cstedc_("I", n, &w[1], &rwork[inde], &work[indwrk], n, &work[indwk2], - &llwrk2, &rwork[indrwk], &llrwk, &iwork[1], liwork, info); - cunmtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ - indwrk], n, &work[indwk2], &llwrk2, &iinfo); - clacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); -/* - Computing MAX - Computing 2nd power -*/ - i__3 = *n; - i__4 = indwk2; - i__1 = lopt, i__2 = *n + i__3 * i__3 + (integer) work[i__4].r; - lopt = max(i__1,i__2); - } - -/* If matrix was scaled, then rescale eigenvalues appropriately. */ - - if (iscale == 1) { - if (*info == 0) { - imax = *n; - } else { - imax = *info - 1; - } - r__1 = 1.f / sigma; - sscal_(&imax, &r__1, &w[1], &c__1); - } - - work[1].r = (real) lopt, work[1].i = 0.f; - rwork[1] = (real) lropt; - iwork[1] = liopt; - - return 0; - -/* End of CHEEVD */ - -} /* cheevd_ */ - -/* Subroutine */ int chetd2_(char *uplo, integer *n, complex *a, integer *lda, - real *d__, real *e, complex *tau, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - real r__1; - complex q__1, q__2, q__3, q__4; - - /* Local variables */ - static integer i__; - static complex taui; - extern /* Subroutine */ int cher2_(char *, integer *, complex *, complex * - , integer *, complex *, integer *, complex *, integer *); - static complex alpha; - extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer - *, complex *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int chemv_(char *, integer *, complex *, complex * - , integer *, complex *, integer *, complex *, complex *, integer * - ), caxpy_(integer *, complex *, complex *, integer *, - complex *, integer *); - static logical upper; - extern /* Subroutine */ int clarfg_(integer *, complex *, complex *, - integer *, complex *), xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - CHETD2 reduces a complex Hermitian matrix A to real symmetric - tridiagonal form T by a unitary similarity transformation: - Q' * A * Q = T. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the upper or lower triangular part of the - Hermitian matrix A is stored: - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the Hermitian matrix A. If UPLO = 'U', the leading - n-by-n upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading n-by-n lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - On exit, if UPLO = 'U', the diagonal and first superdiagonal - of A are overwritten by the corresponding elements of the - tridiagonal matrix T, and the elements above the first - superdiagonal, with the array TAU, represent the unitary - matrix Q as a product of elementary reflectors; if UPLO - = 'L', the diagonal and first subdiagonal of A are over- - written by the corresponding elements of the tridiagonal - matrix T, and the elements below the first subdiagonal, with - the array TAU, represent the unitary matrix Q as a product - of elementary reflectors. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - D (output) REAL array, dimension (N) - The diagonal elements of the tridiagonal matrix T: - D(i) = A(i,i). - - E (output) REAL array, dimension (N-1) - The off-diagonal elements of the tridiagonal matrix T: - E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. - - TAU (output) COMPLEX array, dimension (N-1) - The scalar factors of the elementary reflectors (see Further - Details). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - If UPLO = 'U', the matrix Q is represented as a product of elementary - reflectors - - Q = H(n-1) . . . H(2) H(1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in - A(1:i-1,i+1), and tau in TAU(i). - - If UPLO = 'L', the matrix Q is represented as a product of elementary - reflectors - - Q = H(1) H(2) . . . H(n-1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), - and tau in TAU(i). - - The contents of A on exit are illustrated by the following examples - with n = 5: - - if UPLO = 'U': if UPLO = 'L': - - ( d e v2 v3 v4 ) ( d ) - ( d e v3 v4 ) ( e d ) - ( d e v4 ) ( v1 e d ) - ( d e ) ( v1 v2 e d ) - ( d ) ( v1 v2 v3 e d ) - - where d and e denote diagonal and off-diagonal elements of T, and vi - denotes an element of the vector defining H(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tau; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CHETD2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n <= 0) { - return 0; - } - - if (upper) { - -/* Reduce the upper triangle of A */ - - i__1 = *n + *n * a_dim1; - i__2 = *n + *n * a_dim1; - r__1 = a[i__2].r; - a[i__1].r = r__1, a[i__1].i = 0.f; - for (i__ = *n - 1; i__ >= 1; --i__) { - -/* - Generate elementary reflector H(i) = I - tau * v * v' - to annihilate A(1:i-1,i+1) -*/ - - i__1 = i__ + (i__ + 1) * a_dim1; - alpha.r = a[i__1].r, alpha.i = a[i__1].i; - clarfg_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &taui); - i__1 = i__; - e[i__1] = alpha.r; - - if (taui.r != 0.f || taui.i != 0.f) { - -/* Apply H(i) from both sides to A(1:i,1:i) */ - - i__1 = i__ + (i__ + 1) * a_dim1; - a[i__1].r = 1.f, a[i__1].i = 0.f; - -/* Compute x := tau * A * v storing x in TAU(1:i) */ - - chemv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * - a_dim1 + 1], &c__1, &c_b55, &tau[1], &c__1) - ; - -/* Compute w := x - 1/2 * tau * (x'*v) * v */ - - q__3.r = -.5f, q__3.i = -0.f; - q__2.r = q__3.r * taui.r - q__3.i * taui.i, q__2.i = q__3.r * - taui.i + q__3.i * taui.r; - cdotc_(&q__4, &i__, &tau[1], &c__1, &a[(i__ + 1) * a_dim1 + 1] - , &c__1); - q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * - q__4.i + q__2.i * q__4.r; - alpha.r = q__1.r, alpha.i = q__1.i; - caxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[ - 1], &c__1); - -/* - Apply the transformation as a rank-2 update: - A := A - v * w' - w * v' -*/ - - q__1.r = -1.f, q__1.i = -0.f; - cher2_(uplo, &i__, &q__1, &a[(i__ + 1) * a_dim1 + 1], &c__1, & - tau[1], &c__1, &a[a_offset], lda); - - } else { - i__1 = i__ + i__ * a_dim1; - i__2 = i__ + i__ * a_dim1; - r__1 = a[i__2].r; - a[i__1].r = r__1, a[i__1].i = 0.f; - } - i__1 = i__ + (i__ + 1) * a_dim1; - i__2 = i__; - a[i__1].r = e[i__2], a[i__1].i = 0.f; - i__1 = i__ + 1; - i__2 = i__ + 1 + (i__ + 1) * a_dim1; - d__[i__1] = a[i__2].r; - i__1 = i__; - tau[i__1].r = taui.r, tau[i__1].i = taui.i; -/* L10: */ - } - i__1 = a_dim1 + 1; - d__[1] = a[i__1].r; - } else { - -/* Reduce the lower triangle of A */ - - i__1 = a_dim1 + 1; - i__2 = a_dim1 + 1; - r__1 = a[i__2].r; - a[i__1].r = r__1, a[i__1].i = 0.f; - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* - Generate elementary reflector H(i) = I - tau * v * v' - to annihilate A(i+2:n,i) -*/ - - i__2 = i__ + 1 + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *n - i__; -/* Computing MIN */ - i__3 = i__ + 2; - clarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, & - taui); - i__2 = i__; - e[i__2] = alpha.r; - - if (taui.r != 0.f || taui.i != 0.f) { - -/* Apply H(i) from both sides to A(i+1:n,i+1:n) */ - - i__2 = i__ + 1 + i__ * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - -/* Compute x := tau * A * v storing y in TAU(i:n-1) */ - - i__2 = *n - i__; - chemv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], - lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b55, &tau[ - i__], &c__1); - -/* Compute w := x - 1/2 * tau * (x'*v) * v */ - - q__3.r = -.5f, q__3.i = -0.f; - q__2.r = q__3.r * taui.r - q__3.i * taui.i, q__2.i = q__3.r * - taui.i + q__3.i * taui.r; - i__2 = *n - i__; - cdotc_(&q__4, &i__2, &tau[i__], &c__1, &a[i__ + 1 + i__ * - a_dim1], &c__1); - q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * - q__4.i + q__2.i * q__4.r; - alpha.r = q__1.r, alpha.i = q__1.i; - i__2 = *n - i__; - caxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ - i__], &c__1); - -/* - Apply the transformation as a rank-2 update: - A := A - v * w' - w * v' -*/ - - i__2 = *n - i__; - q__1.r = -1.f, q__1.i = -0.f; - cher2_(uplo, &i__2, &q__1, &a[i__ + 1 + i__ * a_dim1], &c__1, - &tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * a_dim1], - lda); - - } else { - i__2 = i__ + 1 + (i__ + 1) * a_dim1; - i__3 = i__ + 1 + (i__ + 1) * a_dim1; - r__1 = a[i__3].r; - a[i__2].r = r__1, a[i__2].i = 0.f; - } - i__2 = i__ + 1 + i__ * a_dim1; - i__3 = i__; - a[i__2].r = e[i__3], a[i__2].i = 0.f; - i__2 = i__; - i__3 = i__ + i__ * a_dim1; - d__[i__2] = a[i__3].r; - i__2 = i__; - tau[i__2].r = taui.r, tau[i__2].i = taui.i; -/* L20: */ - } - i__1 = *n; - i__2 = *n + *n * a_dim1; - d__[i__1] = a[i__2].r; - } - - return 0; - -/* End of CHETD2 */ - -} /* chetd2_ */ - -/* Subroutine */ int chetrd_(char *uplo, integer *n, complex *a, integer *lda, - real *d__, real *e, complex *tau, complex *work, integer *lwork, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - complex q__1; - - /* Local variables */ - static integer i__, j, nb, kk, nx, iws; - extern logical lsame_(char *, char *); - static integer nbmin, iinfo; - static logical upper; - extern /* Subroutine */ int chetd2_(char *, integer *, complex *, integer - *, real *, real *, complex *, integer *), cher2k_(char *, - char *, integer *, integer *, complex *, complex *, integer *, - complex *, integer *, real *, complex *, integer *), clatrd_(char *, integer *, integer *, complex *, integer - *, real *, complex *, complex *, integer *), xerbla_(char - *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CHETRD reduces a complex Hermitian matrix A to real symmetric - tridiagonal form T by a unitary similarity transformation: - Q**H * A * Q = T. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the Hermitian matrix A. If UPLO = 'U', the leading - N-by-N upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading N-by-N lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - On exit, if UPLO = 'U', the diagonal and first superdiagonal - of A are overwritten by the corresponding elements of the - tridiagonal matrix T, and the elements above the first - superdiagonal, with the array TAU, represent the unitary - matrix Q as a product of elementary reflectors; if UPLO - = 'L', the diagonal and first subdiagonal of A are over- - written by the corresponding elements of the tridiagonal - matrix T, and the elements below the first subdiagonal, with - the array TAU, represent the unitary matrix Q as a product - of elementary reflectors. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - D (output) REAL array, dimension (N) - The diagonal elements of the tridiagonal matrix T: - D(i) = A(i,i). - - E (output) REAL array, dimension (N-1) - The off-diagonal elements of the tridiagonal matrix T: - E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. - - TAU (output) COMPLEX array, dimension (N-1) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= 1. - For optimum performance LWORK >= N*NB, where NB is the - optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - If UPLO = 'U', the matrix Q is represented as a product of elementary - reflectors - - Q = H(n-1) . . . H(2) H(1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in - A(1:i-1,i+1), and tau in TAU(i). - - If UPLO = 'L', the matrix Q is represented as a product of elementary - reflectors - - Q = H(1) H(2) . . . H(n-1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), - and tau in TAU(i). - - The contents of A on exit are illustrated by the following examples - with n = 5: - - if UPLO = 'U': if UPLO = 'L': - - ( d e v2 v3 v4 ) ( d ) - ( d e v3 v4 ) ( e d ) - ( d e v4 ) ( v1 e d ) - ( d e ) ( v1 v2 e d ) - ( d ) ( v1 v2 v3 e d ) - - where d and e denote diagonal and off-diagonal elements of T, and vi - denotes an element of the vector defining H(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tau; - --work; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - lquery = *lwork == -1; - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } else if (*lwork < 1 && ! lquery) { - *info = -9; - } - - if (*info == 0) { - -/* Determine the block size. */ - - nb = ilaenv_(&c__1, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, - (ftnlen)1); - lwkopt = *n * nb; - work[1].r = (real) lwkopt, work[1].i = 0.f; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("CHETRD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - work[1].r = 1.f, work[1].i = 0.f; - return 0; - } - - nx = *n; - iws = 1; - if (nb > 1 && nb < *n) { - -/* - Determine when to cross over from blocked to unblocked code - (last block is always handled by unblocked code). - - Computing MAX -*/ - i__1 = nb, i__2 = ilaenv_(&c__3, "CHETRD", uplo, n, &c_n1, &c_n1, & - c_n1, (ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < *n) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *n; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: determine the - minimum value of NB, and reduce NB or force use of - unblocked code by setting NX = N. - - Computing MAX -*/ - i__1 = *lwork / ldwork; - nb = max(i__1,1); - nbmin = ilaenv_(&c__2, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1, - (ftnlen)6, (ftnlen)1); - if (nb < nbmin) { - nx = *n; - } - } - } else { - nx = *n; - } - } else { - nb = 1; - } - - if (upper) { - -/* - Reduce the upper triangle of A. - Columns 1:kk are handled by the unblocked method. -*/ - - kk = *n - (*n - nx + nb - 1) / nb * nb; - i__1 = kk + 1; - i__2 = -nb; - for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += - i__2) { - -/* - Reduce columns i:i+nb-1 to tridiagonal form and form the - matrix W which is needed to update the unreduced part of - the matrix -*/ - - i__3 = i__ + nb - 1; - clatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & - work[1], &ldwork); - -/* - Update the unreduced submatrix A(1:i-1,1:i-1), using an - update of the form: A := A - V*W' - W*V' -*/ - - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cher2k_(uplo, "No transpose", &i__3, &nb, &q__1, &a[i__ * a_dim1 - + 1], lda, &work[1], &ldwork, &c_b871, &a[a_offset], lda); - -/* - Copy superdiagonal elements back into A, and diagonal - elements into D -*/ - - i__3 = i__ + nb - 1; - for (j = i__; j <= i__3; ++j) { - i__4 = j - 1 + j * a_dim1; - i__5 = j - 1; - a[i__4].r = e[i__5], a[i__4].i = 0.f; - i__4 = j; - i__5 = j + j * a_dim1; - d__[i__4] = a[i__5].r; -/* L10: */ - } -/* L20: */ - } - -/* Use unblocked code to reduce the last or only block */ - - chetd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo); - } else { - -/* Reduce the lower triangle of A */ - - i__2 = *n - nx; - i__1 = nb; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { - -/* - Reduce columns i:i+nb-1 to tridiagonal form and form the - matrix W which is needed to update the unreduced part of - the matrix -*/ - - i__3 = *n - i__ + 1; - clatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], & - tau[i__], &work[1], &ldwork); - -/* - Update the unreduced submatrix A(i+nb:n,i+nb:n), using - an update of the form: A := A - V*W' - W*V' -*/ - - i__3 = *n - i__ - nb + 1; - q__1.r = -1.f, q__1.i = -0.f; - cher2k_(uplo, "No transpose", &i__3, &nb, &q__1, &a[i__ + nb + - i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b871, &a[ - i__ + nb + (i__ + nb) * a_dim1], lda); - -/* - Copy subdiagonal elements back into A, and diagonal - elements into D -*/ - - i__3 = i__ + nb - 1; - for (j = i__; j <= i__3; ++j) { - i__4 = j + 1 + j * a_dim1; - i__5 = j; - a[i__4].r = e[i__5], a[i__4].i = 0.f; - i__4 = j; - i__5 = j + j * a_dim1; - d__[i__4] = a[i__5].r; -/* L30: */ - } -/* L40: */ - } - -/* Use unblocked code to reduce the last or only block */ - - i__1 = *n - i__ + 1; - chetd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], - &tau[i__], &iinfo); - } - - work[1].r = (real) lwkopt, work[1].i = 0.f; - return 0; - -/* End of CHETRD */ - -} /* chetrd_ */ - -/* Subroutine */ int chseqr_(char *job, char *compz, integer *n, integer *ilo, - integer *ihi, complex *h__, integer *ldh, complex *w, complex *z__, - integer *ldz, complex *work, integer *lwork, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4[2], - i__5, i__6; - real r__1, r__2, r__3, r__4; - complex q__1; - char ch__1[2]; - - /* Builtin functions */ - double r_imag(complex *); - void r_cnjg(complex *, complex *); - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i__, j, k, l; - static complex s[225] /* was [15][15] */, v[16]; - static integer i1, i2, ii, nh, nr, ns, nv; - static complex vv[16]; - static integer itn; - static complex tau; - static integer its; - static real ulp, tst1; - static integer maxb, ierr; - static real unfl; - static complex temp; - static real ovfl; - extern /* Subroutine */ int cscal_(integer *, complex *, complex *, - integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * - , complex *, integer *, complex *, integer *, complex *, complex * - , integer *), ccopy_(integer *, complex *, integer *, - complex *, integer *); - static integer itemp; - static real rtemp; - static logical initz, wantt, wantz; - static real rwork[1]; - extern doublereal slapy2_(real *, real *); - extern /* Subroutine */ int slabad_(real *, real *), clarfg_(integer *, - complex *, complex *, integer *, complex *); - extern integer icamax_(integer *, complex *, integer *); - extern doublereal slamch_(char *), clanhs_(char *, integer *, - complex *, integer *, real *); - extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer - *), clahqr_(logical *, logical *, integer *, integer *, integer *, - complex *, integer *, complex *, integer *, integer *, complex *, - integer *, integer *), clacpy_(char *, integer *, integer *, - complex *, integer *, complex *, integer *), claset_(char - *, integer *, integer *, complex *, complex *, complex *, integer - *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int clarfx_(char *, integer *, integer *, complex - *, complex *, complex *, integer *, complex *); - static real smlnum; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CHSEQR computes the eigenvalues of a complex upper Hessenberg - matrix H, and, optionally, the matrices T and Z from the Schur - decomposition H = Z T Z**H, where T is an upper triangular matrix - (the Schur form), and Z is the unitary matrix of Schur vectors. - - Optionally Z may be postmultiplied into an input unitary matrix Q, - so that this routine can give the Schur factorization of a matrix A - which has been reduced to the Hessenberg form H by the unitary - matrix Q: A = Q*H*Q**H = (QZ)*T*(QZ)**H. - - Arguments - ========= - - JOB (input) CHARACTER*1 - = 'E': compute eigenvalues only; - = 'S': compute eigenvalues and the Schur form T. - - COMPZ (input) CHARACTER*1 - = 'N': no Schur vectors are computed; - = 'I': Z is initialized to the unit matrix and the matrix Z - of Schur vectors of H is returned; - = 'V': Z must contain an unitary matrix Q on entry, and - the product Q*Z is returned. - - N (input) INTEGER - The order of the matrix H. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that H is already upper triangular in rows - and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally - set by a previous call to CGEBAL, and then passed to CGEHRD - when the matrix output by CGEBAL is reduced to Hessenberg - form. Otherwise ILO and IHI should be set to 1 and N - respectively. - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - - H (input/output) COMPLEX array, dimension (LDH,N) - On entry, the upper Hessenberg matrix H. - On exit, if JOB = 'S', H contains the upper triangular matrix - T from the Schur decomposition (the Schur form). If - JOB = 'E', the contents of H are unspecified on exit. - - LDH (input) INTEGER - The leading dimension of the array H. LDH >= max(1,N). - - W (output) COMPLEX array, dimension (N) - The computed eigenvalues. If JOB = 'S', the eigenvalues are - stored in the same order as on the diagonal of the Schur form - returned in H, with W(i) = H(i,i). - - Z (input/output) COMPLEX array, dimension (LDZ,N) - If COMPZ = 'N': Z is not referenced. - If COMPZ = 'I': on entry, Z need not be set, and on exit, Z - contains the unitary matrix Z of the Schur vectors of H. - If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q, - which is assumed to be equal to the unit matrix except for - the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z. - Normally Q is the unitary matrix generated by CUNGHR after - the call to CGEHRD which formed the Hessenberg matrix H. - - LDZ (input) INTEGER - The leading dimension of the array Z. - LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise. - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,N). - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, CHSEQR failed to compute all the - eigenvalues in a total of 30*(IHI-ILO+1) iterations; - elements 1:ilo-1 and i+1:n of W contain those - eigenvalues which have been successfully computed. - - ===================================================================== - - - Decode and test the input parameters -*/ - - /* Parameter adjustments */ - h_dim1 = *ldh; - h_offset = 1 + h_dim1; - h__ -= h_offset; - --w; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - --work; - - /* Function Body */ - wantt = lsame_(job, "S"); - initz = lsame_(compz, "I"); - wantz = initz || lsame_(compz, "V"); - - *info = 0; - i__1 = max(1,*n); - work[1].r = (real) i__1, work[1].i = 0.f; - lquery = *lwork == -1; - if (! lsame_(job, "E") && ! wantt) { - *info = -1; - } else if (! lsame_(compz, "N") && ! wantz) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*ilo < 1 || *ilo > max(1,*n)) { - *info = -4; - } else if (*ihi < min(*ilo,*n) || *ihi > *n) { - *info = -5; - } else if (*ldh < max(1,*n)) { - *info = -7; - } else if (*ldz < 1 || wantz && *ldz < max(1,*n)) { - *info = -10; - } else if (*lwork < max(1,*n) && ! lquery) { - *info = -12; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CHSEQR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Initialize Z, if necessary */ - - if (initz) { - claset_("Full", n, n, &c_b55, &c_b56, &z__[z_offset], ldz); - } - -/* Store the eigenvalues isolated by CGEBAL. */ - - i__1 = *ilo - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__ + i__ * h_dim1; - w[i__2].r = h__[i__3].r, w[i__2].i = h__[i__3].i; -/* L10: */ - } - i__1 = *n; - for (i__ = *ihi + 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__ + i__ * h_dim1; - w[i__2].r = h__[i__3].r, w[i__2].i = h__[i__3].i; -/* L20: */ - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - if (*ilo == *ihi) { - i__1 = *ilo; - i__2 = *ilo + *ilo * h_dim1; - w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; - return 0; - } - -/* - Set rows and columns ILO to IHI to zero below the first - subdiagonal. -*/ - - i__1 = *ihi - 2; - for (j = *ilo; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j + 2; i__ <= i__2; ++i__) { - i__3 = i__ + j * h_dim1; - h__[i__3].r = 0.f, h__[i__3].i = 0.f; -/* L30: */ - } -/* L40: */ - } - nh = *ihi - *ilo + 1; - -/* - I1 and I2 are the indices of the first row and last column of H - to which transformations must be applied. If eigenvalues only are - being computed, I1 and I2 are re-set inside the main loop. -*/ - - if (wantt) { - i1 = 1; - i2 = *n; - } else { - i1 = *ilo; - i2 = *ihi; - } - -/* Ensure that the subdiagonal elements are real. */ - - i__1 = *ihi; - for (i__ = *ilo + 1; i__ <= i__1; ++i__) { - i__2 = i__ + (i__ - 1) * h_dim1; - temp.r = h__[i__2].r, temp.i = h__[i__2].i; - if (r_imag(&temp) != 0.f) { - r__1 = temp.r; - r__2 = r_imag(&temp); - rtemp = slapy2_(&r__1, &r__2); - i__2 = i__ + (i__ - 1) * h_dim1; - h__[i__2].r = rtemp, h__[i__2].i = 0.f; - q__1.r = temp.r / rtemp, q__1.i = temp.i / rtemp; - temp.r = q__1.r, temp.i = q__1.i; - if (i2 > i__) { - i__2 = i2 - i__; - r_cnjg(&q__1, &temp); - cscal_(&i__2, &q__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); - } - i__2 = i__ - i1; - cscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1); - if (i__ < *ihi) { - i__2 = i__ + 1 + i__ * h_dim1; - i__3 = i__ + 1 + i__ * h_dim1; - q__1.r = temp.r * h__[i__3].r - temp.i * h__[i__3].i, q__1.i = - temp.r * h__[i__3].i + temp.i * h__[i__3].r; - h__[i__2].r = q__1.r, h__[i__2].i = q__1.i; - } - if (wantz) { - cscal_(&nh, &temp, &z__[*ilo + i__ * z_dim1], &c__1); - } - } -/* L50: */ - } - -/* - Determine the order of the multi-shift QR algorithm to be used. - - Writing concatenation -*/ - i__4[0] = 1, a__1[0] = job; - i__4[1] = 1, a__1[1] = compz; - s_cat(ch__1, a__1, i__4, &c__2, (ftnlen)2); - ns = ilaenv_(&c__4, "CHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( - ftnlen)2); -/* Writing concatenation */ - i__4[0] = 1, a__1[0] = job; - i__4[1] = 1, a__1[1] = compz; - s_cat(ch__1, a__1, i__4, &c__2, (ftnlen)2); - maxb = ilaenv_(&c__8, "CHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( - ftnlen)2); - if (ns <= 1 || ns > nh || maxb >= nh) { - -/* Use the standard double-shift algorithm */ - - clahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], ilo, - ihi, &z__[z_offset], ldz, info); - return 0; - } - maxb = max(2,maxb); -/* Computing MIN */ - i__1 = min(ns,maxb); - ns = min(i__1,15); - -/* - Now 1 < NS <= MAXB < NH. - - Set machine-dependent constants for the stopping criterion. - If norm(H) <= sqrt(OVFL), overflow should not occur. -*/ - - unfl = slamch_("Safe minimum"); - ovfl = 1.f / unfl; - slabad_(&unfl, &ovfl); - ulp = slamch_("Precision"); - smlnum = unfl * (nh / ulp); - -/* ITN is the total number of multiple-shift QR iterations allowed. */ - - itn = nh * 30; - -/* - The main loop begins here. I is the loop index and decreases from - IHI to ILO in steps of at most MAXB. Each iteration of the loop - works with the active submatrix in rows and columns L to I. - Eigenvalues I+1 to IHI have already converged. Either L = ILO, or - H(L,L-1) is negligible so that the matrix splits. -*/ - - i__ = *ihi; -L60: - if (i__ < *ilo) { - goto L180; - } - -/* - Perform multiple-shift QR iterations on rows and columns ILO to I - until a submatrix of order at most MAXB splits off at the bottom - because a subdiagonal element has become negligible. -*/ - - l = *ilo; - i__1 = itn; - for (its = 0; its <= i__1; ++its) { - -/* Look for a single small subdiagonal element. */ - - i__2 = l + 1; - for (k = i__; k >= i__2; --k) { - i__3 = k - 1 + (k - 1) * h_dim1; - i__5 = k + k * h_dim1; - tst1 = (r__1 = h__[i__3].r, dabs(r__1)) + (r__2 = r_imag(&h__[k - - 1 + (k - 1) * h_dim1]), dabs(r__2)) + ((r__3 = h__[i__5] - .r, dabs(r__3)) + (r__4 = r_imag(&h__[k + k * h_dim1]), - dabs(r__4))); - if (tst1 == 0.f) { - i__3 = i__ - l + 1; - tst1 = clanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, rwork); - } - i__3 = k + (k - 1) * h_dim1; -/* Computing MAX */ - r__2 = ulp * tst1; - if ((r__1 = h__[i__3].r, dabs(r__1)) <= dmax(r__2,smlnum)) { - goto L80; - } -/* L70: */ - } -L80: - l = k; - if (l > *ilo) { - -/* H(L,L-1) is negligible. */ - - i__2 = l + (l - 1) * h_dim1; - h__[i__2].r = 0.f, h__[i__2].i = 0.f; - } - -/* Exit from loop if a submatrix of order <= MAXB has split off. */ - - if (l >= i__ - maxb + 1) { - goto L170; - } - -/* - Now the active submatrix is in rows and columns L to I. If - eigenvalues only are being computed, only the active submatrix - need be transformed. -*/ - - if (! wantt) { - i1 = l; - i2 = i__; - } - - if (its == 20 || its == 30) { - -/* Exceptional shifts. */ - - i__2 = i__; - for (ii = i__ - ns + 1; ii <= i__2; ++ii) { - i__3 = ii; - i__5 = ii + (ii - 1) * h_dim1; - i__6 = ii + ii * h_dim1; - r__3 = ((r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = h__[i__6] - .r, dabs(r__2))) * 1.5f; - w[i__3].r = r__3, w[i__3].i = 0.f; -/* L90: */ - } - } else { - -/* Use eigenvalues of trailing submatrix of order NS as shifts. */ - - clacpy_("Full", &ns, &ns, &h__[i__ - ns + 1 + (i__ - ns + 1) * - h_dim1], ldh, s, &c__15); - clahqr_(&c_false, &c_false, &ns, &c__1, &ns, s, &c__15, &w[i__ - - ns + 1], &c__1, &ns, &z__[z_offset], ldz, &ierr); - if (ierr > 0) { - -/* - If CLAHQR failed to compute all NS eigenvalues, use the - unconverged diagonal elements as the remaining shifts. -*/ - - i__2 = ierr; - for (ii = 1; ii <= i__2; ++ii) { - i__3 = i__ - ns + ii; - i__5 = ii + ii * 15 - 16; - w[i__3].r = s[i__5].r, w[i__3].i = s[i__5].i; -/* L100: */ - } - } - } - -/* - Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns)) - where G is the Hessenberg submatrix H(L:I,L:I) and w is - the vector of shifts (stored in W). The result is - stored in the local array V. -*/ - - v[0].r = 1.f, v[0].i = 0.f; - i__2 = ns + 1; - for (ii = 2; ii <= i__2; ++ii) { - i__3 = ii - 1; - v[i__3].r = 0.f, v[i__3].i = 0.f; -/* L110: */ - } - nv = 1; - i__2 = i__; - for (j = i__ - ns + 1; j <= i__2; ++j) { - i__3 = nv + 1; - ccopy_(&i__3, v, &c__1, vv, &c__1); - i__3 = nv + 1; - i__5 = j; - q__1.r = -w[i__5].r, q__1.i = -w[i__5].i; - cgemv_("No transpose", &i__3, &nv, &c_b56, &h__[l + l * h_dim1], - ldh, vv, &c__1, &q__1, v, &c__1); - ++nv; - -/* - Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero, - reset it to the unit vector. -*/ - - itemp = icamax_(&nv, v, &c__1); - i__3 = itemp - 1; - rtemp = (r__1 = v[i__3].r, dabs(r__1)) + (r__2 = r_imag(&v[itemp - - 1]), dabs(r__2)); - if (rtemp == 0.f) { - v[0].r = 1.f, v[0].i = 0.f; - i__3 = nv; - for (ii = 2; ii <= i__3; ++ii) { - i__5 = ii - 1; - v[i__5].r = 0.f, v[i__5].i = 0.f; -/* L120: */ - } - } else { - rtemp = dmax(rtemp,smlnum); - r__1 = 1.f / rtemp; - csscal_(&nv, &r__1, v, &c__1); - } -/* L130: */ - } - -/* Multiple-shift QR step */ - - i__2 = i__ - 1; - for (k = l; k <= i__2; ++k) { - -/* - The first iteration of this loop determines a reflection G - from the vector V and applies it from left and right to H, - thus creating a nonzero bulge below the subdiagonal. - - Each subsequent iteration determines a reflection G to - restore the Hessenberg form in the (K-1)th column, and thus - chases the bulge one step toward the bottom of the active - submatrix. NR is the order of G. - - Computing MIN -*/ - i__3 = ns + 1, i__5 = i__ - k + 1; - nr = min(i__3,i__5); - if (k > l) { - ccopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); - } - clarfg_(&nr, v, &v[1], &c__1, &tau); - if (k > l) { - i__3 = k + (k - 1) * h_dim1; - h__[i__3].r = v[0].r, h__[i__3].i = v[0].i; - i__3 = i__; - for (ii = k + 1; ii <= i__3; ++ii) { - i__5 = ii + (k - 1) * h_dim1; - h__[i__5].r = 0.f, h__[i__5].i = 0.f; -/* L140: */ - } - } - v[0].r = 1.f, v[0].i = 0.f; - -/* - Apply G' from the left to transform the rows of the matrix - in columns K to I2. -*/ - - i__3 = i2 - k + 1; - r_cnjg(&q__1, &tau); - clarfx_("Left", &nr, &i__3, v, &q__1, &h__[k + k * h_dim1], ldh, & - work[1]); - -/* - Apply G from the right to transform the columns of the - matrix in rows I1 to min(K+NR,I). - - Computing MIN -*/ - i__5 = k + nr; - i__3 = min(i__5,i__) - i1 + 1; - clarfx_("Right", &i__3, &nr, v, &tau, &h__[i1 + k * h_dim1], ldh, - &work[1]); - - if (wantz) { - -/* Accumulate transformations in the matrix Z */ - - clarfx_("Right", &nh, &nr, v, &tau, &z__[*ilo + k * z_dim1], - ldz, &work[1]); - } -/* L150: */ - } - -/* Ensure that H(I,I-1) is real. */ - - i__2 = i__ + (i__ - 1) * h_dim1; - temp.r = h__[i__2].r, temp.i = h__[i__2].i; - if (r_imag(&temp) != 0.f) { - r__1 = temp.r; - r__2 = r_imag(&temp); - rtemp = slapy2_(&r__1, &r__2); - i__2 = i__ + (i__ - 1) * h_dim1; - h__[i__2].r = rtemp, h__[i__2].i = 0.f; - q__1.r = temp.r / rtemp, q__1.i = temp.i / rtemp; - temp.r = q__1.r, temp.i = q__1.i; - if (i2 > i__) { - i__2 = i2 - i__; - r_cnjg(&q__1, &temp); - cscal_(&i__2, &q__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); - } - i__2 = i__ - i1; - cscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1); - if (wantz) { - cscal_(&nh, &temp, &z__[*ilo + i__ * z_dim1], &c__1); - } - } - -/* L160: */ - } - -/* Failure to converge in remaining number of iterations */ - - *info = i__; - return 0; - -L170: - -/* - A submatrix of order <= MAXB in rows and columns L to I has split - off. Use the double-shift QR algorithm to handle it. -*/ - - clahqr_(&wantt, &wantz, n, &l, &i__, &h__[h_offset], ldh, &w[1], ilo, ihi, - &z__[z_offset], ldz, info); - if (*info > 0) { - return 0; - } - -/* - Decrement number of remaining iterations, and return to start of - the main loop with a new value of I. -*/ - - itn -= its; - i__ = l - 1; - goto L60; - -L180: - i__1 = max(1,*n); - work[1].r = (real) i__1, work[1].i = 0.f; - return 0; - -/* End of CHSEQR */ - -} /* chseqr_ */ - -/* Subroutine */ int clabrd_(integer *m, integer *n, integer *nb, complex *a, - integer *lda, real *d__, real *e, complex *tauq, complex *taup, - complex *x, integer *ldx, complex *y, integer *ldy) -{ - /* System generated locals */ - integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, - i__3; - complex q__1; - - /* Local variables */ - static integer i__; - static complex alpha; - extern /* Subroutine */ int cscal_(integer *, complex *, complex *, - integer *), cgemv_(char *, integer *, integer *, complex *, - complex *, integer *, complex *, integer *, complex *, complex *, - integer *), clarfg_(integer *, complex *, complex *, - integer *, complex *), clacgv_(integer *, complex *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CLABRD reduces the first NB rows and columns of a complex general - m by n matrix A to upper or lower real bidiagonal form by a unitary - transformation Q' * A * P, and returns the matrices X and Y which - are needed to apply the transformation to the unreduced part of A. - - If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower - bidiagonal form. - - This is an auxiliary routine called by CGEBRD - - Arguments - ========= - - M (input) INTEGER - The number of rows in the matrix A. - - N (input) INTEGER - The number of columns in the matrix A. - - NB (input) INTEGER - The number of leading rows and columns of A to be reduced. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the m by n general matrix to be reduced. - On exit, the first NB rows and columns of the matrix are - overwritten; the rest of the array is unchanged. - If m >= n, elements on and below the diagonal in the first NB - columns, with the array TAUQ, represent the unitary - matrix Q as a product of elementary reflectors; and - elements above the diagonal in the first NB rows, with the - array TAUP, represent the unitary matrix P as a product - of elementary reflectors. - If m < n, elements below the diagonal in the first NB - columns, with the array TAUQ, represent the unitary - matrix Q as a product of elementary reflectors, and - elements on and above the diagonal in the first NB rows, - with the array TAUP, represent the unitary matrix P as - a product of elementary reflectors. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - D (output) REAL array, dimension (NB) - The diagonal elements of the first NB rows and columns of - the reduced matrix. D(i) = A(i,i). - - E (output) REAL array, dimension (NB) - The off-diagonal elements of the first NB rows and columns of - the reduced matrix. - - TAUQ (output) COMPLEX array dimension (NB) - The scalar factors of the elementary reflectors which - represent the unitary matrix Q. See Further Details. - - TAUP (output) COMPLEX array, dimension (NB) - The scalar factors of the elementary reflectors which - represent the unitary matrix P. See Further Details. - - X (output) COMPLEX array, dimension (LDX,NB) - The m-by-nb matrix X required to update the unreduced part - of A. - - LDX (input) INTEGER - The leading dimension of the array X. LDX >= max(1,M). - - Y (output) COMPLEX array, dimension (LDY,NB) - The n-by-nb matrix Y required to update the unreduced part - of A. - - LDY (output) INTEGER - The leading dimension of the array Y. LDY >= max(1,N). - - Further Details - =============== - - The matrices Q and P are represented as products of elementary - reflectors: - - Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are complex scalars, and v and u are complex - vectors. - - If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in - A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in - A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). - - If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in - A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in - A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). - - The elements of the vectors v and u together form the m-by-nb matrix - V and the nb-by-n matrix U' which are needed, with X and Y, to apply - the transformation to the unreduced part of the matrix, using a block - update of the form: A := A - V*Y' - X*U'. - - The contents of A on exit are illustrated by the following examples - with nb = 2: - - m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): - - ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) - ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) - ( v1 v2 a a a ) ( v1 1 a a a a ) - ( v1 v2 a a a ) ( v1 v2 a a a a ) - ( v1 v2 a a a ) ( v1 v2 a a a a ) - ( v1 v2 a a a ) - - where a denotes an element of the original matrix which is unchanged, - vi denotes an element of the vector defining H(i), and ui an element - of the vector defining G(i). - - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tauq; - --taup; - x_dim1 = *ldx; - x_offset = 1 + x_dim1; - x -= x_offset; - y_dim1 = *ldy; - y_offset = 1 + y_dim1; - y -= y_offset; - - /* Function Body */ - if (*m <= 0 || *n <= 0) { - return 0; - } - - if (*m >= *n) { - -/* Reduce to upper bidiagonal form */ - - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Update A(i:m,i) */ - - i__2 = i__ - 1; - clacgv_(&i__2, &y[i__ + y_dim1], ldy); - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + a_dim1], lda, - &y[i__ + y_dim1], ldy, &c_b56, &a[i__ + i__ * a_dim1], & - c__1); - i__2 = i__ - 1; - clacgv_(&i__2, &y[i__ + y_dim1], ldy); - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &x[i__ + x_dim1], ldx, - &a[i__ * a_dim1 + 1], &c__1, &c_b56, &a[i__ + i__ * - a_dim1], &c__1); - -/* Generate reflection Q(i) to annihilate A(i+1:m,i) */ - - i__2 = i__ + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *m - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - clarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, & - tauq[i__]); - i__2 = i__; - d__[i__2] = alpha.r; - if (i__ < *n) { - i__2 = i__ + i__ * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - -/* Compute Y(i+1:n,i) */ - - i__2 = *m - i__ + 1; - i__3 = *n - i__; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + ( - i__ + 1) * a_dim1], lda, &a[i__ + i__ * a_dim1], & - c__1, &c_b55, &y[i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + - a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b55, & - y[i__ * y_dim1 + 1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &y[i__ + 1 + - y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b56, &y[ - i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &x[i__ + - x_dim1], ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b55, & - y[i__ * y_dim1 + 1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &a[(i__ + - 1) * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & - c_b56, &y[i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *n - i__; - cscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); - -/* Update A(i,i+1:n) */ - - i__2 = *n - i__; - clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); - clacgv_(&i__, &a[i__ + a_dim1], lda); - i__2 = *n - i__; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__, &q__1, &y[i__ + 1 + - y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b56, &a[i__ + - (i__ + 1) * a_dim1], lda); - clacgv_(&i__, &a[i__ + a_dim1], lda); - i__2 = i__ - 1; - clacgv_(&i__2, &x[i__ + x_dim1], ldx); - i__2 = i__ - 1; - i__3 = *n - i__; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &a[(i__ + - 1) * a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b56, - &a[i__ + (i__ + 1) * a_dim1], lda); - i__2 = i__ - 1; - clacgv_(&i__2, &x[i__ + x_dim1], ldx); - -/* Generate reflection P(i) to annihilate A(i,i+2:n) */ - - i__2 = i__ + (i__ + 1) * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *n - i__; -/* Computing MIN */ - i__3 = i__ + 2; - clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & - taup[i__]); - i__2 = i__; - e[i__2] = alpha.r; - i__2 = i__ + (i__ + 1) * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - -/* Compute X(i+1:m,i) */ - - i__2 = *m - i__; - i__3 = *n - i__; - cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[i__ + 1 + ( - i__ + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1], - lda, &c_b55, &x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *n - i__; - cgemv_("Conjugate transpose", &i__2, &i__, &c_b56, &y[i__ + 1 - + y_dim1], ldy, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b55, &x[i__ * x_dim1 + 1], &c__1); - i__2 = *m - i__; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__, &q__1, &a[i__ + 1 + - a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b56, &x[ - i__ + 1 + i__ * x_dim1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[(i__ + 1) * - a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b55, &x[i__ * x_dim1 + 1], &c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &x[i__ + 1 + - x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b56, &x[ - i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *m - i__; - cscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *n - i__; - clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); - } -/* L10: */ - } - } else { - -/* Reduce to lower bidiagonal form */ - - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Update A(i,i:n) */ - - i__2 = *n - i__ + 1; - clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); - i__2 = i__ - 1; - clacgv_(&i__2, &a[i__ + a_dim1], lda); - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &y[i__ + y_dim1], ldy, - &a[i__ + a_dim1], lda, &c_b56, &a[i__ + i__ * a_dim1], - lda); - i__2 = i__ - 1; - clacgv_(&i__2, &a[i__ + a_dim1], lda); - i__2 = i__ - 1; - clacgv_(&i__2, &x[i__ + x_dim1], ldx); - i__2 = i__ - 1; - i__3 = *n - i__ + 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &a[i__ * - a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b56, &a[i__ + - i__ * a_dim1], lda); - i__2 = i__ - 1; - clacgv_(&i__2, &x[i__ + x_dim1], ldx); - -/* Generate reflection P(i) to annihilate A(i,i+1:n) */ - - i__2 = i__ + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *n - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & - taup[i__]); - i__2 = i__; - d__[i__2] = alpha.r; - if (i__ < *m) { - i__2 = i__ + i__ * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - -/* Compute X(i+1:m,i) */ - - i__2 = *m - i__; - i__3 = *n - i__ + 1; - cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[i__ + 1 + i__ - * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &c_b55, & - x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &y[i__ + - y_dim1], ldy, &a[i__ + i__ * a_dim1], lda, &c_b55, &x[ - i__ * x_dim1 + 1], &c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + 1 + - a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b56, &x[ - i__ + 1 + i__ * x_dim1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__ + 1; - cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[i__ * a_dim1 - + 1], lda, &a[i__ + i__ * a_dim1], lda, &c_b55, &x[ - i__ * x_dim1 + 1], &c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &x[i__ + 1 + - x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b56, &x[ - i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *m - i__; - cscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *n - i__ + 1; - clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); - -/* Update A(i+1:m,i) */ - - i__2 = i__ - 1; - clacgv_(&i__2, &y[i__ + y_dim1], ldy); - i__2 = *m - i__; - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + 1 + - a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b56, &a[i__ + - 1 + i__ * a_dim1], &c__1); - i__2 = i__ - 1; - clacgv_(&i__2, &y[i__ + y_dim1], ldy); - i__2 = *m - i__; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__, &q__1, &x[i__ + 1 + - x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b56, &a[ - i__ + 1 + i__ * a_dim1], &c__1); - -/* Generate reflection Q(i) to annihilate A(i+2:m,i) */ - - i__2 = i__ + 1 + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *m - i__; -/* Computing MIN */ - i__3 = i__ + 2; - clarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, - &tauq[i__]); - i__2 = i__; - e[i__2] = alpha.r; - i__2 = i__ + 1 + i__ * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - -/* Compute Y(i+1:n,i) */ - - i__2 = *m - i__; - i__3 = *n - i__; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + - 1 + (i__ + 1) * a_dim1], lda, &a[i__ + 1 + i__ * - a_dim1], &c__1, &c_b55, &y[i__ + 1 + i__ * y_dim1], & - c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + - 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b55, &y[i__ * y_dim1 + 1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &y[i__ + 1 + - y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b56, &y[ - i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *m - i__; - cgemv_("Conjugate transpose", &i__2, &i__, &c_b56, &x[i__ + 1 - + x_dim1], ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b55, &y[i__ * y_dim1 + 1], &c__1); - i__2 = *n - i__; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("Conjugate transpose", &i__, &i__2, &q__1, &a[(i__ + 1) - * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & - c_b56, &y[i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *n - i__; - cscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); - } else { - i__2 = *n - i__ + 1; - clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); - } -/* L20: */ - } - } - return 0; - -/* End of CLABRD */ - -} /* clabrd_ */ - -/* Subroutine */ int clacgv_(integer *n, complex *x, integer *incx) -{ - /* System generated locals */ - integer i__1, i__2; - complex q__1; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, ioff; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - CLACGV conjugates a complex vector of length N. - - Arguments - ========= - - N (input) INTEGER - The length of the vector X. N >= 0. - - X (input/output) COMPLEX array, dimension - (1+(N-1)*abs(INCX)) - On entry, the vector of length N to be conjugated. - On exit, X is overwritten with conjg(X). - - INCX (input) INTEGER - The spacing between successive elements of X. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --x; - - /* Function Body */ - if (*incx == 1) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - r_cnjg(&q__1, &x[i__]); - x[i__2].r = q__1.r, x[i__2].i = q__1.i; -/* L10: */ - } - } else { - ioff = 1; - if (*incx < 0) { - ioff = 1 - (*n - 1) * *incx; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = ioff; - r_cnjg(&q__1, &x[ioff]); - x[i__2].r = q__1.r, x[i__2].i = q__1.i; - ioff += *incx; -/* L20: */ - } - } - return 0; - -/* End of CLACGV */ - -} /* clacgv_ */ - -/* Subroutine */ int clacp2_(char *uplo, integer *m, integer *n, real *a, - integer *lda, complex *b, integer *ldb) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, j; - extern logical lsame_(char *, char *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CLACP2 copies all or part of a real two-dimensional matrix A to a - complex matrix B. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies the part of the matrix A to be copied to B. - = 'U': Upper triangular part - = 'L': Lower triangular part - Otherwise: All of the matrix A - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input) REAL array, dimension (LDA,N) - The m by n matrix A. If UPLO = 'U', only the upper trapezium - is accessed; if UPLO = 'L', only the lower trapezium is - accessed. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - B (output) COMPLEX array, dimension (LDB,N) - On exit, B = A in the locations specified by UPLO. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,M). - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = min(j,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * a_dim1; - b[i__3].r = a[i__4], b[i__3].i = 0.f; -/* L10: */ - } -/* L20: */ - } - - } else if (lsame_(uplo, "L")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = j; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * a_dim1; - b[i__3].r = a[i__4], b[i__3].i = 0.f; -/* L30: */ - } -/* L40: */ - } - - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * a_dim1; - b[i__3].r = a[i__4], b[i__3].i = 0.f; -/* L50: */ - } -/* L60: */ - } - } - - return 0; - -/* End of CLACP2 */ - -} /* clacp2_ */ - -/* Subroutine */ int clacpy_(char *uplo, integer *m, integer *n, complex *a, - integer *lda, complex *b, integer *ldb) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, j; - extern logical lsame_(char *, char *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - CLACPY copies all or part of a two-dimensional matrix A to another - matrix B. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies the part of the matrix A to be copied to B. - = 'U': Upper triangular part - = 'L': Lower triangular part - Otherwise: All of the matrix A - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input) COMPLEX array, dimension (LDA,N) - The m by n matrix A. If UPLO = 'U', only the upper trapezium - is accessed; if UPLO = 'L', only the lower trapezium is - accessed. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - B (output) COMPLEX array, dimension (LDB,N) - On exit, B = A in the locations specified by UPLO. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,M). - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = min(j,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * a_dim1; - b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i; -/* L10: */ - } -/* L20: */ - } - - } else if (lsame_(uplo, "L")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = j; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * a_dim1; - b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i; -/* L30: */ - } -/* L40: */ - } - - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * a_dim1; - b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i; -/* L50: */ - } -/* L60: */ - } - } - - return 0; - -/* End of CLACPY */ - -} /* clacpy_ */ - -/* Subroutine */ int clacrm_(integer *m, integer *n, complex *a, integer *lda, - real *b, integer *ldb, complex *c__, integer *ldc, real *rwork) -{ - /* System generated locals */ - integer b_dim1, b_offset, a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, - i__3, i__4, i__5; - real r__1; - complex q__1; - - /* Builtin functions */ - double r_imag(complex *); - - /* Local variables */ - static integer i__, j, l; - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CLACRM performs a very simple matrix-matrix multiplication: - C := A * B, - where A is M by N and complex; B is N by N and real; - C is M by N and complex. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A and of the matrix C. - M >= 0. - - N (input) INTEGER - The number of columns and rows of the matrix B and - the number of columns of the matrix C. - N >= 0. - - A (input) COMPLEX array, dimension (LDA, N) - A contains the M by N matrix A. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >=max(1,M). - - B (input) REAL array, dimension (LDB, N) - B contains the N by N matrix B. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >=max(1,N). - - C (input) COMPLEX array, dimension (LDC, N) - C contains the M by N matrix C. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >=max(1,N). - - RWORK (workspace) REAL array, dimension (2*M*N) - - ===================================================================== - - - Quick return if possible. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --rwork; - - /* Function Body */ - if (*m == 0 || *n == 0) { - return 0; - } - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - rwork[(j - 1) * *m + i__] = a[i__3].r; -/* L10: */ - } -/* L20: */ - } - - l = *m * *n + 1; - sgemm_("N", "N", m, n, n, &c_b871, &rwork[1], m, &b[b_offset], ldb, & - c_b1101, &rwork[l], m); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = l + (j - 1) * *m + i__ - 1; - c__[i__3].r = rwork[i__4], c__[i__3].i = 0.f; -/* L30: */ - } -/* L40: */ - } - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - rwork[(j - 1) * *m + i__] = r_imag(&a[i__ + j * a_dim1]); -/* L50: */ - } -/* L60: */ - } - sgemm_("N", "N", m, n, n, &c_b871, &rwork[1], m, &b[b_offset], ldb, & - c_b1101, &rwork[l], m); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - r__1 = c__[i__4].r; - i__5 = l + (j - 1) * *m + i__ - 1; - q__1.r = r__1, q__1.i = rwork[i__5]; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L70: */ - } -/* L80: */ - } - - return 0; - -/* End of CLACRM */ - -} /* clacrm_ */ - -/* Complex */ VOID cladiv_(complex * ret_val, complex *x, complex *y) -{ - /* System generated locals */ - real r__1, r__2, r__3, r__4; - complex q__1; - - /* Builtin functions */ - double r_imag(complex *); - - /* Local variables */ - static real zi, zr; - extern /* Subroutine */ int sladiv_(real *, real *, real *, real *, real * - , real *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - CLADIV := X / Y, where X and Y are complex. The computation of X / Y - will not overflow on an intermediary step unless the results - overflows. - - Arguments - ========= - - X (input) COMPLEX - Y (input) COMPLEX - The complex scalars X and Y. - - ===================================================================== -*/ - - - r__1 = x->r; - r__2 = r_imag(x); - r__3 = y->r; - r__4 = r_imag(y); - sladiv_(&r__1, &r__2, &r__3, &r__4, &zr, &zi); - q__1.r = zr, q__1.i = zi; - ret_val->r = q__1.r, ret_val->i = q__1.i; - - return ; - -/* End of CLADIV */ - -} /* cladiv_ */ - -/* Subroutine */ int claed0_(integer *qsiz, integer *n, real *d__, real *e, - complex *q, integer *ldq, complex *qstore, integer *ldqs, real *rwork, - integer *iwork, integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2; - real r__1; - - /* Builtin functions */ - double log(doublereal); - integer pow_ii(integer *, integer *); - - /* Local variables */ - static integer i__, j, k, ll, iq, lgn, msd2, smm1, spm1, spm2; - static real temp; - static integer curr, iperm; - extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, - complex *, integer *); - static integer indxq, iwrem; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *); - static integer iqptr; - extern /* Subroutine */ int claed7_(integer *, integer *, integer *, - integer *, integer *, integer *, real *, complex *, integer *, - real *, integer *, real *, integer *, integer *, integer *, - integer *, integer *, real *, complex *, real *, integer *, - integer *); - static integer tlvls; - extern /* Subroutine */ int clacrm_(integer *, integer *, complex *, - integer *, real *, integer *, complex *, integer *, real *); - static integer igivcl; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer igivnm, submat, curprb, subpbs, igivpt, curlvl, matsiz, - iprmpt, smlsiz; - extern /* Subroutine */ int ssteqr_(char *, integer *, real *, real *, - real *, integer *, real *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - Using the divide and conquer method, CLAED0 computes all eigenvalues - of a symmetric tridiagonal matrix which is one diagonal block of - those from reducing a dense or band Hermitian matrix and - corresponding eigenvectors of the dense or band matrix. - - Arguments - ========= - - QSIZ (input) INTEGER - The dimension of the unitary matrix used to reduce - the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - D (input/output) REAL array, dimension (N) - On entry, the diagonal elements of the tridiagonal matrix. - On exit, the eigenvalues in ascending order. - - E (input/output) REAL array, dimension (N-1) - On entry, the off-diagonal elements of the tridiagonal matrix. - On exit, E has been destroyed. - - Q (input/output) COMPLEX array, dimension (LDQ,N) - On entry, Q must contain an QSIZ x N matrix whose columns - unitarily orthonormal. It is a part of the unitary matrix - that reduces the full dense Hermitian matrix to a - (reducible) symmetric tridiagonal matrix. - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). - - IWORK (workspace) INTEGER array, - the dimension of IWORK must be at least - 6 + 6*N + 5*N*lg N - ( lg( N ) = smallest integer k - such that 2^k >= N ) - - RWORK (workspace) REAL array, - dimension (1 + 3*N + 2*N*lg N + 3*N**2) - ( lg( N ) = smallest integer k - such that 2^k >= N ) - - QSTORE (workspace) COMPLEX array, dimension (LDQS, N) - Used to store parts of - the eigenvector matrix when the updating matrix multiplies - take place. - - LDQS (input) INTEGER - The leading dimension of the array QSTORE. - LDQS >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an eigenvalue while - working on the submatrix lying in rows and columns - INFO/(N+1) through mod(INFO,N+1). - - ===================================================================== - - Warning: N could be as big as QSIZ! - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - qstore_dim1 = *ldqs; - qstore_offset = 1 + qstore_dim1; - qstore -= qstore_offset; - --rwork; - --iwork; - - /* Function Body */ - *info = 0; - -/* - IF( ICOMPQ .LT. 0 .OR. ICOMPQ .GT. 2 ) THEN - INFO = -1 - ELSE IF( ( ICOMPQ .EQ. 1 ) .AND. ( QSIZ .LT. MAX( 0, N ) ) ) - $ THEN -*/ - if (*qsiz < max(0,*n)) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*ldq < max(1,*n)) { - *info = -6; - } else if (*ldqs < max(1,*n)) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CLAED0", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - smlsiz = ilaenv_(&c__9, "CLAED0", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); - -/* - Determine the size and placement of the submatrices, and save in - the leading elements of IWORK. -*/ - - iwork[1] = *n; - subpbs = 1; - tlvls = 0; -L10: - if (iwork[subpbs] > smlsiz) { - for (j = subpbs; j >= 1; --j) { - iwork[j * 2] = (iwork[j] + 1) / 2; - iwork[(j << 1) - 1] = iwork[j] / 2; -/* L20: */ - } - ++tlvls; - subpbs <<= 1; - goto L10; - } - i__1 = subpbs; - for (j = 2; j <= i__1; ++j) { - iwork[j] += iwork[j - 1]; -/* L30: */ - } - -/* - Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 - using rank-1 modifications (cuts). -*/ - - spm1 = subpbs - 1; - i__1 = spm1; - for (i__ = 1; i__ <= i__1; ++i__) { - submat = iwork[i__] + 1; - smm1 = submat - 1; - d__[smm1] -= (r__1 = e[smm1], dabs(r__1)); - d__[submat] -= (r__1 = e[smm1], dabs(r__1)); -/* L40: */ - } - - indxq = (*n << 2) + 3; - -/* - Set up workspaces for eigenvalues only/accumulate new vectors - routine -*/ - - temp = log((real) (*n)) / log(2.f); - lgn = (integer) temp; - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - iprmpt = indxq + *n + 1; - iperm = iprmpt + *n * lgn; - iqptr = iperm + *n * lgn; - igivpt = iqptr + *n + 2; - igivcl = igivpt + *n * lgn; - - igivnm = 1; - iq = igivnm + (*n << 1) * lgn; -/* Computing 2nd power */ - i__1 = *n; - iwrem = iq + i__1 * i__1 + 1; -/* Initialize pointers */ - i__1 = subpbs; - for (i__ = 0; i__ <= i__1; ++i__) { - iwork[iprmpt + i__] = 1; - iwork[igivpt + i__] = 1; -/* L50: */ - } - iwork[iqptr] = 1; - -/* - Solve each submatrix eigenproblem at the bottom of the divide and - conquer tree. -*/ - - curr = 0; - i__1 = spm1; - for (i__ = 0; i__ <= i__1; ++i__) { - if (i__ == 0) { - submat = 1; - matsiz = iwork[1]; - } else { - submat = iwork[i__] + 1; - matsiz = iwork[i__ + 1] - iwork[i__]; - } - ll = iq - 1 + iwork[iqptr + curr]; - ssteqr_("I", &matsiz, &d__[submat], &e[submat], &rwork[ll], &matsiz, & - rwork[1], info); - clacrm_(qsiz, &matsiz, &q[submat * q_dim1 + 1], ldq, &rwork[ll], & - matsiz, &qstore[submat * qstore_dim1 + 1], ldqs, &rwork[iwrem] - ); -/* Computing 2nd power */ - i__2 = matsiz; - iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2; - ++curr; - if (*info > 0) { - *info = submat * (*n + 1) + submat + matsiz - 1; - return 0; - } - k = 1; - i__2 = iwork[i__ + 1]; - for (j = submat; j <= i__2; ++j) { - iwork[indxq + j] = k; - ++k; -/* L60: */ - } -/* L70: */ - } - -/* - Successively merge eigensystems of adjacent submatrices - into eigensystem for the corresponding larger matrix. - - while ( SUBPBS > 1 ) -*/ - - curlvl = 1; -L80: - if (subpbs > 1) { - spm2 = subpbs - 2; - i__1 = spm2; - for (i__ = 0; i__ <= i__1; i__ += 2) { - if (i__ == 0) { - submat = 1; - matsiz = iwork[2]; - msd2 = iwork[1]; - curprb = 0; - } else { - submat = iwork[i__] + 1; - matsiz = iwork[i__ + 2] - iwork[i__]; - msd2 = matsiz / 2; - ++curprb; - } - -/* - Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) - into an eigensystem of size MATSIZ. CLAED7 handles the case - when the eigenvectors of a full or band Hermitian matrix (which - was reduced to tridiagonal form) are desired. - - I am free to use Q as a valuable working space until Loop 150. -*/ - - claed7_(&matsiz, &msd2, qsiz, &tlvls, &curlvl, &curprb, &d__[ - submat], &qstore[submat * qstore_dim1 + 1], ldqs, &e[ - submat + msd2 - 1], &iwork[indxq + submat], &rwork[iq], & - iwork[iqptr], &iwork[iprmpt], &iwork[iperm], &iwork[ - igivpt], &iwork[igivcl], &rwork[igivnm], &q[submat * - q_dim1 + 1], &rwork[iwrem], &iwork[subpbs + 1], info); - if (*info > 0) { - *info = submat * (*n + 1) + submat + matsiz - 1; - return 0; - } - iwork[i__ / 2 + 1] = iwork[i__ + 2]; -/* L90: */ - } - subpbs /= 2; - ++curlvl; - goto L80; - } - -/* - end while - - Re-merge the eigenvalues/vectors which were deflated at the final - merge step. -*/ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - j = iwork[indxq + i__]; - rwork[i__] = d__[j]; - ccopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 + 1] - , &c__1); -/* L100: */ - } - scopy_(n, &rwork[1], &c__1, &d__[1], &c__1); - - return 0; - -/* End of CLAED0 */ - -} /* claed0_ */ - -/* Subroutine */ int claed7_(integer *n, integer *cutpnt, integer *qsiz, - integer *tlvls, integer *curlvl, integer *curpbm, real *d__, complex * - q, integer *ldq, real *rho, integer *indxq, real *qstore, integer * - qptr, integer *prmptr, integer *perm, integer *givptr, integer * - givcol, real *givnum, complex *work, real *rwork, integer *iwork, - integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, i__1, i__2; - - /* Builtin functions */ - integer pow_ii(integer *, integer *); - - /* Local variables */ - static integer i__, k, n1, n2, iq, iw, iz, ptr, ind1, ind2, indx, curr, - indxc, indxp; - extern /* Subroutine */ int claed8_(integer *, integer *, integer *, - complex *, integer *, real *, real *, integer *, real *, real *, - complex *, integer *, real *, integer *, integer *, integer *, - integer *, integer *, integer *, real *, integer *), slaed9_( - integer *, integer *, integer *, integer *, real *, real *, - integer *, real *, real *, real *, real *, integer *, integer *), - slaeda_(integer *, integer *, integer *, integer *, integer *, - integer *, integer *, integer *, real *, real *, integer *, real * - , real *, integer *); - static integer idlmda; - extern /* Subroutine */ int clacrm_(integer *, integer *, complex *, - integer *, real *, integer *, complex *, integer *, real *), - xerbla_(char *, integer *), slamrg_(integer *, integer *, - real *, integer *, integer *, integer *); - static integer coltyp; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CLAED7 computes the updated eigensystem of a diagonal - matrix after modification by a rank-one symmetric matrix. This - routine is used only for the eigenproblem which requires all - eigenvalues and optionally eigenvectors of a dense or banded - Hermitian matrix that has been reduced to tridiagonal form. - - T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) - - where Z = Q'u, u is a vector of length N with ones in the - CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. - - The eigenvectors of the original matrix are stored in Q, and the - eigenvalues are in D. The algorithm consists of three stages: - - The first stage consists of deflating the size of the problem - when there are multiple eigenvalues or if there is a zero in - the Z vector. For each such occurence the dimension of the - secular equation problem is reduced by one. This stage is - performed by the routine SLAED2. - - The second stage consists of calculating the updated - eigenvalues. This is done by finding the roots of the secular - equation via the routine SLAED4 (as called by SLAED3). - This routine also calculates the eigenvectors of the current - problem. - - The final stage consists of computing the updated eigenvectors - directly using the updated eigenvalues. The eigenvectors for - the current problem are multiplied with the eigenvectors from - the overall problem. - - Arguments - ========= - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - CUTPNT (input) INTEGER - Contains the location of the last eigenvalue in the leading - sub-matrix. min(1,N) <= CUTPNT <= N. - - QSIZ (input) INTEGER - The dimension of the unitary matrix used to reduce - the full matrix to tridiagonal form. QSIZ >= N. - - TLVLS (input) INTEGER - The total number of merging levels in the overall divide and - conquer tree. - - CURLVL (input) INTEGER - The current level in the overall merge routine, - 0 <= curlvl <= tlvls. - - CURPBM (input) INTEGER - The current problem in the current level in the overall - merge routine (counting from upper left to lower right). - - D (input/output) REAL array, dimension (N) - On entry, the eigenvalues of the rank-1-perturbed matrix. - On exit, the eigenvalues of the repaired matrix. - - Q (input/output) COMPLEX array, dimension (LDQ,N) - On entry, the eigenvectors of the rank-1-perturbed matrix. - On exit, the eigenvectors of the repaired tridiagonal matrix. - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). - - RHO (input) REAL - Contains the subdiagonal element used to create the rank-1 - modification. - - INDXQ (output) INTEGER array, dimension (N) - This contains the permutation which will reintegrate the - subproblem just solved back into sorted order, - ie. D( INDXQ( I = 1, N ) ) will be in ascending order. - - IWORK (workspace) INTEGER array, dimension (4*N) - - RWORK (workspace) REAL array, - dimension (3*N+2*QSIZ*N) - - WORK (workspace) COMPLEX array, dimension (QSIZ*N) - - QSTORE (input/output) REAL array, dimension (N**2+1) - Stores eigenvectors of submatrices encountered during - divide and conquer, packed together. QPTR points to - beginning of the submatrices. - - QPTR (input/output) INTEGER array, dimension (N+2) - List of indices pointing to beginning of submatrices stored - in QSTORE. The submatrices are numbered starting at the - bottom left of the divide and conquer tree, from left to - right and bottom to top. - - PRMPTR (input) INTEGER array, dimension (N lg N) - Contains a list of pointers which indicate where in PERM a - level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) - indicates the size of the permutation and also the size of - the full, non-deflated problem. - - PERM (input) INTEGER array, dimension (N lg N) - Contains the permutations (from deflation and sorting) to be - applied to each eigenblock. - - GIVPTR (input) INTEGER array, dimension (N lg N) - Contains a list of pointers which indicate where in GIVCOL a - level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) - indicates the number of Givens rotations. - - GIVCOL (input) INTEGER array, dimension (2, N lg N) - Each pair of numbers indicates a pair of columns to take place - in a Givens rotation. - - GIVNUM (input) REAL array, dimension (2, N lg N) - Each number indicates the S value to be used in the - corresponding Givens rotation. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an eigenvalue did not converge - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --indxq; - --qstore; - --qptr; - --prmptr; - --perm; - --givptr; - givcol -= 3; - givnum -= 3; - --work; - --rwork; - --iwork; - - /* Function Body */ - *info = 0; - -/* - IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN - INFO = -1 - ELSE IF( N.LT.0 ) THEN -*/ - if (*n < 0) { - *info = -1; - } else if (min(1,*n) > *cutpnt || *n < *cutpnt) { - *info = -2; - } else if (*qsiz < *n) { - *info = -3; - } else if (*ldq < max(1,*n)) { - *info = -9; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CLAED7", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* - The following values are for bookkeeping purposes only. They are - integer pointers which indicate the portion of the workspace - used by a particular array in SLAED2 and SLAED3. -*/ - - iz = 1; - idlmda = iz + *n; - iw = idlmda + *n; - iq = iw + *n; - - indx = 1; - indxc = indx + *n; - coltyp = indxc + *n; - indxp = coltyp + *n; - -/* - Form the z-vector which consists of the last row of Q_1 and the - first row of Q_2. -*/ - - ptr = pow_ii(&c__2, tlvls) + 1; - i__1 = *curlvl - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = *tlvls - i__; - ptr += pow_ii(&c__2, &i__2); -/* L10: */ - } - curr = ptr + *curpbm; - slaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], & - givcol[3], &givnum[3], &qstore[1], &qptr[1], &rwork[iz], &rwork[ - iz + *n], info); - -/* - When solving the final problem, we no longer need the stored data, - so we will overwrite the data from this level onto the previously - used storage space. -*/ - - if (*curlvl == *tlvls) { - qptr[curr] = 1; - prmptr[curr] = 1; - givptr[curr] = 1; - } - -/* Sort and Deflate eigenvalues. */ - - claed8_(&k, n, qsiz, &q[q_offset], ldq, &d__[1], rho, cutpnt, &rwork[iz], - &rwork[idlmda], &work[1], qsiz, &rwork[iw], &iwork[indxp], &iwork[ - indx], &indxq[1], &perm[prmptr[curr]], &givptr[curr + 1], &givcol[ - (givptr[curr] << 1) + 1], &givnum[(givptr[curr] << 1) + 1], info); - prmptr[curr + 1] = prmptr[curr] + *n; - givptr[curr + 1] += givptr[curr]; - -/* Solve Secular Equation. */ - - if (k != 0) { - slaed9_(&k, &c__1, &k, n, &d__[1], &rwork[iq], &k, rho, &rwork[idlmda] - , &rwork[iw], &qstore[qptr[curr]], &k, info); - clacrm_(qsiz, &k, &work[1], qsiz, &qstore[qptr[curr]], &k, &q[ - q_offset], ldq, &rwork[iq]); -/* Computing 2nd power */ - i__1 = k; - qptr[curr + 1] = qptr[curr] + i__1 * i__1; - if (*info != 0) { - return 0; - } - -/* Prepare the INDXQ sorting premutation. */ - - n1 = k; - n2 = *n - k; - ind1 = 1; - ind2 = *n; - slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); - } else { - qptr[curr + 1] = qptr[curr]; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - indxq[i__] = i__; -/* L20: */ - } - } - - return 0; - -/* End of CLAED7 */ - -} /* claed7_ */ - -/* Subroutine */ int claed8_(integer *k, integer *n, integer *qsiz, complex * - q, integer *ldq, real *d__, real *rho, integer *cutpnt, real *z__, - real *dlamda, complex *q2, integer *ldq2, real *w, integer *indxp, - integer *indx, integer *indxq, integer *perm, integer *givptr, - integer *givcol, real *givnum, integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, q2_dim1, q2_offset, i__1; - real r__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static real c__; - static integer i__, j; - static real s, t; - static integer k2, n1, n2, jp, n1p1; - static real eps, tau, tol; - static integer jlam, imax, jmax; - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), - ccopy_(integer *, complex *, integer *, complex *, integer *), - csrot_(integer *, complex *, integer *, complex *, integer *, - real *, real *), scopy_(integer *, real *, integer *, real *, - integer *); - extern doublereal slapy2_(real *, real *), slamch_(char *); - extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex - *, integer *, complex *, integer *), xerbla_(char *, - integer *); - extern integer isamax_(integer *, real *, integer *); - extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer - *, integer *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - September 30, 1994 - - - Purpose - ======= - - CLAED8 merges the two sets of eigenvalues together into a single - sorted set. Then it tries to deflate the size of the problem. - There are two ways in which deflation can occur: when two or more - eigenvalues are close together or if there is a tiny element in the - Z vector. For each such occurrence the order of the related secular - equation problem is reduced by one. - - Arguments - ========= - - K (output) INTEGER - Contains the number of non-deflated eigenvalues. - This is the order of the related secular equation. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - QSIZ (input) INTEGER - The dimension of the unitary matrix used to reduce - the dense or band matrix to tridiagonal form. - QSIZ >= N if ICOMPQ = 1. - - Q (input/output) COMPLEX array, dimension (LDQ,N) - On entry, Q contains the eigenvectors of the partially solved - system which has been previously updated in matrix - multiplies with other partially solved eigensystems. - On exit, Q contains the trailing (N-K) updated eigenvectors - (those which were deflated) in its last N-K columns. - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max( 1, N ). - - D (input/output) REAL array, dimension (N) - On entry, D contains the eigenvalues of the two submatrices to - be combined. On exit, D contains the trailing (N-K) updated - eigenvalues (those which were deflated) sorted into increasing - order. - - RHO (input/output) REAL - Contains the off diagonal element associated with the rank-1 - cut which originally split the two submatrices which are now - being recombined. RHO is modified during the computation to - the value required by SLAED3. - - CUTPNT (input) INTEGER - Contains the location of the last eigenvalue in the leading - sub-matrix. MIN(1,N) <= CUTPNT <= N. - - Z (input) REAL array, dimension (N) - On input this vector contains the updating vector (the last - row of the first sub-eigenvector matrix and the first row of - the second sub-eigenvector matrix). The contents of Z are - destroyed during the updating process. - - DLAMDA (output) REAL array, dimension (N) - Contains a copy of the first K eigenvalues which will be used - by SLAED3 to form the secular equation. - - Q2 (output) COMPLEX array, dimension (LDQ2,N) - If ICOMPQ = 0, Q2 is not referenced. Otherwise, - Contains a copy of the first K eigenvectors which will be used - by SLAED7 in a matrix multiply (SGEMM) to update the new - eigenvectors. - - LDQ2 (input) INTEGER - The leading dimension of the array Q2. LDQ2 >= max( 1, N ). - - W (output) REAL array, dimension (N) - This will hold the first k values of the final - deflation-altered z-vector and will be passed to SLAED3. - - INDXP (workspace) INTEGER array, dimension (N) - This will contain the permutation used to place deflated - values of D at the end of the array. On output INDXP(1:K) - points to the nondeflated D-values and INDXP(K+1:N) - points to the deflated eigenvalues. - - INDX (workspace) INTEGER array, dimension (N) - This will contain the permutation used to sort the contents of - D into ascending order. - - INDXQ (input) INTEGER array, dimension (N) - This contains the permutation which separately sorts the two - sub-problems in D into ascending order. Note that elements in - the second half of this permutation must first have CUTPNT - added to their values in order to be accurate. - - PERM (output) INTEGER array, dimension (N) - Contains the permutations (from deflation and sorting) to be - applied to each eigenblock. - - GIVPTR (output) INTEGER - Contains the number of Givens rotations which took place in - this subproblem. - - GIVCOL (output) INTEGER array, dimension (2, N) - Each pair of numbers indicates a pair of columns to take place - in a Givens rotation. - - GIVNUM (output) REAL array, dimension (2, N) - Each number indicates the S value to be used in the - corresponding Givens rotation. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --d__; - --z__; - --dlamda; - q2_dim1 = *ldq2; - q2_offset = 1 + q2_dim1; - q2 -= q2_offset; - --w; - --indxp; - --indx; - --indxq; - --perm; - givcol -= 3; - givnum -= 3; - - /* Function Body */ - *info = 0; - - if (*n < 0) { - *info = -2; - } else if (*qsiz < *n) { - *info = -3; - } else if (*ldq < max(1,*n)) { - *info = -5; - } else if (*cutpnt < min(1,*n) || *cutpnt > *n) { - *info = -8; - } else if (*ldq2 < max(1,*n)) { - *info = -12; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CLAED8", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - n1 = *cutpnt; - n2 = *n - n1; - n1p1 = n1 + 1; - - if (*rho < 0.f) { - sscal_(&n2, &c_b1150, &z__[n1p1], &c__1); - } - -/* Normalize z so that norm(z) = 1 */ - - t = 1.f / sqrt(2.f); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - indx[j] = j; -/* L10: */ - } - sscal_(n, &t, &z__[1], &c__1); - *rho = (r__1 = *rho * 2.f, dabs(r__1)); - -/* Sort the eigenvalues into increasing order */ - - i__1 = *n; - for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) { - indxq[i__] += *cutpnt; -/* L20: */ - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dlamda[i__] = d__[indxq[i__]]; - w[i__] = z__[indxq[i__]]; -/* L30: */ - } - i__ = 1; - j = *cutpnt + 1; - slamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - d__[i__] = dlamda[indx[i__]]; - z__[i__] = w[indx[i__]]; -/* L40: */ - } - -/* Calculate the allowable deflation tolerance */ - - imax = isamax_(n, &z__[1], &c__1); - jmax = isamax_(n, &d__[1], &c__1); - eps = slamch_("Epsilon"); - tol = eps * 8.f * (r__1 = d__[jmax], dabs(r__1)); - -/* - If the rank-1 modifier is small enough, no more needs to be done - -- except to reorganize Q so that its columns correspond with the - elements in D. -*/ - - if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) { - *k = 0; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - perm[j] = indxq[indx[j]]; - ccopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1] - , &c__1); -/* L50: */ - } - clacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq); - return 0; - } - -/* - If there are multiple eigenvalues then the problem deflates. Here - the number of equal eigenvalues are found. As each equal - eigenvalue is found, an elementary reflector is computed to rotate - the corresponding eigensubspace so that the corresponding - components of Z are zero in this new basis. -*/ - - *k = 0; - *givptr = 0; - k2 = *n + 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - indxp[k2] = j; - if (j == *n) { - goto L100; - } - } else { - jlam = j; - goto L70; - } -/* L60: */ - } -L70: - ++j; - if (j > *n) { - goto L90; - } - if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - indxp[k2] = j; - } else { - -/* Check if eigenvalues are close enough to allow deflation. */ - - s = z__[jlam]; - c__ = z__[j]; - -/* - Find sqrt(a**2+b**2) without overflow or - destructive underflow. -*/ - - tau = slapy2_(&c__, &s); - t = d__[j] - d__[jlam]; - c__ /= tau; - s = -s / tau; - if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) { - -/* Deflation is possible. */ - - z__[j] = tau; - z__[jlam] = 0.f; - -/* Record the appropriate Givens rotation */ - - ++(*givptr); - givcol[(*givptr << 1) + 1] = indxq[indx[jlam]]; - givcol[(*givptr << 1) + 2] = indxq[indx[j]]; - givnum[(*givptr << 1) + 1] = c__; - givnum[(*givptr << 1) + 2] = s; - csrot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[indxq[ - indx[j]] * q_dim1 + 1], &c__1, &c__, &s); - t = d__[jlam] * c__ * c__ + d__[j] * s * s; - d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__; - d__[jlam] = t; - --k2; - i__ = 1; -L80: - if (k2 + i__ <= *n) { - if (d__[jlam] < d__[indxp[k2 + i__]]) { - indxp[k2 + i__ - 1] = indxp[k2 + i__]; - indxp[k2 + i__] = jlam; - ++i__; - goto L80; - } else { - indxp[k2 + i__ - 1] = jlam; - } - } else { - indxp[k2 + i__ - 1] = jlam; - } - jlam = j; - } else { - ++(*k); - w[*k] = z__[jlam]; - dlamda[*k] = d__[jlam]; - indxp[*k] = jlam; - jlam = j; - } - } - goto L70; -L90: - -/* Record the last eigenvalue. */ - - ++(*k); - w[*k] = z__[jlam]; - dlamda[*k] = d__[jlam]; - indxp[*k] = jlam; - -L100: - -/* - Sort the eigenvalues and corresponding eigenvectors into DLAMDA - and Q2 respectively. The eigenvalues/vectors which were not - deflated go into the first K slots of DLAMDA and Q2 respectively, - while those which were deflated go into the last N - K slots. -*/ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - jp = indxp[j]; - dlamda[j] = d__[jp]; - perm[j] = indxq[indx[jp]]; - ccopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1], & - c__1); -/* L110: */ - } - -/* - The deflated eigenvalues and their corresponding vectors go back - into the last N - K slots of D and Q respectively. -*/ - - if (*k < *n) { - i__1 = *n - *k; - scopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); - i__1 = *n - *k; - clacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(*k + - 1) * q_dim1 + 1], ldq); - } - - return 0; - -/* End of CLAED8 */ - -} /* claed8_ */ - -/* Subroutine */ int clahqr_(logical *wantt, logical *wantz, integer *n, - integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *w, - integer *iloz, integer *ihiz, complex *z__, integer *ldz, integer * - info) -{ - /* System generated locals */ - integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; - real r__1, r__2, r__3, r__4, r__5, r__6; - complex q__1, q__2, q__3, q__4; - - /* Builtin functions */ - double r_imag(complex *); - void c_sqrt(complex *, complex *), r_cnjg(complex *, complex *); - double c_abs(complex *); - - /* Local variables */ - static integer i__, j, k, l, m; - static real s; - static complex t, u, v[2], x, y; - static integer i1, i2; - static complex t1; - static real t2; - static complex v2; - static real h10; - static complex h11; - static real h21; - static complex h22; - static integer nh, nz; - static complex h11s; - static integer itn, its; - static real ulp; - static complex sum; - static real tst1; - static complex temp; - extern /* Subroutine */ int cscal_(integer *, complex *, complex *, - integer *), ccopy_(integer *, complex *, integer *, complex *, - integer *); - static real rtemp, rwork[1]; - extern /* Subroutine */ int clarfg_(integer *, complex *, complex *, - integer *, complex *); - extern /* Complex */ VOID cladiv_(complex *, complex *, complex *); - extern doublereal slamch_(char *), clanhs_(char *, integer *, - complex *, integer *, real *); - static real smlnum; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CLAHQR is an auxiliary routine called by CHSEQR to update the - eigenvalues and Schur decomposition already computed by CHSEQR, by - dealing with the Hessenberg submatrix in rows and columns ILO to IHI. - - Arguments - ========= - - WANTT (input) LOGICAL - = .TRUE. : the full Schur form T is required; - = .FALSE.: only eigenvalues are required. - - WANTZ (input) LOGICAL - = .TRUE. : the matrix of Schur vectors Z is required; - = .FALSE.: Schur vectors are not required. - - N (input) INTEGER - The order of the matrix H. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that H is already upper triangular in rows and - columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless ILO = 1). - CLAHQR works primarily with the Hessenberg submatrix in rows - and columns ILO to IHI, but applies transformations to all of - H if WANTT is .TRUE.. - 1 <= ILO <= max(1,IHI); IHI <= N. - - H (input/output) COMPLEX array, dimension (LDH,N) - On entry, the upper Hessenberg matrix H. - On exit, if WANTT is .TRUE., H is upper triangular in rows - and columns ILO:IHI, with any 2-by-2 diagonal blocks in - standard form. If WANTT is .FALSE., the contents of H are - unspecified on exit. - - LDH (input) INTEGER - The leading dimension of the array H. LDH >= max(1,N). - - W (output) COMPLEX array, dimension (N) - The computed eigenvalues ILO to IHI are stored in the - corresponding elements of W. If WANTT is .TRUE., the - eigenvalues are stored in the same order as on the diagonal - of the Schur form returned in H, with W(i) = H(i,i). - - ILOZ (input) INTEGER - IHIZ (input) INTEGER - Specify the rows of Z to which transformations must be - applied if WANTZ is .TRUE.. - 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. - - Z (input/output) COMPLEX array, dimension (LDZ,N) - If WANTZ is .TRUE., on entry Z must contain the current - matrix Z of transformations accumulated by CHSEQR, and on - exit Z has been updated; transformations are applied only to - the submatrix Z(ILOZ:IHIZ,ILO:IHI). - If WANTZ is .FALSE., Z is not referenced. - - LDZ (input) INTEGER - The leading dimension of the array Z. LDZ >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - > 0: if INFO = i, CLAHQR failed to compute all the - eigenvalues ILO to IHI in a total of 30*(IHI-ILO+1) - iterations; elements i+1:ihi of W contain those - eigenvalues which have been successfully computed. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - h_dim1 = *ldh; - h_offset = 1 + h_dim1; - h__ -= h_offset; - --w; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - - /* Function Body */ - *info = 0; - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - if (*ilo == *ihi) { - i__1 = *ilo; - i__2 = *ilo + *ilo * h_dim1; - w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; - return 0; - } - - nh = *ihi - *ilo + 1; - nz = *ihiz - *iloz + 1; - -/* - Set machine-dependent constants for the stopping criterion. - If norm(H) <= sqrt(OVFL), overflow should not occur. -*/ - - ulp = slamch_("Precision"); - smlnum = slamch_("Safe minimum") / ulp; - -/* - I1 and I2 are the indices of the first row and last column of H - to which transformations must be applied. If eigenvalues only are - being computed, I1 and I2 are set inside the main loop. -*/ - - if (*wantt) { - i1 = 1; - i2 = *n; - } - -/* ITN is the total number of QR iterations allowed. */ - - itn = nh * 30; - -/* - The main loop begins here. I is the loop index and decreases from - IHI to ILO in steps of 1. Each iteration of the loop works - with the active submatrix in rows and columns L to I. - Eigenvalues I+1 to IHI have already converged. Either L = ILO, or - H(L,L-1) is negligible so that the matrix splits. -*/ - - i__ = *ihi; -L10: - if (i__ < *ilo) { - goto L130; - } - -/* - Perform QR iterations on rows and columns ILO to I until a - submatrix of order 1 splits off at the bottom because a - subdiagonal element has become negligible. -*/ - - l = *ilo; - i__1 = itn; - for (its = 0; its <= i__1; ++its) { - -/* Look for a single small subdiagonal element. */ - - i__2 = l + 1; - for (k = i__; k >= i__2; --k) { - i__3 = k - 1 + (k - 1) * h_dim1; - i__4 = k + k * h_dim1; - tst1 = (r__1 = h__[i__3].r, dabs(r__1)) + (r__2 = r_imag(&h__[k - - 1 + (k - 1) * h_dim1]), dabs(r__2)) + ((r__3 = h__[i__4] - .r, dabs(r__3)) + (r__4 = r_imag(&h__[k + k * h_dim1]), - dabs(r__4))); - if (tst1 == 0.f) { - i__3 = i__ - l + 1; - tst1 = clanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, rwork); - } - i__3 = k + (k - 1) * h_dim1; -/* Computing MAX */ - r__2 = ulp * tst1; - if ((r__1 = h__[i__3].r, dabs(r__1)) <= dmax(r__2,smlnum)) { - goto L30; - } -/* L20: */ - } -L30: - l = k; - if (l > *ilo) { - -/* H(L,L-1) is negligible */ - - i__2 = l + (l - 1) * h_dim1; - h__[i__2].r = 0.f, h__[i__2].i = 0.f; - } - -/* Exit from loop if a submatrix of order 1 has split off. */ - - if (l >= i__) { - goto L120; - } - -/* - Now the active submatrix is in rows and columns L to I. If - eigenvalues only are being computed, only the active submatrix - need be transformed. -*/ - - if (! (*wantt)) { - i1 = l; - i2 = i__; - } - - if (its == 10 || its == 20) { - -/* Exceptional shift. */ - - i__2 = i__ + (i__ - 1) * h_dim1; - s = (r__1 = h__[i__2].r, dabs(r__1)) * .75f; - i__2 = i__ + i__ * h_dim1; - q__1.r = s + h__[i__2].r, q__1.i = h__[i__2].i; - t.r = q__1.r, t.i = q__1.i; - } else { - -/* Wilkinson's shift. */ - - i__2 = i__ + i__ * h_dim1; - t.r = h__[i__2].r, t.i = h__[i__2].i; - i__2 = i__ - 1 + i__ * h_dim1; - i__3 = i__ + (i__ - 1) * h_dim1; - r__1 = h__[i__3].r; - q__1.r = r__1 * h__[i__2].r, q__1.i = r__1 * h__[i__2].i; - u.r = q__1.r, u.i = q__1.i; - if (u.r != 0.f || u.i != 0.f) { - i__2 = i__ - 1 + (i__ - 1) * h_dim1; - q__2.r = h__[i__2].r - t.r, q__2.i = h__[i__2].i - t.i; - q__1.r = q__2.r * .5f, q__1.i = q__2.i * .5f; - x.r = q__1.r, x.i = q__1.i; - q__3.r = x.r * x.r - x.i * x.i, q__3.i = x.r * x.i + x.i * - x.r; - q__2.r = q__3.r + u.r, q__2.i = q__3.i + u.i; - c_sqrt(&q__1, &q__2); - y.r = q__1.r, y.i = q__1.i; - if (x.r * y.r + r_imag(&x) * r_imag(&y) < 0.f) { - q__1.r = -y.r, q__1.i = -y.i; - y.r = q__1.r, y.i = q__1.i; - } - q__3.r = x.r + y.r, q__3.i = x.i + y.i; - cladiv_(&q__2, &u, &q__3); - q__1.r = t.r - q__2.r, q__1.i = t.i - q__2.i; - t.r = q__1.r, t.i = q__1.i; - } - } - -/* Look for two consecutive small subdiagonal elements. */ - - i__2 = l + 1; - for (m = i__ - 1; m >= i__2; --m) { - -/* - Determine the effect of starting the single-shift QR - iteration at row M, and see if this would make H(M,M-1) - negligible. -*/ - - i__3 = m + m * h_dim1; - h11.r = h__[i__3].r, h11.i = h__[i__3].i; - i__3 = m + 1 + (m + 1) * h_dim1; - h22.r = h__[i__3].r, h22.i = h__[i__3].i; - q__1.r = h11.r - t.r, q__1.i = h11.i - t.i; - h11s.r = q__1.r, h11s.i = q__1.i; - i__3 = m + 1 + m * h_dim1; - h21 = h__[i__3].r; - s = (r__1 = h11s.r, dabs(r__1)) + (r__2 = r_imag(&h11s), dabs( - r__2)) + dabs(h21); - q__1.r = h11s.r / s, q__1.i = h11s.i / s; - h11s.r = q__1.r, h11s.i = q__1.i; - h21 /= s; - v[0].r = h11s.r, v[0].i = h11s.i; - v[1].r = h21, v[1].i = 0.f; - i__3 = m + (m - 1) * h_dim1; - h10 = h__[i__3].r; - tst1 = ((r__1 = h11s.r, dabs(r__1)) + (r__2 = r_imag(&h11s), dabs( - r__2))) * ((r__3 = h11.r, dabs(r__3)) + (r__4 = r_imag(& - h11), dabs(r__4)) + ((r__5 = h22.r, dabs(r__5)) + (r__6 = - r_imag(&h22), dabs(r__6)))); - if ((r__1 = h10 * h21, dabs(r__1)) <= ulp * tst1) { - goto L50; - } -/* L40: */ - } - i__2 = l + l * h_dim1; - h11.r = h__[i__2].r, h11.i = h__[i__2].i; - i__2 = l + 1 + (l + 1) * h_dim1; - h22.r = h__[i__2].r, h22.i = h__[i__2].i; - q__1.r = h11.r - t.r, q__1.i = h11.i - t.i; - h11s.r = q__1.r, h11s.i = q__1.i; - i__2 = l + 1 + l * h_dim1; - h21 = h__[i__2].r; - s = (r__1 = h11s.r, dabs(r__1)) + (r__2 = r_imag(&h11s), dabs(r__2)) - + dabs(h21); - q__1.r = h11s.r / s, q__1.i = h11s.i / s; - h11s.r = q__1.r, h11s.i = q__1.i; - h21 /= s; - v[0].r = h11s.r, v[0].i = h11s.i; - v[1].r = h21, v[1].i = 0.f; -L50: - -/* Single-shift QR step */ - - i__2 = i__ - 1; - for (k = m; k <= i__2; ++k) { - -/* - The first iteration of this loop determines a reflection G - from the vector V and applies it from left and right to H, - thus creating a nonzero bulge below the subdiagonal. - - Each subsequent iteration determines a reflection G to - restore the Hessenberg form in the (K-1)th column, and thus - chases the bulge one step toward the bottom of the active - submatrix. - - V(2) is always real before the call to CLARFG, and hence - after the call T2 ( = T1*V(2) ) is also real. -*/ - - if (k > m) { - ccopy_(&c__2, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); - } - clarfg_(&c__2, v, &v[1], &c__1, &t1); - if (k > m) { - i__3 = k + (k - 1) * h_dim1; - h__[i__3].r = v[0].r, h__[i__3].i = v[0].i; - i__3 = k + 1 + (k - 1) * h_dim1; - h__[i__3].r = 0.f, h__[i__3].i = 0.f; - } - v2.r = v[1].r, v2.i = v[1].i; - q__1.r = t1.r * v2.r - t1.i * v2.i, q__1.i = t1.r * v2.i + t1.i * - v2.r; - t2 = q__1.r; - -/* - Apply G from the left to transform the rows of the matrix - in columns K to I2. -*/ - - i__3 = i2; - for (j = k; j <= i__3; ++j) { - r_cnjg(&q__3, &t1); - i__4 = k + j * h_dim1; - q__2.r = q__3.r * h__[i__4].r - q__3.i * h__[i__4].i, q__2.i = - q__3.r * h__[i__4].i + q__3.i * h__[i__4].r; - i__5 = k + 1 + j * h_dim1; - q__4.r = t2 * h__[i__5].r, q__4.i = t2 * h__[i__5].i; - q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; - sum.r = q__1.r, sum.i = q__1.i; - i__4 = k + j * h_dim1; - i__5 = k + j * h_dim1; - q__1.r = h__[i__5].r - sum.r, q__1.i = h__[i__5].i - sum.i; - h__[i__4].r = q__1.r, h__[i__4].i = q__1.i; - i__4 = k + 1 + j * h_dim1; - i__5 = k + 1 + j * h_dim1; - q__2.r = sum.r * v2.r - sum.i * v2.i, q__2.i = sum.r * v2.i + - sum.i * v2.r; - q__1.r = h__[i__5].r - q__2.r, q__1.i = h__[i__5].i - q__2.i; - h__[i__4].r = q__1.r, h__[i__4].i = q__1.i; -/* L60: */ - } - -/* - Apply G from the right to transform the columns of the - matrix in rows I1 to min(K+2,I). - - Computing MIN -*/ - i__4 = k + 2; - i__3 = min(i__4,i__); - for (j = i1; j <= i__3; ++j) { - i__4 = j + k * h_dim1; - q__2.r = t1.r * h__[i__4].r - t1.i * h__[i__4].i, q__2.i = - t1.r * h__[i__4].i + t1.i * h__[i__4].r; - i__5 = j + (k + 1) * h_dim1; - q__3.r = t2 * h__[i__5].r, q__3.i = t2 * h__[i__5].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - sum.r = q__1.r, sum.i = q__1.i; - i__4 = j + k * h_dim1; - i__5 = j + k * h_dim1; - q__1.r = h__[i__5].r - sum.r, q__1.i = h__[i__5].i - sum.i; - h__[i__4].r = q__1.r, h__[i__4].i = q__1.i; - i__4 = j + (k + 1) * h_dim1; - i__5 = j + (k + 1) * h_dim1; - r_cnjg(&q__3, &v2); - q__2.r = sum.r * q__3.r - sum.i * q__3.i, q__2.i = sum.r * - q__3.i + sum.i * q__3.r; - q__1.r = h__[i__5].r - q__2.r, q__1.i = h__[i__5].i - q__2.i; - h__[i__4].r = q__1.r, h__[i__4].i = q__1.i; -/* L70: */ - } - - if (*wantz) { - -/* Accumulate transformations in the matrix Z */ - - i__3 = *ihiz; - for (j = *iloz; j <= i__3; ++j) { - i__4 = j + k * z_dim1; - q__2.r = t1.r * z__[i__4].r - t1.i * z__[i__4].i, q__2.i = - t1.r * z__[i__4].i + t1.i * z__[i__4].r; - i__5 = j + (k + 1) * z_dim1; - q__3.r = t2 * z__[i__5].r, q__3.i = t2 * z__[i__5].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - sum.r = q__1.r, sum.i = q__1.i; - i__4 = j + k * z_dim1; - i__5 = j + k * z_dim1; - q__1.r = z__[i__5].r - sum.r, q__1.i = z__[i__5].i - - sum.i; - z__[i__4].r = q__1.r, z__[i__4].i = q__1.i; - i__4 = j + (k + 1) * z_dim1; - i__5 = j + (k + 1) * z_dim1; - r_cnjg(&q__3, &v2); - q__2.r = sum.r * q__3.r - sum.i * q__3.i, q__2.i = sum.r * - q__3.i + sum.i * q__3.r; - q__1.r = z__[i__5].r - q__2.r, q__1.i = z__[i__5].i - - q__2.i; - z__[i__4].r = q__1.r, z__[i__4].i = q__1.i; -/* L80: */ - } - } - - if (k == m && m > l) { - -/* - If the QR step was started at row M > L because two - consecutive small subdiagonals were found, then extra - scaling must be performed to ensure that H(M,M-1) remains - real. -*/ - - q__1.r = 1.f - t1.r, q__1.i = 0.f - t1.i; - temp.r = q__1.r, temp.i = q__1.i; - r__1 = c_abs(&temp); - q__1.r = temp.r / r__1, q__1.i = temp.i / r__1; - temp.r = q__1.r, temp.i = q__1.i; - i__3 = m + 1 + m * h_dim1; - i__4 = m + 1 + m * h_dim1; - r_cnjg(&q__2, &temp); - q__1.r = h__[i__4].r * q__2.r - h__[i__4].i * q__2.i, q__1.i = - h__[i__4].r * q__2.i + h__[i__4].i * q__2.r; - h__[i__3].r = q__1.r, h__[i__3].i = q__1.i; - if (m + 2 <= i__) { - i__3 = m + 2 + (m + 1) * h_dim1; - i__4 = m + 2 + (m + 1) * h_dim1; - q__1.r = h__[i__4].r * temp.r - h__[i__4].i * temp.i, - q__1.i = h__[i__4].r * temp.i + h__[i__4].i * - temp.r; - h__[i__3].r = q__1.r, h__[i__3].i = q__1.i; - } - i__3 = i__; - for (j = m; j <= i__3; ++j) { - if (j != m + 1) { - if (i2 > j) { - i__4 = i2 - j; - cscal_(&i__4, &temp, &h__[j + (j + 1) * h_dim1], - ldh); - } - i__4 = j - i1; - r_cnjg(&q__1, &temp); - cscal_(&i__4, &q__1, &h__[i1 + j * h_dim1], &c__1); - if (*wantz) { - r_cnjg(&q__1, &temp); - cscal_(&nz, &q__1, &z__[*iloz + j * z_dim1], & - c__1); - } - } -/* L90: */ - } - } -/* L100: */ - } - -/* Ensure that H(I,I-1) is real. */ - - i__2 = i__ + (i__ - 1) * h_dim1; - temp.r = h__[i__2].r, temp.i = h__[i__2].i; - if (r_imag(&temp) != 0.f) { - rtemp = c_abs(&temp); - i__2 = i__ + (i__ - 1) * h_dim1; - h__[i__2].r = rtemp, h__[i__2].i = 0.f; - q__1.r = temp.r / rtemp, q__1.i = temp.i / rtemp; - temp.r = q__1.r, temp.i = q__1.i; - if (i2 > i__) { - i__2 = i2 - i__; - r_cnjg(&q__1, &temp); - cscal_(&i__2, &q__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); - } - i__2 = i__ - i1; - cscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1); - if (*wantz) { - cscal_(&nz, &temp, &z__[*iloz + i__ * z_dim1], &c__1); - } - } - -/* L110: */ - } - -/* Failure to converge in remaining number of iterations */ - - *info = i__; - return 0; - -L120: - -/* H(I,I-1) is negligible: one eigenvalue has converged. */ - - i__1 = i__; - i__2 = i__ + i__ * h_dim1; - w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; - -/* - Decrement number of remaining iterations, and return to start of - the main loop with new value of I. -*/ - - itn -= its; - i__ = l - 1; - goto L10; - -L130: - return 0; - -/* End of CLAHQR */ - -} /* clahqr_ */ - -/* Subroutine */ int clahrd_(integer *n, integer *k, integer *nb, complex *a, - integer *lda, complex *tau, complex *t, integer *ldt, complex *y, - integer *ldy) -{ - /* System generated locals */ - integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, - i__3; - complex q__1; - - /* Local variables */ - static integer i__; - static complex ei; - extern /* Subroutine */ int cscal_(integer *, complex *, complex *, - integer *), cgemv_(char *, integer *, integer *, complex *, - complex *, integer *, complex *, integer *, complex *, complex *, - integer *), ccopy_(integer *, complex *, integer *, - complex *, integer *), caxpy_(integer *, complex *, complex *, - integer *, complex *, integer *), ctrmv_(char *, char *, char *, - integer *, complex *, integer *, complex *, integer *), clarfg_(integer *, complex *, complex *, integer - *, complex *), clacgv_(integer *, complex *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CLAHRD reduces the first NB columns of a complex general n-by-(n-k+1) - matrix A so that elements below the k-th subdiagonal are zero. The - reduction is performed by a unitary similarity transformation - Q' * A * Q. The routine returns the matrices V and T which determine - Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. - - This is an auxiliary routine called by CGEHRD. - - Arguments - ========= - - N (input) INTEGER - The order of the matrix A. - - K (input) INTEGER - The offset for the reduction. Elements below the k-th - subdiagonal in the first NB columns are reduced to zero. - - NB (input) INTEGER - The number of columns to be reduced. - - A (input/output) COMPLEX array, dimension (LDA,N-K+1) - On entry, the n-by-(n-k+1) general matrix A. - On exit, the elements on and above the k-th subdiagonal in - the first NB columns are overwritten with the corresponding - elements of the reduced matrix; the elements below the k-th - subdiagonal, with the array TAU, represent the matrix Q as a - product of elementary reflectors. The other columns of A are - unchanged. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - TAU (output) COMPLEX array, dimension (NB) - The scalar factors of the elementary reflectors. See Further - Details. - - T (output) COMPLEX array, dimension (LDT,NB) - The upper triangular matrix T. - - LDT (input) INTEGER - The leading dimension of the array T. LDT >= NB. - - Y (output) COMPLEX array, dimension (LDY,NB) - The n-by-nb matrix Y. - - LDY (input) INTEGER - The leading dimension of the array Y. LDY >= max(1,N). - - Further Details - =============== - - The matrix Q is represented as a product of nb elementary reflectors - - Q = H(1) H(2) . . . H(nb). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in - A(i+k+1:n,i), and tau in TAU(i). - - The elements of the vectors v together form the (n-k+1)-by-nb matrix - V which is needed, with T and Y, to apply the transformation to the - unreduced part of the matrix, using an update of the form: - A := (I - V*T*V') * (A - Y*V'). - - The contents of A on exit are illustrated by the following example - with n = 7, k = 3 and nb = 2: - - ( a h a a a ) - ( a h a a a ) - ( a h a a a ) - ( h h a a a ) - ( v1 h a a a ) - ( v1 v2 a a a ) - ( v1 v2 a a a ) - - where a denotes an element of the original matrix A, h denotes a - modified element of the upper Hessenberg matrix H, and vi denotes an - element of the vector defining H(i). - - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - --tau; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - y_dim1 = *ldy; - y_offset = 1 + y_dim1; - y -= y_offset; - - /* Function Body */ - if (*n <= 1) { - return 0; - } - - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { - if (i__ > 1) { - -/* - Update A(1:n,i) - - Compute i-th column of A - Y * V' -*/ - - i__2 = i__ - 1; - clacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda); - i__2 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", n, &i__2, &q__1, &y[y_offset], ldy, &a[*k - + i__ - 1 + a_dim1], lda, &c_b56, &a[i__ * a_dim1 + 1], & - c__1); - i__2 = i__ - 1; - clacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda); - -/* - Apply I - V * T' * V' to this column (call it b) from the - left, using the last column of T as workspace - - Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) - ( V2 ) ( b2 ) - - where V1 is unit lower triangular - - w := V1' * b1 -*/ - - i__2 = i__ - 1; - ccopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + - 1], &c__1); - i__2 = i__ - 1; - ctrmv_("Lower", "Conjugate transpose", "Unit", &i__2, &a[*k + 1 + - a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1); - -/* w := w + V2'*b2 */ - - i__2 = *n - *k - i__ + 1; - i__3 = i__ - 1; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[*k + i__ + - a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b56, - &t[*nb * t_dim1 + 1], &c__1); - -/* w := T'*w */ - - i__2 = i__ - 1; - ctrmv_("Upper", "Conjugate transpose", "Non-unit", &i__2, &t[ - t_offset], ldt, &t[*nb * t_dim1 + 1], &c__1); - -/* b2 := b2 - V2*w */ - - i__2 = *n - *k - i__ + 1; - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &a[*k + i__ + a_dim1], - lda, &t[*nb * t_dim1 + 1], &c__1, &c_b56, &a[*k + i__ + - i__ * a_dim1], &c__1); - -/* b1 := b1 - V1*w */ - - i__2 = i__ - 1; - ctrmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1] - , lda, &t[*nb * t_dim1 + 1], &c__1); - i__2 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - caxpy_(&i__2, &q__1, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + i__ - * a_dim1], &c__1); - - i__2 = *k + i__ - 1 + (i__ - 1) * a_dim1; - a[i__2].r = ei.r, a[i__2].i = ei.i; - } - -/* - Generate the elementary reflector H(i) to annihilate - A(k+i+1:n,i) -*/ - - i__2 = *k + i__ + i__ * a_dim1; - ei.r = a[i__2].r, ei.i = a[i__2].i; - i__2 = *n - *k - i__ + 1; -/* Computing MIN */ - i__3 = *k + i__ + 1; - clarfg_(&i__2, &ei, &a[min(i__3,*n) + i__ * a_dim1], &c__1, &tau[i__]) - ; - i__2 = *k + i__ + i__ * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - -/* Compute Y(1:n,i) */ - - i__2 = *n - *k - i__ + 1; - cgemv_("No transpose", n, &i__2, &c_b56, &a[(i__ + 1) * a_dim1 + 1], - lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b55, &y[i__ * - y_dim1 + 1], &c__1); - i__2 = *n - *k - i__ + 1; - i__3 = i__ - 1; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[*k + i__ + - a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b55, &t[ - i__ * t_dim1 + 1], &c__1); - i__2 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", n, &i__2, &q__1, &y[y_offset], ldy, &t[i__ * - t_dim1 + 1], &c__1, &c_b56, &y[i__ * y_dim1 + 1], &c__1); - cscal_(n, &tau[i__], &y[i__ * y_dim1 + 1], &c__1); - -/* Compute T(1:i,i) */ - - i__2 = i__ - 1; - i__3 = i__; - q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i; - cscal_(&i__2, &q__1, &t[i__ * t_dim1 + 1], &c__1); - i__2 = i__ - 1; - ctrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt, - &t[i__ * t_dim1 + 1], &c__1) - ; - i__2 = i__ + i__ * t_dim1; - i__3 = i__; - t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i; - -/* L10: */ - } - i__1 = *k + *nb + *nb * a_dim1; - a[i__1].r = ei.r, a[i__1].i = ei.i; - - return 0; - -/* End of CLAHRD */ - -} /* clahrd_ */ - -doublereal clange_(char *norm, integer *m, integer *n, complex *a, integer * - lda, real *work) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - real ret_val, r__1, r__2; - - /* Builtin functions */ - double c_abs(complex *), sqrt(doublereal); - - /* Local variables */ - static integer i__, j; - static real sum, scale; - extern logical lsame_(char *, char *); - static real value; - extern /* Subroutine */ int classq_(integer *, complex *, integer *, real - *, real *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - CLANGE returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - complex matrix A. - - Description - =========== - - CLANGE returns the value - - CLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in CLANGE as described - above. - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. When M = 0, - CLANGE is set to zero. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. When N = 0, - CLANGE is set to zero. - - A (input) COMPLEX array, dimension (LDA,N) - The m by n matrix A. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(M,1). - - WORK (workspace) REAL array, dimension (LWORK), - where LWORK >= M when NORM = 'I'; otherwise, WORK is not - referenced. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --work; - - /* Function Body */ - if (min(*m,*n) == 0) { - value = 0.f; - } else if (lsame_(norm, "M")) { - -/* Find max(abs(A(i,j))). */ - - value = 0.f; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - r__1 = value, r__2 = c_abs(&a[i__ + j * a_dim1]); - value = dmax(r__1,r__2); -/* L10: */ - } -/* L20: */ - } - } else if (lsame_(norm, "O") || *(unsigned char *) - norm == '1') { - -/* Find norm1(A). */ - - value = 0.f; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = 0.f; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - sum += c_abs(&a[i__ + j * a_dim1]); -/* L30: */ - } - value = dmax(value,sum); -/* L40: */ - } - } else if (lsame_(norm, "I")) { - -/* Find normI(A). */ - - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.f; -/* L50: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - work[i__] += c_abs(&a[i__ + j * a_dim1]); -/* L60: */ - } -/* L70: */ - } - value = 0.f; - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - r__1 = value, r__2 = work[i__]; - value = dmax(r__1,r__2); -/* L80: */ - } - } else if (lsame_(norm, "F") || lsame_(norm, "E")) { - -/* Find normF(A). */ - - scale = 0.f; - sum = 1.f; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - classq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum); -/* L90: */ - } - value = scale * sqrt(sum); - } - - ret_val = value; - return ret_val; - -/* End of CLANGE */ - -} /* clange_ */ - -doublereal clanhe_(char *norm, char *uplo, integer *n, complex *a, integer * - lda, real *work) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - real ret_val, r__1, r__2, r__3; - - /* Builtin functions */ - double c_abs(complex *), sqrt(doublereal); - - /* Local variables */ - static integer i__, j; - static real sum, absa, scale; - extern logical lsame_(char *, char *); - static real value; - extern /* Subroutine */ int classq_(integer *, complex *, integer *, real - *, real *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - CLANHE returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - complex hermitian matrix A. - - Description - =========== - - CLANHE returns the value - - CLANHE = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in CLANHE as described - above. - - UPLO (input) CHARACTER*1 - Specifies whether the upper or lower triangular part of the - hermitian matrix A is to be referenced. - = 'U': Upper triangular part of A is referenced - = 'L': Lower triangular part of A is referenced - - N (input) INTEGER - The order of the matrix A. N >= 0. When N = 0, CLANHE is - set to zero. - - A (input) COMPLEX array, dimension (LDA,N) - The hermitian matrix A. If UPLO = 'U', the leading n by n - upper triangular part of A contains the upper triangular part - of the matrix A, and the strictly lower triangular part of A - is not referenced. If UPLO = 'L', the leading n by n lower - triangular part of A contains the lower triangular part of - the matrix A, and the strictly upper triangular part of A is - not referenced. Note that the imaginary parts of the diagonal - elements need not be set and are assumed to be zero. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(N,1). - - WORK (workspace) REAL array, dimension (LWORK), - where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, - WORK is not referenced. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --work; - - /* Function Body */ - if (*n == 0) { - value = 0.f; - } else if (lsame_(norm, "M")) { - -/* Find max(abs(A(i,j))). */ - - value = 0.f; - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - r__1 = value, r__2 = c_abs(&a[i__ + j * a_dim1]); - value = dmax(r__1,r__2); -/* L10: */ - } -/* Computing MAX */ - i__2 = j + j * a_dim1; - r__2 = value, r__3 = (r__1 = a[i__2].r, dabs(r__1)); - value = dmax(r__2,r__3); -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MAX */ - i__2 = j + j * a_dim1; - r__2 = value, r__3 = (r__1 = a[i__2].r, dabs(r__1)); - value = dmax(r__2,r__3); - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - r__1 = value, r__2 = c_abs(&a[i__ + j * a_dim1]); - value = dmax(r__1,r__2); -/* L30: */ - } -/* L40: */ - } - } - } else if (lsame_(norm, "I") || lsame_(norm, "O") || *(unsigned char *)norm == '1') { - -/* Find normI(A) ( = norm1(A), since A is hermitian). */ - - value = 0.f; - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = 0.f; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - absa = c_abs(&a[i__ + j * a_dim1]); - sum += absa; - work[i__] += absa; -/* L50: */ - } - i__2 = j + j * a_dim1; - work[j] = sum + (r__1 = a[i__2].r, dabs(r__1)); -/* L60: */ - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - r__1 = value, r__2 = work[i__]; - value = dmax(r__1,r__2); -/* L70: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.f; -/* L80: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j + j * a_dim1; - sum = work[j] + (r__1 = a[i__2].r, dabs(r__1)); - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - absa = c_abs(&a[i__ + j * a_dim1]); - sum += absa; - work[i__] += absa; -/* L90: */ - } - value = dmax(value,sum); -/* L100: */ - } - } - } else if (lsame_(norm, "F") || lsame_(norm, "E")) { - -/* Find normF(A). */ - - scale = 0.f; - sum = 1.f; - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 2; j <= i__1; ++j) { - i__2 = j - 1; - classq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); -/* L110: */ - } - } else { - i__1 = *n - 1; - for (j = 1; j <= i__1; ++j) { - i__2 = *n - j; - classq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum); -/* L120: */ - } - } - sum *= 2; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + i__ * a_dim1; - if (a[i__2].r != 0.f) { - i__2 = i__ + i__ * a_dim1; - absa = (r__1 = a[i__2].r, dabs(r__1)); - if (scale < absa) { -/* Computing 2nd power */ - r__1 = scale / absa; - sum = sum * (r__1 * r__1) + 1.f; - scale = absa; - } else { -/* Computing 2nd power */ - r__1 = absa / scale; - sum += r__1 * r__1; - } - } -/* L130: */ - } - value = scale * sqrt(sum); - } - - ret_val = value; - return ret_val; - -/* End of CLANHE */ - -} /* clanhe_ */ - -doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * - work) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - real ret_val, r__1, r__2; - - /* Builtin functions */ - double c_abs(complex *), sqrt(doublereal); - - /* Local variables */ - static integer i__, j; - static real sum, scale; - extern logical lsame_(char *, char *); - static real value; - extern /* Subroutine */ int classq_(integer *, complex *, integer *, real - *, real *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - CLANHS returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - Hessenberg matrix A. - - Description - =========== - - CLANHS returns the value - - CLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in CLANHS as described - above. - - N (input) INTEGER - The order of the matrix A. N >= 0. When N = 0, CLANHS is - set to zero. - - A (input) COMPLEX array, dimension (LDA,N) - The n by n upper Hessenberg matrix A; the part of A below the - first sub-diagonal is not referenced. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(N,1). - - WORK (workspace) REAL array, dimension (LWORK), - where LWORK >= N when NORM = 'I'; otherwise, WORK is not - referenced. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --work; - - /* Function Body */ - if (*n == 0) { - value = 0.f; - } else if (lsame_(norm, "M")) { - -/* Find max(abs(A(i,j))). */ - - value = 0.f; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - r__1 = value, r__2 = c_abs(&a[i__ + j * a_dim1]); - value = dmax(r__1,r__2); -/* L10: */ - } -/* L20: */ - } - } else if (lsame_(norm, "O") || *(unsigned char *) - norm == '1') { - -/* Find norm1(A). */ - - value = 0.f; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = 0.f; -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - sum += c_abs(&a[i__ + j * a_dim1]); -/* L30: */ - } - value = dmax(value,sum); -/* L40: */ - } - } else if (lsame_(norm, "I")) { - -/* Find normI(A). */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.f; -/* L50: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - work[i__] += c_abs(&a[i__ + j * a_dim1]); -/* L60: */ - } -/* L70: */ - } - value = 0.f; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - r__1 = value, r__2 = work[i__]; - value = dmax(r__1,r__2); -/* L80: */ - } - } else if (lsame_(norm, "F") || lsame_(norm, "E")) { - -/* Find normF(A). */ - - scale = 0.f; - sum = 1.f; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - classq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); -/* L90: */ - } - value = scale * sqrt(sum); - } - - ret_val = value; - return ret_val; - -/* End of CLANHS */ - -} /* clanhs_ */ - -/* Subroutine */ int clarcm_(integer *m, integer *n, real *a, integer *lda, - complex *b, integer *ldb, complex *c__, integer *ldc, real *rwork) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, - i__3, i__4, i__5; - real r__1; - complex q__1; - - /* Builtin functions */ - double r_imag(complex *); - - /* Local variables */ - static integer i__, j, l; - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CLARCM performs a very simple matrix-matrix multiplication: - C := A * B, - where A is M by M and real; B is M by N and complex; - C is M by N and complex. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A and of the matrix C. - M >= 0. - - N (input) INTEGER - The number of columns and rows of the matrix B and - the number of columns of the matrix C. - N >= 0. - - A (input) REAL array, dimension (LDA, M) - A contains the M by M matrix A. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >=max(1,M). - - B (input) REAL array, dimension (LDB, N) - B contains the M by N matrix B. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >=max(1,M). - - C (input) COMPLEX array, dimension (LDC, N) - C contains the M by N matrix C. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >=max(1,M). - - RWORK (workspace) REAL array, dimension (2*M*N) - - ===================================================================== - - - Quick return if possible. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --rwork; - - /* Function Body */ - if (*m == 0 || *n == 0) { - return 0; - } - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - rwork[(j - 1) * *m + i__] = b[i__3].r; -/* L10: */ - } -/* L20: */ - } - - l = *m * *n + 1; - sgemm_("N", "N", m, n, m, &c_b871, &a[a_offset], lda, &rwork[1], m, & - c_b1101, &rwork[l], m); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = l + (j - 1) * *m + i__ - 1; - c__[i__3].r = rwork[i__4], c__[i__3].i = 0.f; -/* L30: */ - } -/* L40: */ - } - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - rwork[(j - 1) * *m + i__] = r_imag(&b[i__ + j * b_dim1]); -/* L50: */ - } -/* L60: */ - } - sgemm_("N", "N", m, n, m, &c_b871, &a[a_offset], lda, &rwork[1], m, & - c_b1101, &rwork[l], m); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - r__1 = c__[i__4].r; - i__5 = l + (j - 1) * *m + i__ - 1; - q__1.r = r__1, q__1.i = rwork[i__5]; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L70: */ - } -/* L80: */ - } - - return 0; - -/* End of CLARCM */ - -} /* clarcm_ */ - -/* Subroutine */ int clarf_(char *side, integer *m, integer *n, complex *v, - integer *incv, complex *tau, complex *c__, integer *ldc, complex * - work) -{ - /* System generated locals */ - integer c_dim1, c_offset; - complex q__1; - - /* Local variables */ - extern /* Subroutine */ int cgerc_(integer *, integer *, complex *, - complex *, integer *, complex *, integer *, complex *, integer *), - cgemv_(char *, integer *, integer *, complex *, complex *, - integer *, complex *, integer *, complex *, complex *, integer *); - extern logical lsame_(char *, char *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CLARF applies a complex elementary reflector H to a complex M-by-N - matrix C, from either the left or the right. H is represented in the - form - - H = I - tau * v * v' - - where tau is a complex scalar and v is a complex vector. - - If tau = 0, then H is taken to be the unit matrix. - - To apply H' (the conjugate transpose of H), supply conjg(tau) instead - tau. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': form H * C - = 'R': form C * H - - M (input) INTEGER - The number of rows of the matrix C. - - N (input) INTEGER - The number of columns of the matrix C. - - V (input) COMPLEX array, dimension - (1 + (M-1)*abs(INCV)) if SIDE = 'L' - or (1 + (N-1)*abs(INCV)) if SIDE = 'R' - The vector v in the representation of H. V is not used if - TAU = 0. - - INCV (input) INTEGER - The increment between elements of v. INCV <> 0. - - TAU (input) COMPLEX - The value tau in the representation of H. - - C (input/output) COMPLEX array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by the matrix H * C if SIDE = 'L', - or C * H if SIDE = 'R'. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace) COMPLEX array, dimension - (N) if SIDE = 'L' - or (M) if SIDE = 'R' - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --v; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - if (lsame_(side, "L")) { - -/* Form H * C */ - - if (tau->r != 0.f || tau->i != 0.f) { - -/* w := C' * v */ - - cgemv_("Conjugate transpose", m, n, &c_b56, &c__[c_offset], ldc, & - v[1], incv, &c_b55, &work[1], &c__1); - -/* C := C - v * w' */ - - q__1.r = -tau->r, q__1.i = -tau->i; - cgerc_(m, n, &q__1, &v[1], incv, &work[1], &c__1, &c__[c_offset], - ldc); - } - } else { - -/* Form C * H */ - - if (tau->r != 0.f || tau->i != 0.f) { - -/* w := C * v */ - - cgemv_("No transpose", m, n, &c_b56, &c__[c_offset], ldc, &v[1], - incv, &c_b55, &work[1], &c__1); - -/* C := C - w * v' */ - - q__1.r = -tau->r, q__1.i = -tau->i; - cgerc_(m, n, &q__1, &work[1], &c__1, &v[1], incv, &c__[c_offset], - ldc); - } - } - return 0; - -/* End of CLARF */ - -} /* clarf_ */ - -/* Subroutine */ int clarfb_(char *side, char *trans, char *direct, char * - storev, integer *m, integer *n, integer *k, complex *v, integer *ldv, - complex *t, integer *ldt, complex *c__, integer *ldc, complex *work, - integer *ldwork) -{ - /* System generated locals */ - integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, - work_offset, i__1, i__2, i__3, i__4, i__5; - complex q__1, q__2; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, j; - extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, - integer *, complex *, complex *, integer *, complex *, integer *, - complex *, complex *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, - complex *, integer *), ctrmm_(char *, char *, char *, char *, - integer *, integer *, complex *, complex *, integer *, complex *, - integer *), clacgv_(integer *, - complex *, integer *); - static char transt[1]; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CLARFB applies a complex block reflector H or its transpose H' to a - complex M-by-N matrix C, from either the left or the right. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply H or H' from the Left - = 'R': apply H or H' from the Right - - TRANS (input) CHARACTER*1 - = 'N': apply H (No transpose) - = 'C': apply H' (Conjugate transpose) - - DIRECT (input) CHARACTER*1 - Indicates how H is formed from a product of elementary - reflectors - = 'F': H = H(1) H(2) . . . H(k) (Forward) - = 'B': H = H(k) . . . H(2) H(1) (Backward) - - STOREV (input) CHARACTER*1 - Indicates how the vectors which define the elementary - reflectors are stored: - = 'C': Columnwise - = 'R': Rowwise - - M (input) INTEGER - The number of rows of the matrix C. - - N (input) INTEGER - The number of columns of the matrix C. - - K (input) INTEGER - The order of the matrix T (= the number of elementary - reflectors whose product defines the block reflector). - - V (input) COMPLEX array, dimension - (LDV,K) if STOREV = 'C' - (LDV,M) if STOREV = 'R' and SIDE = 'L' - (LDV,N) if STOREV = 'R' and SIDE = 'R' - The matrix V. See further details. - - LDV (input) INTEGER - The leading dimension of the array V. - If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); - if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); - if STOREV = 'R', LDV >= K. - - T (input) COMPLEX array, dimension (LDT,K) - The triangular K-by-K matrix T in the representation of the - block reflector. - - LDT (input) INTEGER - The leading dimension of the array T. LDT >= K. - - C (input/output) COMPLEX array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by H*C or H'*C or C*H or C*H'. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace) COMPLEX array, dimension (LDWORK,K) - - LDWORK (input) INTEGER - The leading dimension of the array WORK. - If SIDE = 'L', LDWORK >= max(1,N); - if SIDE = 'R', LDWORK >= max(1,M). - - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - v_dim1 = *ldv; - v_offset = 1 + v_dim1; - v -= v_offset; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - work_dim1 = *ldwork; - work_offset = 1 + work_dim1; - work -= work_offset; - - /* Function Body */ - if (*m <= 0 || *n <= 0) { - return 0; - } - - if (lsame_(trans, "N")) { - *(unsigned char *)transt = 'C'; - } else { - *(unsigned char *)transt = 'N'; - } - - if (lsame_(storev, "C")) { - - if (lsame_(direct, "F")) { - -/* - Let V = ( V1 ) (first K rows) - ( V2 ) - where V1 is unit lower triangular. -*/ - - if (lsame_(side, "L")) { - -/* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) - - W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) - - W := C1' -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - ccopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], - &c__1); - clacgv_(n, &work[j * work_dim1 + 1], &c__1); -/* L10: */ - } - -/* W := W * V1 */ - - ctrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b56, - &v[v_offset], ldv, &work[work_offset], ldwork); - if (*m > *k) { - -/* W := W + C2'*V2 */ - - i__1 = *m - *k; - cgemm_("Conjugate transpose", "No transpose", n, k, &i__1, - &c_b56, &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + - v_dim1], ldv, &c_b56, &work[work_offset], ldwork); - } - -/* W := W * T' or W * T */ - - ctrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b56, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - V * W' */ - - if (*m > *k) { - -/* C2 := C2 - V2 * W' */ - - i__1 = *m - *k; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "Conjugate transpose", &i__1, n, k, - &q__1, &v[*k + 1 + v_dim1], ldv, &work[ - work_offset], ldwork, &c_b56, &c__[*k + 1 + - c_dim1], ldc); - } - -/* W := W * V1' */ - - ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", n, k, - &c_b56, &v[v_offset], ldv, &work[work_offset], ldwork); - -/* C1 := C1 - W' */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = j + i__ * c_dim1; - i__4 = j + i__ * c_dim1; - r_cnjg(&q__2, &work[i__ + j * work_dim1]); - q__1.r = c__[i__4].r - q__2.r, q__1.i = c__[i__4].i - - q__2.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L20: */ - } -/* L30: */ - } - - } else if (lsame_(side, "R")) { - -/* - Form C * H or C * H' where C = ( C1 C2 ) - - W := C * V = (C1*V1 + C2*V2) (stored in WORK) - - W := C1 -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - ccopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * - work_dim1 + 1], &c__1); -/* L40: */ - } - -/* W := W * V1 */ - - ctrmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b56, - &v[v_offset], ldv, &work[work_offset], ldwork); - if (*n > *k) { - -/* W := W + C2 * V2 */ - - i__1 = *n - *k; - cgemm_("No transpose", "No transpose", m, k, &i__1, & - c_b56, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + - 1 + v_dim1], ldv, &c_b56, &work[work_offset], - ldwork); - } - -/* W := W * T or W * T' */ - - ctrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b56, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - W * V' */ - - if (*n > *k) { - -/* C2 := C2 - W * V2' */ - - i__1 = *n - *k; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "Conjugate transpose", m, &i__1, k, - &q__1, &work[work_offset], ldwork, &v[*k + 1 + - v_dim1], ldv, &c_b56, &c__[(*k + 1) * c_dim1 + 1], - ldc); - } - -/* W := W * V1' */ - - ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", m, k, - &c_b56, &v[v_offset], ldv, &work[work_offset], ldwork); - -/* C1 := C1 - W */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - i__5 = i__ + j * work_dim1; - q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[ - i__4].i - work[i__5].i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L50: */ - } -/* L60: */ - } - } - - } else { - -/* - Let V = ( V1 ) - ( V2 ) (last K rows) - where V2 is unit upper triangular. -*/ - - if (lsame_(side, "L")) { - -/* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) - - W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) - - W := C2' -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - ccopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * - work_dim1 + 1], &c__1); - clacgv_(n, &work[j * work_dim1 + 1], &c__1); -/* L70: */ - } - -/* W := W * V2 */ - - ctrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b56, - &v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], - ldwork); - if (*m > *k) { - -/* W := W + C1'*V1 */ - - i__1 = *m - *k; - cgemm_("Conjugate transpose", "No transpose", n, k, &i__1, - &c_b56, &c__[c_offset], ldc, &v[v_offset], ldv, & - c_b56, &work[work_offset], ldwork); - } - -/* W := W * T' or W * T */ - - ctrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b56, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - V * W' */ - - if (*m > *k) { - -/* C1 := C1 - V1 * W' */ - - i__1 = *m - *k; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "Conjugate transpose", &i__1, n, k, - &q__1, &v[v_offset], ldv, &work[work_offset], - ldwork, &c_b56, &c__[c_offset], ldc); - } - -/* W := W * V2' */ - - ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", n, k, - &c_b56, &v[*m - *k + 1 + v_dim1], ldv, &work[ - work_offset], ldwork); - -/* C2 := C2 - W' */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = *m - *k + j + i__ * c_dim1; - i__4 = *m - *k + j + i__ * c_dim1; - r_cnjg(&q__2, &work[i__ + j * work_dim1]); - q__1.r = c__[i__4].r - q__2.r, q__1.i = c__[i__4].i - - q__2.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L80: */ - } -/* L90: */ - } - - } else if (lsame_(side, "R")) { - -/* - Form C * H or C * H' where C = ( C1 C2 ) - - W := C * V = (C1*V1 + C2*V2) (stored in WORK) - - W := C2 -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - ccopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ - j * work_dim1 + 1], &c__1); -/* L100: */ - } - -/* W := W * V2 */ - - ctrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b56, - &v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], - ldwork); - if (*n > *k) { - -/* W := W + C1 * V1 */ - - i__1 = *n - *k; - cgemm_("No transpose", "No transpose", m, k, &i__1, & - c_b56, &c__[c_offset], ldc, &v[v_offset], ldv, & - c_b56, &work[work_offset], ldwork); - } - -/* W := W * T or W * T' */ - - ctrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b56, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - W * V' */ - - if (*n > *k) { - -/* C1 := C1 - W * V1' */ - - i__1 = *n - *k; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "Conjugate transpose", m, &i__1, k, - &q__1, &work[work_offset], ldwork, &v[v_offset], - ldv, &c_b56, &c__[c_offset], ldc); - } - -/* W := W * V2' */ - - ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", m, k, - &c_b56, &v[*n - *k + 1 + v_dim1], ldv, &work[ - work_offset], ldwork); - -/* C2 := C2 - W */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + (*n - *k + j) * c_dim1; - i__4 = i__ + (*n - *k + j) * c_dim1; - i__5 = i__ + j * work_dim1; - q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[ - i__4].i - work[i__5].i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L110: */ - } -/* L120: */ - } - } - } - - } else if (lsame_(storev, "R")) { - - if (lsame_(direct, "F")) { - -/* - Let V = ( V1 V2 ) (V1: first K columns) - where V1 is unit upper triangular. -*/ - - if (lsame_(side, "L")) { - -/* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) - - W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) - - W := C1' -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - ccopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], - &c__1); - clacgv_(n, &work[j * work_dim1 + 1], &c__1); -/* L130: */ - } - -/* W := W * V1' */ - - ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", n, k, - &c_b56, &v[v_offset], ldv, &work[work_offset], ldwork); - if (*m > *k) { - -/* W := W + C2'*V2' */ - - i__1 = *m - *k; - cgemm_("Conjugate transpose", "Conjugate transpose", n, k, - &i__1, &c_b56, &c__[*k + 1 + c_dim1], ldc, &v[(* - k + 1) * v_dim1 + 1], ldv, &c_b56, &work[ - work_offset], ldwork); - } - -/* W := W * T' or W * T */ - - ctrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b56, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - V' * W' */ - - if (*m > *k) { - -/* C2 := C2 - V2' * W' */ - - i__1 = *m - *k; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("Conjugate transpose", "Conjugate transpose", & - i__1, n, k, &q__1, &v[(*k + 1) * v_dim1 + 1], ldv, - &work[work_offset], ldwork, &c_b56, &c__[*k + 1 - + c_dim1], ldc); - } - -/* W := W * V1 */ - - ctrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b56, - &v[v_offset], ldv, &work[work_offset], ldwork); - -/* C1 := C1 - W' */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = j + i__ * c_dim1; - i__4 = j + i__ * c_dim1; - r_cnjg(&q__2, &work[i__ + j * work_dim1]); - q__1.r = c__[i__4].r - q__2.r, q__1.i = c__[i__4].i - - q__2.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L140: */ - } -/* L150: */ - } - - } else if (lsame_(side, "R")) { - -/* - Form C * H or C * H' where C = ( C1 C2 ) - - W := C * V' = (C1*V1' + C2*V2') (stored in WORK) - - W := C1 -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - ccopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * - work_dim1 + 1], &c__1); -/* L160: */ - } - -/* W := W * V1' */ - - ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", m, k, - &c_b56, &v[v_offset], ldv, &work[work_offset], ldwork); - if (*n > *k) { - -/* W := W + C2 * V2' */ - - i__1 = *n - *k; - cgemm_("No transpose", "Conjugate transpose", m, k, &i__1, - &c_b56, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k - + 1) * v_dim1 + 1], ldv, &c_b56, &work[ - work_offset], ldwork); - } - -/* W := W * T or W * T' */ - - ctrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b56, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - W * V */ - - if (*n > *k) { - -/* C2 := C2 - W * V2 */ - - i__1 = *n - *k; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "No transpose", m, &i__1, k, &q__1, - &work[work_offset], ldwork, &v[(*k + 1) * v_dim1 - + 1], ldv, &c_b56, &c__[(*k + 1) * c_dim1 + 1], - ldc); - } - -/* W := W * V1 */ - - ctrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b56, - &v[v_offset], ldv, &work[work_offset], ldwork); - -/* C1 := C1 - W */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - i__5 = i__ + j * work_dim1; - q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[ - i__4].i - work[i__5].i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L170: */ - } -/* L180: */ - } - - } - - } else { - -/* - Let V = ( V1 V2 ) (V2: last K columns) - where V2 is unit lower triangular. -*/ - - if (lsame_(side, "L")) { - -/* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) - - W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) - - W := C2' -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - ccopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * - work_dim1 + 1], &c__1); - clacgv_(n, &work[j * work_dim1 + 1], &c__1); -/* L190: */ - } - -/* W := W * V2' */ - - ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", n, k, - &c_b56, &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); - if (*m > *k) { - -/* W := W + C1'*V1' */ - - i__1 = *m - *k; - cgemm_("Conjugate transpose", "Conjugate transpose", n, k, - &i__1, &c_b56, &c__[c_offset], ldc, &v[v_offset], - ldv, &c_b56, &work[work_offset], ldwork); - } - -/* W := W * T' or W * T */ - - ctrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b56, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - V' * W' */ - - if (*m > *k) { - -/* C1 := C1 - V1' * W' */ - - i__1 = *m - *k; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("Conjugate transpose", "Conjugate transpose", & - i__1, n, k, &q__1, &v[v_offset], ldv, &work[ - work_offset], ldwork, &c_b56, &c__[c_offset], ldc); - } - -/* W := W * V2 */ - - ctrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b56, - &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); - -/* C2 := C2 - W' */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = *m - *k + j + i__ * c_dim1; - i__4 = *m - *k + j + i__ * c_dim1; - r_cnjg(&q__2, &work[i__ + j * work_dim1]); - q__1.r = c__[i__4].r - q__2.r, q__1.i = c__[i__4].i - - q__2.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L200: */ - } -/* L210: */ - } - - } else if (lsame_(side, "R")) { - -/* - Form C * H or C * H' where C = ( C1 C2 ) - - W := C * V' = (C1*V1' + C2*V2') (stored in WORK) - - W := C2 -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - ccopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ - j * work_dim1 + 1], &c__1); -/* L220: */ - } - -/* W := W * V2' */ - - ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", m, k, - &c_b56, &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); - if (*n > *k) { - -/* W := W + C1 * V1' */ - - i__1 = *n - *k; - cgemm_("No transpose", "Conjugate transpose", m, k, &i__1, - &c_b56, &c__[c_offset], ldc, &v[v_offset], ldv, & - c_b56, &work[work_offset], ldwork); - } - -/* W := W * T or W * T' */ - - ctrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b56, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - W * V */ - - if (*n > *k) { - -/* C1 := C1 - W * V1 */ - - i__1 = *n - *k; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "No transpose", m, &i__1, k, &q__1, - &work[work_offset], ldwork, &v[v_offset], ldv, & - c_b56, &c__[c_offset], ldc); - } - -/* W := W * V2 */ - - ctrmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b56, - &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); - -/* C1 := C1 - W */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + (*n - *k + j) * c_dim1; - i__4 = i__ + (*n - *k + j) * c_dim1; - i__5 = i__ + j * work_dim1; - q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[ - i__4].i - work[i__5].i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L230: */ - } -/* L240: */ - } - - } - - } - } - - return 0; - -/* End of CLARFB */ - -} /* clarfb_ */ - -/* Subroutine */ int clarfg_(integer *n, complex *alpha, complex *x, integer * - incx, complex *tau) -{ - /* System generated locals */ - integer i__1; - real r__1, r__2; - complex q__1, q__2; - - /* Builtin functions */ - double r_imag(complex *), r_sign(real *, real *); - - /* Local variables */ - static integer j, knt; - static real beta; - extern /* Subroutine */ int cscal_(integer *, complex *, complex *, - integer *); - static real alphi, alphr, xnorm; - extern doublereal scnrm2_(integer *, complex *, integer *), slapy3_(real * - , real *, real *); - extern /* Complex */ VOID cladiv_(complex *, complex *, complex *); - extern doublereal slamch_(char *); - extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer - *); - static real safmin, rsafmn; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CLARFG generates a complex elementary reflector H of order n, such - that - - H' * ( alpha ) = ( beta ), H' * H = I. - ( x ) ( 0 ) - - where alpha and beta are scalars, with beta real, and x is an - (n-1)-element complex vector. H is represented in the form - - H = I - tau * ( 1 ) * ( 1 v' ) , - ( v ) - - where tau is a complex scalar and v is a complex (n-1)-element - vector. Note that H is not hermitian. - - If the elements of x are all zero and alpha is real, then tau = 0 - and H is taken to be the unit matrix. - - Otherwise 1 <= real(tau) <= 2 and abs(tau-1) <= 1 . - - Arguments - ========= - - N (input) INTEGER - The order of the elementary reflector. - - ALPHA (input/output) COMPLEX - On entry, the value alpha. - On exit, it is overwritten with the value beta. - - X (input/output) COMPLEX array, dimension - (1+(N-2)*abs(INCX)) - On entry, the vector x. - On exit, it is overwritten with the vector v. - - INCX (input) INTEGER - The increment between elements of X. INCX > 0. - - TAU (output) COMPLEX - The value tau. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --x; - - /* Function Body */ - if (*n <= 0) { - tau->r = 0.f, tau->i = 0.f; - return 0; - } - - i__1 = *n - 1; - xnorm = scnrm2_(&i__1, &x[1], incx); - alphr = alpha->r; - alphi = r_imag(alpha); - - if (xnorm == 0.f && alphi == 0.f) { - -/* H = I */ - - tau->r = 0.f, tau->i = 0.f; - } else { - -/* general case */ - - r__1 = slapy3_(&alphr, &alphi, &xnorm); - beta = -r_sign(&r__1, &alphr); - safmin = slamch_("S") / slamch_("E"); - rsafmn = 1.f / safmin; - - if (dabs(beta) < safmin) { - -/* XNORM, BETA may be inaccurate; scale X and recompute them */ - - knt = 0; -L10: - ++knt; - i__1 = *n - 1; - csscal_(&i__1, &rsafmn, &x[1], incx); - beta *= rsafmn; - alphi *= rsafmn; - alphr *= rsafmn; - if (dabs(beta) < safmin) { - goto L10; - } - -/* New BETA is at most 1, at least SAFMIN */ - - i__1 = *n - 1; - xnorm = scnrm2_(&i__1, &x[1], incx); - q__1.r = alphr, q__1.i = alphi; - alpha->r = q__1.r, alpha->i = q__1.i; - r__1 = slapy3_(&alphr, &alphi, &xnorm); - beta = -r_sign(&r__1, &alphr); - r__1 = (beta - alphr) / beta; - r__2 = -alphi / beta; - q__1.r = r__1, q__1.i = r__2; - tau->r = q__1.r, tau->i = q__1.i; - q__2.r = alpha->r - beta, q__2.i = alpha->i; - cladiv_(&q__1, &c_b56, &q__2); - alpha->r = q__1.r, alpha->i = q__1.i; - i__1 = *n - 1; - cscal_(&i__1, alpha, &x[1], incx); - -/* If ALPHA is subnormal, it may lose relative accuracy */ - - alpha->r = beta, alpha->i = 0.f; - i__1 = knt; - for (j = 1; j <= i__1; ++j) { - q__1.r = safmin * alpha->r, q__1.i = safmin * alpha->i; - alpha->r = q__1.r, alpha->i = q__1.i; -/* L20: */ - } - } else { - r__1 = (beta - alphr) / beta; - r__2 = -alphi / beta; - q__1.r = r__1, q__1.i = r__2; - tau->r = q__1.r, tau->i = q__1.i; - q__2.r = alpha->r - beta, q__2.i = alpha->i; - cladiv_(&q__1, &c_b56, &q__2); - alpha->r = q__1.r, alpha->i = q__1.i; - i__1 = *n - 1; - cscal_(&i__1, alpha, &x[1], incx); - alpha->r = beta, alpha->i = 0.f; - } - } - - return 0; - -/* End of CLARFG */ - -} /* clarfg_ */ - -/* Subroutine */ int clarft_(char *direct, char *storev, integer *n, integer * - k, complex *v, integer *ldv, complex *tau, complex *t, integer *ldt) -{ - /* System generated locals */ - integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4; - complex q__1; - - /* Local variables */ - static integer i__, j; - static complex vii; - extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * - , complex *, integer *, complex *, integer *, complex *, complex * - , integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int ctrmv_(char *, char *, char *, integer *, - complex *, integer *, complex *, integer *), clacgv_(integer *, complex *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CLARFT forms the triangular factor T of a complex block reflector H - of order n, which is defined as a product of k elementary reflectors. - - If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; - - If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. - - If STOREV = 'C', the vector which defines the elementary reflector - H(i) is stored in the i-th column of the array V, and - - H = I - V * T * V' - - If STOREV = 'R', the vector which defines the elementary reflector - H(i) is stored in the i-th row of the array V, and - - H = I - V' * T * V - - Arguments - ========= - - DIRECT (input) CHARACTER*1 - Specifies the order in which the elementary reflectors are - multiplied to form the block reflector: - = 'F': H = H(1) H(2) . . . H(k) (Forward) - = 'B': H = H(k) . . . H(2) H(1) (Backward) - - STOREV (input) CHARACTER*1 - Specifies how the vectors which define the elementary - reflectors are stored (see also Further Details): - = 'C': columnwise - = 'R': rowwise - - N (input) INTEGER - The order of the block reflector H. N >= 0. - - K (input) INTEGER - The order of the triangular factor T (= the number of - elementary reflectors). K >= 1. - - V (input/output) COMPLEX array, dimension - (LDV,K) if STOREV = 'C' - (LDV,N) if STOREV = 'R' - The matrix V. See further details. - - LDV (input) INTEGER - The leading dimension of the array V. - If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. - - TAU (input) COMPLEX array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i). - - T (output) COMPLEX array, dimension (LDT,K) - The k by k triangular factor T of the block reflector. - If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is - lower triangular. The rest of the array is not used. - - LDT (input) INTEGER - The leading dimension of the array T. LDT >= K. - - Further Details - =============== - - The shape of the matrix V and the storage of the vectors which define - the H(i) is best illustrated by the following example with n = 5 and - k = 3. The elements equal to 1 are not stored; the corresponding - array elements are modified but restored on exit. The rest of the - array is not used. - - DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': - - V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) - ( v1 1 ) ( 1 v2 v2 v2 ) - ( v1 v2 1 ) ( 1 v3 v3 ) - ( v1 v2 v3 ) - ( v1 v2 v3 ) - - DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': - - V = ( v1 v2 v3 ) V = ( v1 v1 1 ) - ( v1 v2 v3 ) ( v2 v2 v2 1 ) - ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) - ( 1 v3 ) - ( 1 ) - - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - v_dim1 = *ldv; - v_offset = 1 + v_dim1; - v -= v_offset; - --tau; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - - /* Function Body */ - if (*n == 0) { - return 0; - } - - if (lsame_(direct, "F")) { - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - if (tau[i__2].r == 0.f && tau[i__2].i == 0.f) { - -/* H(i) = I */ - - i__2 = i__; - for (j = 1; j <= i__2; ++j) { - i__3 = j + i__ * t_dim1; - t[i__3].r = 0.f, t[i__3].i = 0.f; -/* L10: */ - } - } else { - -/* general case */ - - i__2 = i__ + i__ * v_dim1; - vii.r = v[i__2].r, vii.i = v[i__2].i; - i__2 = i__ + i__ * v_dim1; - v[i__2].r = 1.f, v[i__2].i = 0.f; - if (lsame_(storev, "C")) { - -/* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */ - - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - i__4 = i__; - q__1.r = -tau[i__4].r, q__1.i = -tau[i__4].i; - cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &v[i__ - + v_dim1], ldv, &v[i__ + i__ * v_dim1], &c__1, & - c_b55, &t[i__ * t_dim1 + 1], &c__1); - } else { - -/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */ - - if (i__ < *n) { - i__2 = *n - i__; - clacgv_(&i__2, &v[i__ + (i__ + 1) * v_dim1], ldv); - } - i__2 = i__ - 1; - i__3 = *n - i__ + 1; - i__4 = i__; - q__1.r = -tau[i__4].r, q__1.i = -tau[i__4].i; - cgemv_("No transpose", &i__2, &i__3, &q__1, &v[i__ * - v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, & - c_b55, &t[i__ * t_dim1 + 1], &c__1); - if (i__ < *n) { - i__2 = *n - i__; - clacgv_(&i__2, &v[i__ + (i__ + 1) * v_dim1], ldv); - } - } - i__2 = i__ + i__ * v_dim1; - v[i__2].r = vii.r, v[i__2].i = vii.i; - -/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ - - i__2 = i__ - 1; - ctrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ - t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); - i__2 = i__ + i__ * t_dim1; - i__3 = i__; - t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i; - } -/* L20: */ - } - } else { - for (i__ = *k; i__ >= 1; --i__) { - i__1 = i__; - if (tau[i__1].r == 0.f && tau[i__1].i == 0.f) { - -/* H(i) = I */ - - i__1 = *k; - for (j = i__; j <= i__1; ++j) { - i__2 = j + i__ * t_dim1; - t[i__2].r = 0.f, t[i__2].i = 0.f; -/* L30: */ - } - } else { - -/* general case */ - - if (i__ < *k) { - if (lsame_(storev, "C")) { - i__1 = *n - *k + i__ + i__ * v_dim1; - vii.r = v[i__1].r, vii.i = v[i__1].i; - i__1 = *n - *k + i__ + i__ * v_dim1; - v[i__1].r = 1.f, v[i__1].i = 0.f; - -/* - T(i+1:k,i) := - - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) -*/ - - i__1 = *n - *k + i__; - i__2 = *k - i__; - i__3 = i__; - q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i; - cgemv_("Conjugate transpose", &i__1, &i__2, &q__1, &v[ - (i__ + 1) * v_dim1 + 1], ldv, &v[i__ * v_dim1 - + 1], &c__1, &c_b55, &t[i__ + 1 + i__ * - t_dim1], &c__1); - i__1 = *n - *k + i__ + i__ * v_dim1; - v[i__1].r = vii.r, v[i__1].i = vii.i; - } else { - i__1 = i__ + (*n - *k + i__) * v_dim1; - vii.r = v[i__1].r, vii.i = v[i__1].i; - i__1 = i__ + (*n - *k + i__) * v_dim1; - v[i__1].r = 1.f, v[i__1].i = 0.f; - -/* - T(i+1:k,i) := - - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' -*/ - - i__1 = *n - *k + i__ - 1; - clacgv_(&i__1, &v[i__ + v_dim1], ldv); - i__1 = *k - i__; - i__2 = *n - *k + i__; - i__3 = i__; - q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i; - cgemv_("No transpose", &i__1, &i__2, &q__1, &v[i__ + - 1 + v_dim1], ldv, &v[i__ + v_dim1], ldv, & - c_b55, &t[i__ + 1 + i__ * t_dim1], &c__1); - i__1 = *n - *k + i__ - 1; - clacgv_(&i__1, &v[i__ + v_dim1], ldv); - i__1 = i__ + (*n - *k + i__) * v_dim1; - v[i__1].r = vii.r, v[i__1].i = vii.i; - } - -/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ - - i__1 = *k - i__; - ctrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ - + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * - t_dim1], &c__1) - ; - } - i__1 = i__ + i__ * t_dim1; - i__2 = i__; - t[i__1].r = tau[i__2].r, t[i__1].i = tau[i__2].i; - } -/* L40: */ - } - } - return 0; - -/* End of CLARFT */ - -} /* clarft_ */ - -/* Subroutine */ int clarfx_(char *side, integer *m, integer *n, complex *v, - complex *tau, complex *c__, integer *ldc, complex *work) -{ - /* System generated locals */ - integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, - i__9, i__10, i__11; - complex q__1, q__2, q__3, q__4, q__5, q__6, q__7, q__8, q__9, q__10, - q__11, q__12, q__13, q__14, q__15, q__16, q__17, q__18, q__19; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer j; - static complex t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, v5, v6, - v7, v8, v9, t10, v10, sum; - extern /* Subroutine */ int cgerc_(integer *, integer *, complex *, - complex *, integer *, complex *, integer *, complex *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * - , complex *, integer *, complex *, integer *, complex *, complex * - , integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CLARFX applies a complex elementary reflector H to a complex m by n - matrix C, from either the left or the right. H is represented in the - form - - H = I - tau * v * v' - - where tau is a complex scalar and v is a complex vector. - - If tau = 0, then H is taken to be the unit matrix - - This version uses inline code if H has order < 11. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': form H * C - = 'R': form C * H - - M (input) INTEGER - The number of rows of the matrix C. - - N (input) INTEGER - The number of columns of the matrix C. - - V (input) COMPLEX array, dimension (M) if SIDE = 'L' - or (N) if SIDE = 'R' - The vector v in the representation of H. - - TAU (input) COMPLEX - The value tau in the representation of H. - - C (input/output) COMPLEX array, dimension (LDC,N) - On entry, the m by n matrix C. - On exit, C is overwritten by the matrix H * C if SIDE = 'L', - or C * H if SIDE = 'R'. - - LDC (input) INTEGER - The leading dimension of the array C. LDA >= max(1,M). - - WORK (workspace) COMPLEX array, dimension (N) if SIDE = 'L' - or (M) if SIDE = 'R' - WORK is not referenced if H has order < 11. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --v; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - if (tau->r == 0.f && tau->i == 0.f) { - return 0; - } - if (lsame_(side, "L")) { - -/* Form H * C, where H has order m. */ - - switch (*m) { - case 1: goto L10; - case 2: goto L30; - case 3: goto L50; - case 4: goto L70; - case 5: goto L90; - case 6: goto L110; - case 7: goto L130; - case 8: goto L150; - case 9: goto L170; - case 10: goto L190; - } - -/* - Code for general M - - w := C'*v -*/ - - cgemv_("Conjugate transpose", m, n, &c_b56, &c__[c_offset], ldc, &v[1] - , &c__1, &c_b55, &work[1], &c__1); - -/* C := C - tau * v * w' */ - - q__1.r = -tau->r, q__1.i = -tau->i; - cgerc_(m, n, &q__1, &v[1], &c__1, &work[1], &c__1, &c__[c_offset], - ldc); - goto L410; -L10: - -/* Special code for 1 x 1 Householder */ - - q__3.r = tau->r * v[1].r - tau->i * v[1].i, q__3.i = tau->r * v[1].i - + tau->i * v[1].r; - r_cnjg(&q__4, &v[1]); - q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, q__2.i = q__3.r * q__4.i - + q__3.i * q__4.r; - q__1.r = 1.f - q__2.r, q__1.i = 0.f - q__2.i; - t1.r = q__1.r, t1.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__1.r = t1.r * c__[i__3].r - t1.i * c__[i__3].i, q__1.i = t1.r * - c__[i__3].i + t1.i * c__[i__3].r; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L20: */ - } - goto L410; -L30: - -/* Special code for 2 x 2 Householder */ - - r_cnjg(&q__1, &v[1]); - v1.r = q__1.r, v1.i = q__1.i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - r_cnjg(&q__1, &v[2]); - v2.r = q__1.r, v2.i = q__1.i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - q__2.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__2.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - q__3.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__3.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L40: */ - } - goto L410; -L50: - -/* Special code for 3 x 3 Householder */ - - r_cnjg(&q__1, &v[1]); - v1.r = q__1.r, v1.i = q__1.i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - r_cnjg(&q__1, &v[2]); - v2.r = q__1.r, v2.i = q__1.i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - r_cnjg(&q__1, &v[3]); - v3.r = q__1.r, v3.i = q__1.i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - q__3.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__3.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - q__4.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__4.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + q__4.i; - i__4 = j * c_dim1 + 3; - q__5.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__5.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + q__5.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L60: */ - } - goto L410; -L70: - -/* Special code for 4 x 4 Householder */ - - r_cnjg(&q__1, &v[1]); - v1.r = q__1.r, v1.i = q__1.i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - r_cnjg(&q__1, &v[2]); - v2.r = q__1.r, v2.i = q__1.i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - r_cnjg(&q__1, &v[3]); - v3.r = q__1.r, v3.i = q__1.i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - r_cnjg(&q__1, &v[4]); - v4.r = q__1.r, v4.i = q__1.i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - q__4.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__4.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - q__5.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__5.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__3.r = q__4.r + q__5.r, q__3.i = q__4.i + q__5.i; - i__4 = j * c_dim1 + 3; - q__6.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__6.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__2.r = q__3.r + q__6.r, q__2.i = q__3.i + q__6.i; - i__5 = j * c_dim1 + 4; - q__7.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__7.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - q__1.r = q__2.r + q__7.r, q__1.i = q__2.i + q__7.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L80: */ - } - goto L410; -L90: - -/* Special code for 5 x 5 Householder */ - - r_cnjg(&q__1, &v[1]); - v1.r = q__1.r, v1.i = q__1.i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - r_cnjg(&q__1, &v[2]); - v2.r = q__1.r, v2.i = q__1.i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - r_cnjg(&q__1, &v[3]); - v3.r = q__1.r, v3.i = q__1.i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - r_cnjg(&q__1, &v[4]); - v4.r = q__1.r, v4.i = q__1.i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - r_cnjg(&q__1, &v[5]); - v5.r = q__1.r, v5.i = q__1.i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - q__5.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__5.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - q__6.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__6.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__4.r = q__5.r + q__6.r, q__4.i = q__5.i + q__6.i; - i__4 = j * c_dim1 + 3; - q__7.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__7.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__3.r = q__4.r + q__7.r, q__3.i = q__4.i + q__7.i; - i__5 = j * c_dim1 + 4; - q__8.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__8.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - q__2.r = q__3.r + q__8.r, q__2.i = q__3.i + q__8.i; - i__6 = j * c_dim1 + 5; - q__9.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__9.i = v5.r * - c__[i__6].i + v5.i * c__[i__6].r; - q__1.r = q__2.r + q__9.r, q__1.i = q__2.i + q__9.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L100: */ - } - goto L410; -L110: - -/* Special code for 6 x 6 Householder */ - - r_cnjg(&q__1, &v[1]); - v1.r = q__1.r, v1.i = q__1.i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - r_cnjg(&q__1, &v[2]); - v2.r = q__1.r, v2.i = q__1.i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - r_cnjg(&q__1, &v[3]); - v3.r = q__1.r, v3.i = q__1.i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - r_cnjg(&q__1, &v[4]); - v4.r = q__1.r, v4.i = q__1.i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - r_cnjg(&q__1, &v[5]); - v5.r = q__1.r, v5.i = q__1.i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - r_cnjg(&q__1, &v[6]); - v6.r = q__1.r, v6.i = q__1.i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - q__6.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__6.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - q__7.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__7.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__5.r = q__6.r + q__7.r, q__5.i = q__6.i + q__7.i; - i__4 = j * c_dim1 + 3; - q__8.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__8.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__4.r = q__5.r + q__8.r, q__4.i = q__5.i + q__8.i; - i__5 = j * c_dim1 + 4; - q__9.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__9.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - q__3.r = q__4.r + q__9.r, q__3.i = q__4.i + q__9.i; - i__6 = j * c_dim1 + 5; - q__10.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__10.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__2.r = q__3.r + q__10.r, q__2.i = q__3.i + q__10.i; - i__7 = j * c_dim1 + 6; - q__11.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__11.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__1.r = q__2.r + q__11.r, q__1.i = q__2.i + q__11.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 6; - i__3 = j * c_dim1 + 6; - q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L120: */ - } - goto L410; -L130: - -/* Special code for 7 x 7 Householder */ - - r_cnjg(&q__1, &v[1]); - v1.r = q__1.r, v1.i = q__1.i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - r_cnjg(&q__1, &v[2]); - v2.r = q__1.r, v2.i = q__1.i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - r_cnjg(&q__1, &v[3]); - v3.r = q__1.r, v3.i = q__1.i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - r_cnjg(&q__1, &v[4]); - v4.r = q__1.r, v4.i = q__1.i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - r_cnjg(&q__1, &v[5]); - v5.r = q__1.r, v5.i = q__1.i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - r_cnjg(&q__1, &v[6]); - v6.r = q__1.r, v6.i = q__1.i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - r_cnjg(&q__1, &v[7]); - v7.r = q__1.r, v7.i = q__1.i; - r_cnjg(&q__2, &v7); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t7.r = q__1.r, t7.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - q__7.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__7.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - q__8.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__8.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__6.r = q__7.r + q__8.r, q__6.i = q__7.i + q__8.i; - i__4 = j * c_dim1 + 3; - q__9.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__9.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__5.r = q__6.r + q__9.r, q__5.i = q__6.i + q__9.i; - i__5 = j * c_dim1 + 4; - q__10.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__10.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - q__4.r = q__5.r + q__10.r, q__4.i = q__5.i + q__10.i; - i__6 = j * c_dim1 + 5; - q__11.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__11.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__3.r = q__4.r + q__11.r, q__3.i = q__4.i + q__11.i; - i__7 = j * c_dim1 + 6; - q__12.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__12.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__2.r = q__3.r + q__12.r, q__2.i = q__3.i + q__12.i; - i__8 = j * c_dim1 + 7; - q__13.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__13.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - q__1.r = q__2.r + q__13.r, q__1.i = q__2.i + q__13.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 6; - i__3 = j * c_dim1 + 6; - q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 7; - i__3 = j * c_dim1 + 7; - q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + - sum.i * t7.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L140: */ - } - goto L410; -L150: - -/* Special code for 8 x 8 Householder */ - - r_cnjg(&q__1, &v[1]); - v1.r = q__1.r, v1.i = q__1.i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - r_cnjg(&q__1, &v[2]); - v2.r = q__1.r, v2.i = q__1.i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - r_cnjg(&q__1, &v[3]); - v3.r = q__1.r, v3.i = q__1.i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - r_cnjg(&q__1, &v[4]); - v4.r = q__1.r, v4.i = q__1.i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - r_cnjg(&q__1, &v[5]); - v5.r = q__1.r, v5.i = q__1.i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - r_cnjg(&q__1, &v[6]); - v6.r = q__1.r, v6.i = q__1.i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - r_cnjg(&q__1, &v[7]); - v7.r = q__1.r, v7.i = q__1.i; - r_cnjg(&q__2, &v7); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t7.r = q__1.r, t7.i = q__1.i; - r_cnjg(&q__1, &v[8]); - v8.r = q__1.r, v8.i = q__1.i; - r_cnjg(&q__2, &v8); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t8.r = q__1.r, t8.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - q__8.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__8.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - q__9.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__9.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__7.r = q__8.r + q__9.r, q__7.i = q__8.i + q__9.i; - i__4 = j * c_dim1 + 3; - q__10.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__10.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - q__6.r = q__7.r + q__10.r, q__6.i = q__7.i + q__10.i; - i__5 = j * c_dim1 + 4; - q__11.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__11.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - q__5.r = q__6.r + q__11.r, q__5.i = q__6.i + q__11.i; - i__6 = j * c_dim1 + 5; - q__12.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__12.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__4.r = q__5.r + q__12.r, q__4.i = q__5.i + q__12.i; - i__7 = j * c_dim1 + 6; - q__13.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__13.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__3.r = q__4.r + q__13.r, q__3.i = q__4.i + q__13.i; - i__8 = j * c_dim1 + 7; - q__14.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__14.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - q__2.r = q__3.r + q__14.r, q__2.i = q__3.i + q__14.i; - i__9 = j * c_dim1 + 8; - q__15.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__15.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - q__1.r = q__2.r + q__15.r, q__1.i = q__2.i + q__15.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 6; - i__3 = j * c_dim1 + 6; - q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 7; - i__3 = j * c_dim1 + 7; - q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + - sum.i * t7.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 8; - i__3 = j * c_dim1 + 8; - q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + - sum.i * t8.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L160: */ - } - goto L410; -L170: - -/* Special code for 9 x 9 Householder */ - - r_cnjg(&q__1, &v[1]); - v1.r = q__1.r, v1.i = q__1.i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - r_cnjg(&q__1, &v[2]); - v2.r = q__1.r, v2.i = q__1.i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - r_cnjg(&q__1, &v[3]); - v3.r = q__1.r, v3.i = q__1.i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - r_cnjg(&q__1, &v[4]); - v4.r = q__1.r, v4.i = q__1.i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - r_cnjg(&q__1, &v[5]); - v5.r = q__1.r, v5.i = q__1.i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - r_cnjg(&q__1, &v[6]); - v6.r = q__1.r, v6.i = q__1.i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - r_cnjg(&q__1, &v[7]); - v7.r = q__1.r, v7.i = q__1.i; - r_cnjg(&q__2, &v7); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t7.r = q__1.r, t7.i = q__1.i; - r_cnjg(&q__1, &v[8]); - v8.r = q__1.r, v8.i = q__1.i; - r_cnjg(&q__2, &v8); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t8.r = q__1.r, t8.i = q__1.i; - r_cnjg(&q__1, &v[9]); - v9.r = q__1.r, v9.i = q__1.i; - r_cnjg(&q__2, &v9); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t9.r = q__1.r, t9.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - q__9.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__9.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - q__10.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__10.i = v2.r - * c__[i__3].i + v2.i * c__[i__3].r; - q__8.r = q__9.r + q__10.r, q__8.i = q__9.i + q__10.i; - i__4 = j * c_dim1 + 3; - q__11.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__11.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - q__7.r = q__8.r + q__11.r, q__7.i = q__8.i + q__11.i; - i__5 = j * c_dim1 + 4; - q__12.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__12.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - q__6.r = q__7.r + q__12.r, q__6.i = q__7.i + q__12.i; - i__6 = j * c_dim1 + 5; - q__13.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__13.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__5.r = q__6.r + q__13.r, q__5.i = q__6.i + q__13.i; - i__7 = j * c_dim1 + 6; - q__14.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__14.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__4.r = q__5.r + q__14.r, q__4.i = q__5.i + q__14.i; - i__8 = j * c_dim1 + 7; - q__15.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__15.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - q__3.r = q__4.r + q__15.r, q__3.i = q__4.i + q__15.i; - i__9 = j * c_dim1 + 8; - q__16.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__16.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - q__2.r = q__3.r + q__16.r, q__2.i = q__3.i + q__16.i; - i__10 = j * c_dim1 + 9; - q__17.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, q__17.i = - v9.r * c__[i__10].i + v9.i * c__[i__10].r; - q__1.r = q__2.r + q__17.r, q__1.i = q__2.i + q__17.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 6; - i__3 = j * c_dim1 + 6; - q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 7; - i__3 = j * c_dim1 + 7; - q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + - sum.i * t7.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 8; - i__3 = j * c_dim1 + 8; - q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + - sum.i * t8.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 9; - i__3 = j * c_dim1 + 9; - q__2.r = sum.r * t9.r - sum.i * t9.i, q__2.i = sum.r * t9.i + - sum.i * t9.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L180: */ - } - goto L410; -L190: - -/* Special code for 10 x 10 Householder */ - - r_cnjg(&q__1, &v[1]); - v1.r = q__1.r, v1.i = q__1.i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - r_cnjg(&q__1, &v[2]); - v2.r = q__1.r, v2.i = q__1.i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - r_cnjg(&q__1, &v[3]); - v3.r = q__1.r, v3.i = q__1.i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - r_cnjg(&q__1, &v[4]); - v4.r = q__1.r, v4.i = q__1.i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - r_cnjg(&q__1, &v[5]); - v5.r = q__1.r, v5.i = q__1.i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - r_cnjg(&q__1, &v[6]); - v6.r = q__1.r, v6.i = q__1.i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - r_cnjg(&q__1, &v[7]); - v7.r = q__1.r, v7.i = q__1.i; - r_cnjg(&q__2, &v7); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t7.r = q__1.r, t7.i = q__1.i; - r_cnjg(&q__1, &v[8]); - v8.r = q__1.r, v8.i = q__1.i; - r_cnjg(&q__2, &v8); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t8.r = q__1.r, t8.i = q__1.i; - r_cnjg(&q__1, &v[9]); - v9.r = q__1.r, v9.i = q__1.i; - r_cnjg(&q__2, &v9); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t9.r = q__1.r, t9.i = q__1.i; - r_cnjg(&q__1, &v[10]); - v10.r = q__1.r, v10.i = q__1.i; - r_cnjg(&q__2, &v10); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t10.r = q__1.r, t10.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - q__10.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__10.i = v1.r - * c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - q__11.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__11.i = v2.r - * c__[i__3].i + v2.i * c__[i__3].r; - q__9.r = q__10.r + q__11.r, q__9.i = q__10.i + q__11.i; - i__4 = j * c_dim1 + 3; - q__12.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__12.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - q__8.r = q__9.r + q__12.r, q__8.i = q__9.i + q__12.i; - i__5 = j * c_dim1 + 4; - q__13.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__13.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - q__7.r = q__8.r + q__13.r, q__7.i = q__8.i + q__13.i; - i__6 = j * c_dim1 + 5; - q__14.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__14.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__6.r = q__7.r + q__14.r, q__6.i = q__7.i + q__14.i; - i__7 = j * c_dim1 + 6; - q__15.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__15.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__5.r = q__6.r + q__15.r, q__5.i = q__6.i + q__15.i; - i__8 = j * c_dim1 + 7; - q__16.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__16.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - q__4.r = q__5.r + q__16.r, q__4.i = q__5.i + q__16.i; - i__9 = j * c_dim1 + 8; - q__17.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__17.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - q__3.r = q__4.r + q__17.r, q__3.i = q__4.i + q__17.i; - i__10 = j * c_dim1 + 9; - q__18.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, q__18.i = - v9.r * c__[i__10].i + v9.i * c__[i__10].r; - q__2.r = q__3.r + q__18.r, q__2.i = q__3.i + q__18.i; - i__11 = j * c_dim1 + 10; - q__19.r = v10.r * c__[i__11].r - v10.i * c__[i__11].i, q__19.i = - v10.r * c__[i__11].i + v10.i * c__[i__11].r; - q__1.r = q__2.r + q__19.r, q__1.i = q__2.i + q__19.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 6; - i__3 = j * c_dim1 + 6; - q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 7; - i__3 = j * c_dim1 + 7; - q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + - sum.i * t7.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 8; - i__3 = j * c_dim1 + 8; - q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + - sum.i * t8.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 9; - i__3 = j * c_dim1 + 9; - q__2.r = sum.r * t9.r - sum.i * t9.i, q__2.i = sum.r * t9.i + - sum.i * t9.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 10; - i__3 = j * c_dim1 + 10; - q__2.r = sum.r * t10.r - sum.i * t10.i, q__2.i = sum.r * t10.i + - sum.i * t10.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L200: */ - } - goto L410; - } else { - -/* Form C * H, where H has order n. */ - - switch (*n) { - case 1: goto L210; - case 2: goto L230; - case 3: goto L250; - case 4: goto L270; - case 5: goto L290; - case 6: goto L310; - case 7: goto L330; - case 8: goto L350; - case 9: goto L370; - case 10: goto L390; - } - -/* - Code for general N - - w := C * v -*/ - - cgemv_("No transpose", m, n, &c_b56, &c__[c_offset], ldc, &v[1], & - c__1, &c_b55, &work[1], &c__1); - -/* C := C - tau * w * v' */ - - q__1.r = -tau->r, q__1.i = -tau->i; - cgerc_(m, n, &q__1, &work[1], &c__1, &v[1], &c__1, &c__[c_offset], - ldc); - goto L410; -L210: - -/* Special code for 1 x 1 Householder */ - - q__3.r = tau->r * v[1].r - tau->i * v[1].i, q__3.i = tau->r * v[1].i - + tau->i * v[1].r; - r_cnjg(&q__4, &v[1]); - q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, q__2.i = q__3.r * q__4.i - + q__3.i * q__4.r; - q__1.r = 1.f - q__2.r, q__1.i = 0.f - q__2.i; - t1.r = q__1.r, t1.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__1.r = t1.r * c__[i__3].r - t1.i * c__[i__3].i, q__1.i = t1.r * - c__[i__3].i + t1.i * c__[i__3].r; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L220: */ - } - goto L410; -L230: - -/* Special code for 2 x 2 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - v2.r = v[2].r, v2.i = v[2].i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - q__2.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__2.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - q__3.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__3.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L240: */ - } - goto L410; -L250: - -/* Special code for 3 x 3 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - v2.r = v[2].r, v2.i = v[2].i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - v3.r = v[3].r, v3.i = v[3].i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - q__3.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__3.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - q__4.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__4.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + q__4.i; - i__4 = j + c_dim1 * 3; - q__5.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__5.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + q__5.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L260: */ - } - goto L410; -L270: - -/* Special code for 4 x 4 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - v2.r = v[2].r, v2.i = v[2].i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - v3.r = v[3].r, v3.i = v[3].i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - v4.r = v[4].r, v4.i = v[4].i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - q__4.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__4.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - q__5.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__5.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__3.r = q__4.r + q__5.r, q__3.i = q__4.i + q__5.i; - i__4 = j + c_dim1 * 3; - q__6.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__6.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__2.r = q__3.r + q__6.r, q__2.i = q__3.i + q__6.i; - i__5 = j + (c_dim1 << 2); - q__7.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__7.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - q__1.r = q__2.r + q__7.r, q__1.i = q__2.i + q__7.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L280: */ - } - goto L410; -L290: - -/* Special code for 5 x 5 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - v2.r = v[2].r, v2.i = v[2].i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - v3.r = v[3].r, v3.i = v[3].i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - v4.r = v[4].r, v4.i = v[4].i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - v5.r = v[5].r, v5.i = v[5].i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - q__5.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__5.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - q__6.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__6.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__4.r = q__5.r + q__6.r, q__4.i = q__5.i + q__6.i; - i__4 = j + c_dim1 * 3; - q__7.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__7.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__3.r = q__4.r + q__7.r, q__3.i = q__4.i + q__7.i; - i__5 = j + (c_dim1 << 2); - q__8.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__8.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - q__2.r = q__3.r + q__8.r, q__2.i = q__3.i + q__8.i; - i__6 = j + c_dim1 * 5; - q__9.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__9.i = v5.r * - c__[i__6].i + v5.i * c__[i__6].r; - q__1.r = q__2.r + q__9.r, q__1.i = q__2.i + q__9.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L300: */ - } - goto L410; -L310: - -/* Special code for 6 x 6 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - v2.r = v[2].r, v2.i = v[2].i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - v3.r = v[3].r, v3.i = v[3].i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - v4.r = v[4].r, v4.i = v[4].i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - v5.r = v[5].r, v5.i = v[5].i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - v6.r = v[6].r, v6.i = v[6].i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - q__6.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__6.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - q__7.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__7.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__5.r = q__6.r + q__7.r, q__5.i = q__6.i + q__7.i; - i__4 = j + c_dim1 * 3; - q__8.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__8.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__4.r = q__5.r + q__8.r, q__4.i = q__5.i + q__8.i; - i__5 = j + (c_dim1 << 2); - q__9.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__9.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - q__3.r = q__4.r + q__9.r, q__3.i = q__4.i + q__9.i; - i__6 = j + c_dim1 * 5; - q__10.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__10.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__2.r = q__3.r + q__10.r, q__2.i = q__3.i + q__10.i; - i__7 = j + c_dim1 * 6; - q__11.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__11.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__1.r = q__2.r + q__11.r, q__1.i = q__2.i + q__11.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 6; - i__3 = j + c_dim1 * 6; - q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L320: */ - } - goto L410; -L330: - -/* Special code for 7 x 7 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - v2.r = v[2].r, v2.i = v[2].i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - v3.r = v[3].r, v3.i = v[3].i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - v4.r = v[4].r, v4.i = v[4].i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - v5.r = v[5].r, v5.i = v[5].i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - v6.r = v[6].r, v6.i = v[6].i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - v7.r = v[7].r, v7.i = v[7].i; - r_cnjg(&q__2, &v7); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t7.r = q__1.r, t7.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - q__7.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__7.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - q__8.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__8.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__6.r = q__7.r + q__8.r, q__6.i = q__7.i + q__8.i; - i__4 = j + c_dim1 * 3; - q__9.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__9.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__5.r = q__6.r + q__9.r, q__5.i = q__6.i + q__9.i; - i__5 = j + (c_dim1 << 2); - q__10.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__10.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - q__4.r = q__5.r + q__10.r, q__4.i = q__5.i + q__10.i; - i__6 = j + c_dim1 * 5; - q__11.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__11.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__3.r = q__4.r + q__11.r, q__3.i = q__4.i + q__11.i; - i__7 = j + c_dim1 * 6; - q__12.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__12.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__2.r = q__3.r + q__12.r, q__2.i = q__3.i + q__12.i; - i__8 = j + c_dim1 * 7; - q__13.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__13.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - q__1.r = q__2.r + q__13.r, q__1.i = q__2.i + q__13.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 6; - i__3 = j + c_dim1 * 6; - q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 7; - i__3 = j + c_dim1 * 7; - q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + - sum.i * t7.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L340: */ - } - goto L410; -L350: - -/* Special code for 8 x 8 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - v2.r = v[2].r, v2.i = v[2].i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - v3.r = v[3].r, v3.i = v[3].i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - v4.r = v[4].r, v4.i = v[4].i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - v5.r = v[5].r, v5.i = v[5].i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - v6.r = v[6].r, v6.i = v[6].i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - v7.r = v[7].r, v7.i = v[7].i; - r_cnjg(&q__2, &v7); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t7.r = q__1.r, t7.i = q__1.i; - v8.r = v[8].r, v8.i = v[8].i; - r_cnjg(&q__2, &v8); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t8.r = q__1.r, t8.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - q__8.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__8.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - q__9.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__9.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__7.r = q__8.r + q__9.r, q__7.i = q__8.i + q__9.i; - i__4 = j + c_dim1 * 3; - q__10.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__10.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - q__6.r = q__7.r + q__10.r, q__6.i = q__7.i + q__10.i; - i__5 = j + (c_dim1 << 2); - q__11.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__11.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - q__5.r = q__6.r + q__11.r, q__5.i = q__6.i + q__11.i; - i__6 = j + c_dim1 * 5; - q__12.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__12.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__4.r = q__5.r + q__12.r, q__4.i = q__5.i + q__12.i; - i__7 = j + c_dim1 * 6; - q__13.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__13.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__3.r = q__4.r + q__13.r, q__3.i = q__4.i + q__13.i; - i__8 = j + c_dim1 * 7; - q__14.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__14.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - q__2.r = q__3.r + q__14.r, q__2.i = q__3.i + q__14.i; - i__9 = j + (c_dim1 << 3); - q__15.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__15.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - q__1.r = q__2.r + q__15.r, q__1.i = q__2.i + q__15.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 6; - i__3 = j + c_dim1 * 6; - q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 7; - i__3 = j + c_dim1 * 7; - q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + - sum.i * t7.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 3); - i__3 = j + (c_dim1 << 3); - q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + - sum.i * t8.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L360: */ - } - goto L410; -L370: - -/* Special code for 9 x 9 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - v2.r = v[2].r, v2.i = v[2].i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - v3.r = v[3].r, v3.i = v[3].i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - v4.r = v[4].r, v4.i = v[4].i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - v5.r = v[5].r, v5.i = v[5].i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - v6.r = v[6].r, v6.i = v[6].i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - v7.r = v[7].r, v7.i = v[7].i; - r_cnjg(&q__2, &v7); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t7.r = q__1.r, t7.i = q__1.i; - v8.r = v[8].r, v8.i = v[8].i; - r_cnjg(&q__2, &v8); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t8.r = q__1.r, t8.i = q__1.i; - v9.r = v[9].r, v9.i = v[9].i; - r_cnjg(&q__2, &v9); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t9.r = q__1.r, t9.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - q__9.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__9.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - q__10.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__10.i = v2.r - * c__[i__3].i + v2.i * c__[i__3].r; - q__8.r = q__9.r + q__10.r, q__8.i = q__9.i + q__10.i; - i__4 = j + c_dim1 * 3; - q__11.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__11.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - q__7.r = q__8.r + q__11.r, q__7.i = q__8.i + q__11.i; - i__5 = j + (c_dim1 << 2); - q__12.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__12.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - q__6.r = q__7.r + q__12.r, q__6.i = q__7.i + q__12.i; - i__6 = j + c_dim1 * 5; - q__13.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__13.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__5.r = q__6.r + q__13.r, q__5.i = q__6.i + q__13.i; - i__7 = j + c_dim1 * 6; - q__14.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__14.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__4.r = q__5.r + q__14.r, q__4.i = q__5.i + q__14.i; - i__8 = j + c_dim1 * 7; - q__15.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__15.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - q__3.r = q__4.r + q__15.r, q__3.i = q__4.i + q__15.i; - i__9 = j + (c_dim1 << 3); - q__16.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__16.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - q__2.r = q__3.r + q__16.r, q__2.i = q__3.i + q__16.i; - i__10 = j + c_dim1 * 9; - q__17.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, q__17.i = - v9.r * c__[i__10].i + v9.i * c__[i__10].r; - q__1.r = q__2.r + q__17.r, q__1.i = q__2.i + q__17.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 6; - i__3 = j + c_dim1 * 6; - q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 7; - i__3 = j + c_dim1 * 7; - q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + - sum.i * t7.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 3); - i__3 = j + (c_dim1 << 3); - q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + - sum.i * t8.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 9; - i__3 = j + c_dim1 * 9; - q__2.r = sum.r * t9.r - sum.i * t9.i, q__2.i = sum.r * t9.i + - sum.i * t9.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L380: */ - } - goto L410; -L390: - -/* Special code for 10 x 10 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - v2.r = v[2].r, v2.i = v[2].i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - v3.r = v[3].r, v3.i = v[3].i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - v4.r = v[4].r, v4.i = v[4].i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - v5.r = v[5].r, v5.i = v[5].i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - v6.r = v[6].r, v6.i = v[6].i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - v7.r = v[7].r, v7.i = v[7].i; - r_cnjg(&q__2, &v7); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t7.r = q__1.r, t7.i = q__1.i; - v8.r = v[8].r, v8.i = v[8].i; - r_cnjg(&q__2, &v8); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t8.r = q__1.r, t8.i = q__1.i; - v9.r = v[9].r, v9.i = v[9].i; - r_cnjg(&q__2, &v9); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t9.r = q__1.r, t9.i = q__1.i; - v10.r = v[10].r, v10.i = v[10].i; - r_cnjg(&q__2, &v10); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t10.r = q__1.r, t10.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - q__10.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__10.i = v1.r - * c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - q__11.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__11.i = v2.r - * c__[i__3].i + v2.i * c__[i__3].r; - q__9.r = q__10.r + q__11.r, q__9.i = q__10.i + q__11.i; - i__4 = j + c_dim1 * 3; - q__12.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__12.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - q__8.r = q__9.r + q__12.r, q__8.i = q__9.i + q__12.i; - i__5 = j + (c_dim1 << 2); - q__13.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__13.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - q__7.r = q__8.r + q__13.r, q__7.i = q__8.i + q__13.i; - i__6 = j + c_dim1 * 5; - q__14.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__14.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__6.r = q__7.r + q__14.r, q__6.i = q__7.i + q__14.i; - i__7 = j + c_dim1 * 6; - q__15.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__15.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__5.r = q__6.r + q__15.r, q__5.i = q__6.i + q__15.i; - i__8 = j + c_dim1 * 7; - q__16.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__16.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - q__4.r = q__5.r + q__16.r, q__4.i = q__5.i + q__16.i; - i__9 = j + (c_dim1 << 3); - q__17.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__17.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - q__3.r = q__4.r + q__17.r, q__3.i = q__4.i + q__17.i; - i__10 = j + c_dim1 * 9; - q__18.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, q__18.i = - v9.r * c__[i__10].i + v9.i * c__[i__10].r; - q__2.r = q__3.r + q__18.r, q__2.i = q__3.i + q__18.i; - i__11 = j + c_dim1 * 10; - q__19.r = v10.r * c__[i__11].r - v10.i * c__[i__11].i, q__19.i = - v10.r * c__[i__11].i + v10.i * c__[i__11].r; - q__1.r = q__2.r + q__19.r, q__1.i = q__2.i + q__19.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 6; - i__3 = j + c_dim1 * 6; - q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 7; - i__3 = j + c_dim1 * 7; - q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + - sum.i * t7.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 3); - i__3 = j + (c_dim1 << 3); - q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + - sum.i * t8.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 9; - i__3 = j + c_dim1 * 9; - q__2.r = sum.r * t9.r - sum.i * t9.i, q__2.i = sum.r * t9.i + - sum.i * t9.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 10; - i__3 = j + c_dim1 * 10; - q__2.r = sum.r * t10.r - sum.i * t10.i, q__2.i = sum.r * t10.i + - sum.i * t10.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L400: */ - } - goto L410; - } -L410: - return 0; - -/* End of CLARFX */ - -} /* clarfx_ */ - -/* Subroutine */ int clascl_(char *type__, integer *kl, integer *ku, real * - cfrom, real *cto, integer *m, integer *n, complex *a, integer *lda, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - complex q__1; - - /* Local variables */ - static integer i__, j, k1, k2, k3, k4; - static real mul, cto1; - static logical done; - static real ctoc; - extern logical lsame_(char *, char *); - static integer itype; - static real cfrom1; - extern doublereal slamch_(char *); - static real cfromc; - extern /* Subroutine */ int xerbla_(char *, integer *); - static real bignum, smlnum; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - CLASCL multiplies the M by N complex matrix A by the real scalar - CTO/CFROM. This is done without over/underflow as long as the final - result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that - A may be full, upper triangular, lower triangular, upper Hessenberg, - or banded. - - Arguments - ========= - - TYPE (input) CHARACTER*1 - TYPE indices the storage type of the input matrix. - = 'G': A is a full matrix. - = 'L': A is a lower triangular matrix. - = 'U': A is an upper triangular matrix. - = 'H': A is an upper Hessenberg matrix. - = 'B': A is a symmetric band matrix with lower bandwidth KL - and upper bandwidth KU and with the only the lower - half stored. - = 'Q': A is a symmetric band matrix with lower bandwidth KL - and upper bandwidth KU and with the only the upper - half stored. - = 'Z': A is a band matrix with lower bandwidth KL and upper - bandwidth KU. - - KL (input) INTEGER - The lower bandwidth of A. Referenced only if TYPE = 'B', - 'Q' or 'Z'. - - KU (input) INTEGER - The upper bandwidth of A. Referenced only if TYPE = 'B', - 'Q' or 'Z'. - - CFROM (input) REAL - CTO (input) REAL - The matrix A is multiplied by CTO/CFROM. A(I,J) is computed - without over/underflow if the final result CTO*A(I,J)/CFROM - can be represented without over/underflow. CFROM must be - nonzero. - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,M) - The matrix to be multiplied by CTO/CFROM. See TYPE for the - storage type. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - INFO (output) INTEGER - 0 - successful exit - <0 - if INFO = -i, the i-th argument had an illegal value. - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - - if (lsame_(type__, "G")) { - itype = 0; - } else if (lsame_(type__, "L")) { - itype = 1; - } else if (lsame_(type__, "U")) { - itype = 2; - } else if (lsame_(type__, "H")) { - itype = 3; - } else if (lsame_(type__, "B")) { - itype = 4; - } else if (lsame_(type__, "Q")) { - itype = 5; - } else if (lsame_(type__, "Z")) { - itype = 6; - } else { - itype = -1; - } - - if (itype == -1) { - *info = -1; - } else if (*cfrom == 0.f) { - *info = -4; - } else if (*m < 0) { - *info = -6; - } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { - *info = -7; - } else if (itype <= 3 && *lda < max(1,*m)) { - *info = -9; - } else if (itype >= 4) { -/* Computing MAX */ - i__1 = *m - 1; - if (*kl < 0 || *kl > max(i__1,0)) { - *info = -2; - } else /* if(complicated condition) */ { -/* Computing MAX */ - i__1 = *n - 1; - if (*ku < 0 || *ku > max(i__1,0) || (itype == 4 || itype == 5) && - *kl != *ku) { - *info = -3; - } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < * - ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) { - *info = -9; - } - } - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("CLASCL", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0 || *m == 0) { - return 0; - } - -/* Get machine parameters */ - - smlnum = slamch_("S"); - bignum = 1.f / smlnum; - - cfromc = *cfrom; - ctoc = *cto; - -L10: - cfrom1 = cfromc * smlnum; - cto1 = ctoc / bignum; - if (dabs(cfrom1) > dabs(ctoc) && ctoc != 0.f) { - mul = smlnum; - done = FALSE_; - cfromc = cfrom1; - } else if (dabs(cto1) > dabs(cfromc)) { - mul = bignum; - done = FALSE_; - ctoc = cto1; - } else { - mul = ctoc / cfromc; - done = TRUE_; - } - - if (itype == 0) { - -/* Full matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L20: */ - } -/* L30: */ - } - - } else if (itype == 1) { - -/* Lower triangular matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = j; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L40: */ - } -/* L50: */ - } - - } else if (itype == 2) { - -/* Upper triangular matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = min(j,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L60: */ - } -/* L70: */ - } - - } else if (itype == 3) { - -/* Upper Hessenberg matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = j + 1; - i__2 = min(i__3,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L80: */ - } -/* L90: */ - } - - } else if (itype == 4) { - -/* Lower half of a symmetric band matrix */ - - k3 = *kl + 1; - k4 = *n + 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = k3, i__4 = k4 - j; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L100: */ - } -/* L110: */ - } - - } else if (itype == 5) { - -/* Upper half of a symmetric band matrix */ - - k1 = *ku + 2; - k3 = *ku + 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MAX */ - i__2 = k1 - j; - i__3 = k3; - for (i__ = max(i__2,1); i__ <= i__3; ++i__) { - i__2 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; - a[i__2].r = q__1.r, a[i__2].i = q__1.i; -/* L120: */ - } -/* L130: */ - } - - } else if (itype == 6) { - -/* Band matrix */ - - k1 = *kl + *ku + 2; - k2 = *kl + 1; - k3 = (*kl << 1) + *ku + 1; - k4 = *kl + *ku + 1 + *m; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MAX */ - i__3 = k1 - j; -/* Computing MIN */ - i__4 = k3, i__5 = k4 - j; - i__2 = min(i__4,i__5); - for (i__ = max(i__3,k2); i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L140: */ - } -/* L150: */ - } - - } - - if (! done) { - goto L10; - } - - return 0; - -/* End of CLASCL */ - -} /* clascl_ */ - -/* Subroutine */ int claset_(char *uplo, integer *m, integer *n, complex * - alpha, complex *beta, complex *a, integer *lda) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j; - extern logical lsame_(char *, char *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - CLASET initializes a 2-D array A to BETA on the diagonal and - ALPHA on the offdiagonals. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies the part of the matrix A to be set. - = 'U': Upper triangular part is set. The lower triangle - is unchanged. - = 'L': Lower triangular part is set. The upper triangle - is unchanged. - Otherwise: All of the matrix A is set. - - M (input) INTEGER - On entry, M specifies the number of rows of A. - - N (input) INTEGER - On entry, N specifies the number of columns of A. - - ALPHA (input) COMPLEX - All the offdiagonal array elements are set to ALPHA. - - BETA (input) COMPLEX - All the diagonal array elements are set to BETA. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the m by n matrix A. - On exit, A(i,j) = ALPHA, 1 <= i <= m, 1 <= j <= n, i.ne.j; - A(i,i) = BETA , 1 <= i <= min(m,n) - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - if (lsame_(uplo, "U")) { - -/* - Set the diagonal to BETA and the strictly upper triangular - part of the array to ALPHA. -*/ - - i__1 = *n; - for (j = 2; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = j - 1; - i__2 = min(i__3,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - a[i__3].r = alpha->r, a[i__3].i = alpha->i; -/* L10: */ - } -/* L20: */ - } - i__1 = min(*n,*m); - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + i__ * a_dim1; - a[i__2].r = beta->r, a[i__2].i = beta->i; -/* L30: */ - } - - } else if (lsame_(uplo, "L")) { - -/* - Set the diagonal to BETA and the strictly lower triangular - part of the array to ALPHA. -*/ - - i__1 = min(*m,*n); - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - a[i__3].r = alpha->r, a[i__3].i = alpha->i; -/* L40: */ - } -/* L50: */ - } - i__1 = min(*n,*m); - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + i__ * a_dim1; - a[i__2].r = beta->r, a[i__2].i = beta->i; -/* L60: */ - } - - } else { - -/* - Set the array to BETA on the diagonal and ALPHA on the - offdiagonal. -*/ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - a[i__3].r = alpha->r, a[i__3].i = alpha->i; -/* L70: */ - } -/* L80: */ - } - i__1 = min(*m,*n); - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + i__ * a_dim1; - a[i__2].r = beta->r, a[i__2].i = beta->i; -/* L90: */ - } - } - - return 0; - -/* End of CLASET */ - -} /* claset_ */ - -/* Subroutine */ int clasr_(char *side, char *pivot, char *direct, integer *m, - integer *n, real *c__, real *s, complex *a, integer *lda) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - complex q__1, q__2, q__3; - - /* Local variables */ - static integer i__, j, info; - static complex temp; - extern logical lsame_(char *, char *); - static real ctemp, stemp; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - CLASR performs the transformation - - A := P*A, when SIDE = 'L' or 'l' ( Left-hand side ) - - A := A*P', when SIDE = 'R' or 'r' ( Right-hand side ) - - where A is an m by n complex matrix and P is an orthogonal matrix, - consisting of a sequence of plane rotations determined by the - parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l' - and z = n when SIDE = 'R' or 'r' ): - - When DIRECT = 'F' or 'f' ( Forward sequence ) then - - P = P( z - 1 )*...*P( 2 )*P( 1 ), - - and when DIRECT = 'B' or 'b' ( Backward sequence ) then - - P = P( 1 )*P( 2 )*...*P( z - 1 ), - - where P( k ) is a plane rotation matrix for the following planes: - - when PIVOT = 'V' or 'v' ( Variable pivot ), - the plane ( k, k + 1 ) - - when PIVOT = 'T' or 't' ( Top pivot ), - the plane ( 1, k + 1 ) - - when PIVOT = 'B' or 'b' ( Bottom pivot ), - the plane ( k, z ) - - c( k ) and s( k ) must contain the cosine and sine that define the - matrix P( k ). The two by two plane rotation part of the matrix - P( k ), R( k ), is assumed to be of the form - - R( k ) = ( c( k ) s( k ) ). - ( -s( k ) c( k ) ) - - Arguments - ========= - - SIDE (input) CHARACTER*1 - Specifies whether the plane rotation matrix P is applied to - A on the left or the right. - = 'L': Left, compute A := P*A - = 'R': Right, compute A:= A*P' - - DIRECT (input) CHARACTER*1 - Specifies whether P is a forward or backward sequence of - plane rotations. - = 'F': Forward, P = P( z - 1 )*...*P( 2 )*P( 1 ) - = 'B': Backward, P = P( 1 )*P( 2 )*...*P( z - 1 ) - - PIVOT (input) CHARACTER*1 - Specifies the plane for which P(k) is a plane rotation - matrix. - = 'V': Variable pivot, the plane (k,k+1) - = 'T': Top pivot, the plane (1,k+1) - = 'B': Bottom pivot, the plane (k,z) - - M (input) INTEGER - The number of rows of the matrix A. If m <= 1, an immediate - return is effected. - - N (input) INTEGER - The number of columns of the matrix A. If n <= 1, an - immediate return is effected. - - C, S (input) REAL arrays, dimension - (M-1) if SIDE = 'L' - (N-1) if SIDE = 'R' - c(k) and s(k) contain the cosine and sine that define the - matrix P(k). The two by two plane rotation part of the - matrix P(k), R(k), is assumed to be of the form - R( k ) = ( c( k ) s( k ) ). - ( -s( k ) c( k ) ) - - A (input/output) COMPLEX array, dimension (LDA,N) - The m by n matrix A. On exit, A is overwritten by P*A if - SIDE = 'R' or by A*P' if SIDE = 'L'. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - --c__; - --s; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - info = 0; - if (! (lsame_(side, "L") || lsame_(side, "R"))) { - info = 1; - } else if (! (lsame_(pivot, "V") || lsame_(pivot, - "T") || lsame_(pivot, "B"))) { - info = 2; - } else if (! (lsame_(direct, "F") || lsame_(direct, - "B"))) { - info = 3; - } else if (*m < 0) { - info = 4; - } else if (*n < 0) { - info = 5; - } else if (*lda < max(1,*m)) { - info = 9; - } - if (info != 0) { - xerbla_("CLASR ", &info); - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0) { - return 0; - } - if (lsame_(side, "L")) { - -/* Form P * A */ - - if (lsame_(pivot, "V")) { - if (lsame_(direct, "F")) { - i__1 = *m - 1; - for (j = 1; j <= i__1; ++j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1.f || stemp != 0.f) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = j + 1 + i__ * a_dim1; - temp.r = a[i__3].r, temp.i = a[i__3].i; - i__3 = j + 1 + i__ * a_dim1; - q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; - i__4 = j + i__ * a_dim1; - q__3.r = stemp * a[i__4].r, q__3.i = stemp * a[ - i__4].i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - - q__3.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; - i__3 = j + i__ * a_dim1; - q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; - i__4 = j + i__ * a_dim1; - q__3.r = ctemp * a[i__4].r, q__3.i = ctemp * a[ - i__4].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L10: */ - } - } -/* L20: */ - } - } else if (lsame_(direct, "B")) { - for (j = *m - 1; j >= 1; --j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1.f || stemp != 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = j + 1 + i__ * a_dim1; - temp.r = a[i__2].r, temp.i = a[i__2].i; - i__2 = j + 1 + i__ * a_dim1; - q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; - i__3 = j + i__ * a_dim1; - q__3.r = stemp * a[i__3].r, q__3.i = stemp * a[ - i__3].i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - - q__3.i; - a[i__2].r = q__1.r, a[i__2].i = q__1.i; - i__2 = j + i__ * a_dim1; - q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; - i__3 = j + i__ * a_dim1; - q__3.r = ctemp * a[i__3].r, q__3.i = ctemp * a[ - i__3].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - a[i__2].r = q__1.r, a[i__2].i = q__1.i; -/* L30: */ - } - } -/* L40: */ - } - } - } else if (lsame_(pivot, "T")) { - if (lsame_(direct, "F")) { - i__1 = *m; - for (j = 2; j <= i__1; ++j) { - ctemp = c__[j - 1]; - stemp = s[j - 1]; - if (ctemp != 1.f || stemp != 0.f) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = j + i__ * a_dim1; - temp.r = a[i__3].r, temp.i = a[i__3].i; - i__3 = j + i__ * a_dim1; - q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; - i__4 = i__ * a_dim1 + 1; - q__3.r = stemp * a[i__4].r, q__3.i = stemp * a[ - i__4].i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - - q__3.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; - i__3 = i__ * a_dim1 + 1; - q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; - i__4 = i__ * a_dim1 + 1; - q__3.r = ctemp * a[i__4].r, q__3.i = ctemp * a[ - i__4].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L50: */ - } - } -/* L60: */ - } - } else if (lsame_(direct, "B")) { - for (j = *m; j >= 2; --j) { - ctemp = c__[j - 1]; - stemp = s[j - 1]; - if (ctemp != 1.f || stemp != 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = j + i__ * a_dim1; - temp.r = a[i__2].r, temp.i = a[i__2].i; - i__2 = j + i__ * a_dim1; - q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; - i__3 = i__ * a_dim1 + 1; - q__3.r = stemp * a[i__3].r, q__3.i = stemp * a[ - i__3].i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - - q__3.i; - a[i__2].r = q__1.r, a[i__2].i = q__1.i; - i__2 = i__ * a_dim1 + 1; - q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; - i__3 = i__ * a_dim1 + 1; - q__3.r = ctemp * a[i__3].r, q__3.i = ctemp * a[ - i__3].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - a[i__2].r = q__1.r, a[i__2].i = q__1.i; -/* L70: */ - } - } -/* L80: */ - } - } - } else if (lsame_(pivot, "B")) { - if (lsame_(direct, "F")) { - i__1 = *m - 1; - for (j = 1; j <= i__1; ++j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1.f || stemp != 0.f) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = j + i__ * a_dim1; - temp.r = a[i__3].r, temp.i = a[i__3].i; - i__3 = j + i__ * a_dim1; - i__4 = *m + i__ * a_dim1; - q__2.r = stemp * a[i__4].r, q__2.i = stemp * a[ - i__4].i; - q__3.r = ctemp * temp.r, q__3.i = ctemp * temp.i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; - i__3 = *m + i__ * a_dim1; - i__4 = *m + i__ * a_dim1; - q__2.r = ctemp * a[i__4].r, q__2.i = ctemp * a[ - i__4].i; - q__3.r = stemp * temp.r, q__3.i = stemp * temp.i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - - q__3.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L90: */ - } - } -/* L100: */ - } - } else if (lsame_(direct, "B")) { - for (j = *m - 1; j >= 1; --j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1.f || stemp != 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = j + i__ * a_dim1; - temp.r = a[i__2].r, temp.i = a[i__2].i; - i__2 = j + i__ * a_dim1; - i__3 = *m + i__ * a_dim1; - q__2.r = stemp * a[i__3].r, q__2.i = stemp * a[ - i__3].i; - q__3.r = ctemp * temp.r, q__3.i = ctemp * temp.i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - a[i__2].r = q__1.r, a[i__2].i = q__1.i; - i__2 = *m + i__ * a_dim1; - i__3 = *m + i__ * a_dim1; - q__2.r = ctemp * a[i__3].r, q__2.i = ctemp * a[ - i__3].i; - q__3.r = stemp * temp.r, q__3.i = stemp * temp.i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - - q__3.i; - a[i__2].r = q__1.r, a[i__2].i = q__1.i; -/* L110: */ - } - } -/* L120: */ - } - } - } - } else if (lsame_(side, "R")) { - -/* Form A * P' */ - - if (lsame_(pivot, "V")) { - if (lsame_(direct, "F")) { - i__1 = *n - 1; - for (j = 1; j <= i__1; ++j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1.f || stemp != 0.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + (j + 1) * a_dim1; - temp.r = a[i__3].r, temp.i = a[i__3].i; - i__3 = i__ + (j + 1) * a_dim1; - q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; - i__4 = i__ + j * a_dim1; - q__3.r = stemp * a[i__4].r, q__3.i = stemp * a[ - i__4].i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - - q__3.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; - i__3 = i__ + j * a_dim1; - q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; - i__4 = i__ + j * a_dim1; - q__3.r = ctemp * a[i__4].r, q__3.i = ctemp * a[ - i__4].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L130: */ - } - } -/* L140: */ - } - } else if (lsame_(direct, "B")) { - for (j = *n - 1; j >= 1; --j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1.f || stemp != 0.f) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + (j + 1) * a_dim1; - temp.r = a[i__2].r, temp.i = a[i__2].i; - i__2 = i__ + (j + 1) * a_dim1; - q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; - i__3 = i__ + j * a_dim1; - q__3.r = stemp * a[i__3].r, q__3.i = stemp * a[ - i__3].i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - - q__3.i; - a[i__2].r = q__1.r, a[i__2].i = q__1.i; - i__2 = i__ + j * a_dim1; - q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; - i__3 = i__ + j * a_dim1; - q__3.r = ctemp * a[i__3].r, q__3.i = ctemp * a[ - i__3].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - a[i__2].r = q__1.r, a[i__2].i = q__1.i; -/* L150: */ - } - } -/* L160: */ - } - } - } else if (lsame_(pivot, "T")) { - if (lsame_(direct, "F")) { - i__1 = *n; - for (j = 2; j <= i__1; ++j) { - ctemp = c__[j - 1]; - stemp = s[j - 1]; - if (ctemp != 1.f || stemp != 0.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - temp.r = a[i__3].r, temp.i = a[i__3].i; - i__3 = i__ + j * a_dim1; - q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; - i__4 = i__ + a_dim1; - q__3.r = stemp * a[i__4].r, q__3.i = stemp * a[ - i__4].i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - - q__3.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; - i__3 = i__ + a_dim1; - q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; - i__4 = i__ + a_dim1; - q__3.r = ctemp * a[i__4].r, q__3.i = ctemp * a[ - i__4].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L170: */ - } - } -/* L180: */ - } - } else if (lsame_(direct, "B")) { - for (j = *n; j >= 2; --j) { - ctemp = c__[j - 1]; - stemp = s[j - 1]; - if (ctemp != 1.f || stemp != 0.f) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + j * a_dim1; - temp.r = a[i__2].r, temp.i = a[i__2].i; - i__2 = i__ + j * a_dim1; - q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; - i__3 = i__ + a_dim1; - q__3.r = stemp * a[i__3].r, q__3.i = stemp * a[ - i__3].i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - - q__3.i; - a[i__2].r = q__1.r, a[i__2].i = q__1.i; - i__2 = i__ + a_dim1; - q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; - i__3 = i__ + a_dim1; - q__3.r = ctemp * a[i__3].r, q__3.i = ctemp * a[ - i__3].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - a[i__2].r = q__1.r, a[i__2].i = q__1.i; -/* L190: */ - } - } -/* L200: */ - } - } - } else if (lsame_(pivot, "B")) { - if (lsame_(direct, "F")) { - i__1 = *n - 1; - for (j = 1; j <= i__1; ++j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1.f || stemp != 0.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - temp.r = a[i__3].r, temp.i = a[i__3].i; - i__3 = i__ + j * a_dim1; - i__4 = i__ + *n * a_dim1; - q__2.r = stemp * a[i__4].r, q__2.i = stemp * a[ - i__4].i; - q__3.r = ctemp * temp.r, q__3.i = ctemp * temp.i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; - i__3 = i__ + *n * a_dim1; - i__4 = i__ + *n * a_dim1; - q__2.r = ctemp * a[i__4].r, q__2.i = ctemp * a[ - i__4].i; - q__3.r = stemp * temp.r, q__3.i = stemp * temp.i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - - q__3.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L210: */ - } - } -/* L220: */ - } - } else if (lsame_(direct, "B")) { - for (j = *n - 1; j >= 1; --j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1.f || stemp != 0.f) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + j * a_dim1; - temp.r = a[i__2].r, temp.i = a[i__2].i; - i__2 = i__ + j * a_dim1; - i__3 = i__ + *n * a_dim1; - q__2.r = stemp * a[i__3].r, q__2.i = stemp * a[ - i__3].i; - q__3.r = ctemp * temp.r, q__3.i = ctemp * temp.i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - a[i__2].r = q__1.r, a[i__2].i = q__1.i; - i__2 = i__ + *n * a_dim1; - i__3 = i__ + *n * a_dim1; - q__2.r = ctemp * a[i__3].r, q__2.i = ctemp * a[ - i__3].i; - q__3.r = stemp * temp.r, q__3.i = stemp * temp.i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - - q__3.i; - a[i__2].r = q__1.r, a[i__2].i = q__1.i; -/* L230: */ - } - } -/* L240: */ - } - } - } - } - - return 0; - -/* End of CLASR */ - -} /* clasr_ */ - -/* Subroutine */ int classq_(integer *n, complex *x, integer *incx, real * - scale, real *sumsq) -{ - /* System generated locals */ - integer i__1, i__2, i__3; - real r__1; - - /* Builtin functions */ - double r_imag(complex *); - - /* Local variables */ - static integer ix; - static real temp1; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CLASSQ returns the values scl and ssq such that - - ( scl**2 )*ssq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, - - where x( i ) = abs( X( 1 + ( i - 1 )*INCX ) ). The value of sumsq is - assumed to be at least unity and the value of ssq will then satisfy - - 1.0 .le. ssq .le. ( sumsq + 2*n ). - - scale is assumed to be non-negative and scl returns the value - - scl = max( scale, abs( real( x( i ) ) ), abs( aimag( x( i ) ) ) ), - i - - scale and sumsq must be supplied in SCALE and SUMSQ respectively. - SCALE and SUMSQ are overwritten by scl and ssq respectively. - - The routine makes only one pass through the vector X. - - Arguments - ========= - - N (input) INTEGER - The number of elements to be used from the vector X. - - X (input) COMPLEX array, dimension (N) - The vector x as described above. - x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. - - INCX (input) INTEGER - The increment between successive values of the vector X. - INCX > 0. - - SCALE (input/output) REAL - On entry, the value scale in the equation above. - On exit, SCALE is overwritten with the value scl . - - SUMSQ (input/output) REAL - On entry, the value sumsq in the equation above. - On exit, SUMSQ is overwritten with the value ssq . - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --x; - - /* Function Body */ - if (*n > 0) { - i__1 = (*n - 1) * *incx + 1; - i__2 = *incx; - for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { - i__3 = ix; - if (x[i__3].r != 0.f) { - i__3 = ix; - temp1 = (r__1 = x[i__3].r, dabs(r__1)); - if (*scale < temp1) { -/* Computing 2nd power */ - r__1 = *scale / temp1; - *sumsq = *sumsq * (r__1 * r__1) + 1; - *scale = temp1; - } else { -/* Computing 2nd power */ - r__1 = temp1 / *scale; - *sumsq += r__1 * r__1; - } - } - if (r_imag(&x[ix]) != 0.f) { - temp1 = (r__1 = r_imag(&x[ix]), dabs(r__1)); - if (*scale < temp1) { -/* Computing 2nd power */ - r__1 = *scale / temp1; - *sumsq = *sumsq * (r__1 * r__1) + 1; - *scale = temp1; - } else { -/* Computing 2nd power */ - r__1 = temp1 / *scale; - *sumsq += r__1 * r__1; - } - } -/* L10: */ - } - } - - return 0; - -/* End of CLASSQ */ - -} /* classq_ */ - -/* Subroutine */ int claswp_(integer *n, complex *a, integer *lda, integer * - k1, integer *k2, integer *ipiv, integer *incx) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; - - /* Local variables */ - static integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc; - static complex temp; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CLASWP performs a series of row interchanges on the matrix A. - One row interchange is initiated for each of rows K1 through K2 of A. - - Arguments - ========= - - N (input) INTEGER - The number of columns of the matrix A. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the matrix of column dimension N to which the row - interchanges will be applied. - On exit, the permuted matrix. - - LDA (input) INTEGER - The leading dimension of the array A. - - K1 (input) INTEGER - The first element of IPIV for which a row interchange will - be done. - - K2 (input) INTEGER - The last element of IPIV for which a row interchange will - be done. - - IPIV (input) INTEGER array, dimension (M*abs(INCX)) - The vector of pivot indices. Only the elements in positions - K1 through K2 of IPIV are accessed. - IPIV(K) = L implies rows K and L are to be interchanged. - - INCX (input) INTEGER - The increment between successive values of IPIV. If IPIV - is negative, the pivots are applied in reverse order. - - Further Details - =============== - - Modified by - R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA - - ===================================================================== - - - Interchange row I with row IPIV(I) for each of rows K1 through K2. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - - /* Function Body */ - if (*incx > 0) { - ix0 = *k1; - i1 = *k1; - i2 = *k2; - inc = 1; - } else if (*incx < 0) { - ix0 = (1 - *k2) * *incx + 1; - i1 = *k2; - i2 = *k1; - inc = -1; - } else { - return 0; - } - - n32 = *n / 32 << 5; - if (n32 != 0) { - i__1 = n32; - for (j = 1; j <= i__1; j += 32) { - ix = ix0; - i__2 = i2; - i__3 = inc; - for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) - { - ip = ipiv[ix]; - if (ip != i__) { - i__4 = j + 31; - for (k = j; k <= i__4; ++k) { - i__5 = i__ + k * a_dim1; - temp.r = a[i__5].r, temp.i = a[i__5].i; - i__5 = i__ + k * a_dim1; - i__6 = ip + k * a_dim1; - a[i__5].r = a[i__6].r, a[i__5].i = a[i__6].i; - i__5 = ip + k * a_dim1; - a[i__5].r = temp.r, a[i__5].i = temp.i; -/* L10: */ - } - } - ix += *incx; -/* L20: */ - } -/* L30: */ - } - } - if (n32 != *n) { - ++n32; - ix = ix0; - i__1 = i2; - i__3 = inc; - for (i__ = i1; i__3 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__3) { - ip = ipiv[ix]; - if (ip != i__) { - i__2 = *n; - for (k = n32; k <= i__2; ++k) { - i__4 = i__ + k * a_dim1; - temp.r = a[i__4].r, temp.i = a[i__4].i; - i__4 = i__ + k * a_dim1; - i__5 = ip + k * a_dim1; - a[i__4].r = a[i__5].r, a[i__4].i = a[i__5].i; - i__4 = ip + k * a_dim1; - a[i__4].r = temp.r, a[i__4].i = temp.i; -/* L40: */ - } - } - ix += *incx; -/* L50: */ - } - } - - return 0; - -/* End of CLASWP */ - -} /* claswp_ */ - -/* Subroutine */ int clatrd_(char *uplo, integer *n, integer *nb, complex *a, - integer *lda, real *e, complex *tau, complex *w, integer *ldw) -{ - /* System generated locals */ - integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3; - real r__1; - complex q__1, q__2, q__3, q__4; - - /* Local variables */ - static integer i__, iw; - static complex alpha; - extern /* Subroutine */ int cscal_(integer *, complex *, complex *, - integer *); - extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer - *, complex *, integer *); - extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * - , complex *, integer *, complex *, integer *, complex *, complex * - , integer *), chemv_(char *, integer *, complex *, - complex *, integer *, complex *, integer *, complex *, complex *, - integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, - integer *, complex *, integer *), clarfg_(integer *, complex *, - complex *, integer *, complex *), clacgv_(integer *, complex *, - integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CLATRD reduces NB rows and columns of a complex Hermitian matrix A to - Hermitian tridiagonal form by a unitary similarity - transformation Q' * A * Q, and returns the matrices V and W which are - needed to apply the transformation to the unreduced part of A. - - If UPLO = 'U', CLATRD reduces the last NB rows and columns of a - matrix, of which the upper triangle is supplied; - if UPLO = 'L', CLATRD reduces the first NB rows and columns of a - matrix, of which the lower triangle is supplied. - - This is an auxiliary routine called by CHETRD. - - Arguments - ========= - - UPLO (input) CHARACTER - Specifies whether the upper or lower triangular part of the - Hermitian matrix A is stored: - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the matrix A. - - NB (input) INTEGER - The number of rows and columns to be reduced. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the Hermitian matrix A. If UPLO = 'U', the leading - n-by-n upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading n-by-n lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - On exit: - if UPLO = 'U', the last NB columns have been reduced to - tridiagonal form, with the diagonal elements overwriting - the diagonal elements of A; the elements above the diagonal - with the array TAU, represent the unitary matrix Q as a - product of elementary reflectors; - if UPLO = 'L', the first NB columns have been reduced to - tridiagonal form, with the diagonal elements overwriting - the diagonal elements of A; the elements below the diagonal - with the array TAU, represent the unitary matrix Q as a - product of elementary reflectors. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - E (output) REAL array, dimension (N-1) - If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal - elements of the last NB columns of the reduced matrix; - if UPLO = 'L', E(1:nb) contains the subdiagonal elements of - the first NB columns of the reduced matrix. - - TAU (output) COMPLEX array, dimension (N-1) - The scalar factors of the elementary reflectors, stored in - TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. - See Further Details. - - W (output) COMPLEX array, dimension (LDW,NB) - The n-by-nb matrix W required to update the unreduced part - of A. - - LDW (input) INTEGER - The leading dimension of the array W. LDW >= max(1,N). - - Further Details - =============== - - If UPLO = 'U', the matrix Q is represented as a product of elementary - reflectors - - Q = H(n) H(n-1) . . . H(n-nb+1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), - and tau in TAU(i-1). - - If UPLO = 'L', the matrix Q is represented as a product of elementary - reflectors - - Q = H(1) H(2) . . . H(nb). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), - and tau in TAU(i). - - The elements of the vectors v together form the n-by-nb matrix V - which is needed, with W, to apply the transformation to the unreduced - part of the matrix, using a Hermitian rank-2k update of the form: - A := A - V*W' - W*V'. - - The contents of A on exit are illustrated by the following examples - with n = 5 and nb = 2: - - if UPLO = 'U': if UPLO = 'L': - - ( a a a v4 v5 ) ( d ) - ( a a v4 v5 ) ( 1 d ) - ( a 1 v5 ) ( v1 1 a ) - ( d 1 ) ( v1 v2 a a ) - ( d ) ( v1 v2 a a a ) - - where d denotes a diagonal element of the reduced matrix, a denotes - an element of the original matrix that is unchanged, and vi denotes - an element of the vector defining H(i). - - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --e; - --tau; - w_dim1 = *ldw; - w_offset = 1 + w_dim1; - w -= w_offset; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - - if (lsame_(uplo, "U")) { - -/* Reduce last NB columns of upper triangle */ - - i__1 = *n - *nb + 1; - for (i__ = *n; i__ >= i__1; --i__) { - iw = i__ - *n + *nb; - if (i__ < *n) { - -/* Update A(1:i,i) */ - - i__2 = i__ + i__ * a_dim1; - i__3 = i__ + i__ * a_dim1; - r__1 = a[i__3].r; - a[i__2].r = r__1, a[i__2].i = 0.f; - i__2 = *n - i__; - clacgv_(&i__2, &w[i__ + (iw + 1) * w_dim1], ldw); - i__2 = *n - i__; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__, &i__2, &q__1, &a[(i__ + 1) * - a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, & - c_b56, &a[i__ * a_dim1 + 1], &c__1); - i__2 = *n - i__; - clacgv_(&i__2, &w[i__ + (iw + 1) * w_dim1], ldw); - i__2 = *n - i__; - clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); - i__2 = *n - i__; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__, &i__2, &q__1, &w[(iw + 1) * - w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b56, &a[i__ * a_dim1 + 1], &c__1); - i__2 = *n - i__; - clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); - i__2 = i__ + i__ * a_dim1; - i__3 = i__ + i__ * a_dim1; - r__1 = a[i__3].r; - a[i__2].r = r__1, a[i__2].i = 0.f; - } - if (i__ > 1) { - -/* - Generate elementary reflector H(i) to annihilate - A(1:i-2,i) -*/ - - i__2 = i__ - 1 + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = i__ - 1; - clarfg_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &tau[i__ - - 1]); - i__2 = i__ - 1; - e[i__2] = alpha.r; - i__2 = i__ - 1 + i__ * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - -/* Compute W(1:i-1,i) */ - - i__2 = i__ - 1; - chemv_("Upper", &i__2, &c_b56, &a[a_offset], lda, &a[i__ * - a_dim1 + 1], &c__1, &c_b55, &w[iw * w_dim1 + 1], & - c__1); - if (i__ < *n) { - i__2 = i__ - 1; - i__3 = *n - i__; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &w[( - iw + 1) * w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], - &c__1, &c_b55, &w[i__ + 1 + iw * w_dim1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &a[(i__ + 1) * - a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], & - c__1, &c_b56, &w[iw * w_dim1 + 1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[( - i__ + 1) * a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], - &c__1, &c_b55, &w[i__ + 1 + iw * w_dim1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &w[(iw + 1) * - w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], & - c__1, &c_b56, &w[iw * w_dim1 + 1], &c__1); - } - i__2 = i__ - 1; - cscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1); - q__3.r = -.5f, q__3.i = -0.f; - i__2 = i__ - 1; - q__2.r = q__3.r * tau[i__2].r - q__3.i * tau[i__2].i, q__2.i = - q__3.r * tau[i__2].i + q__3.i * tau[i__2].r; - i__3 = i__ - 1; - cdotc_(&q__4, &i__3, &w[iw * w_dim1 + 1], &c__1, &a[i__ * - a_dim1 + 1], &c__1); - q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * - q__4.i + q__2.i * q__4.r; - alpha.r = q__1.r, alpha.i = q__1.i; - i__2 = i__ - 1; - caxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw * - w_dim1 + 1], &c__1); - } - -/* L10: */ - } - } else { - -/* Reduce first NB columns of lower triangle */ - - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Update A(i:n,i) */ - - i__2 = i__ + i__ * a_dim1; - i__3 = i__ + i__ * a_dim1; - r__1 = a[i__3].r; - a[i__2].r = r__1, a[i__2].i = 0.f; - i__2 = i__ - 1; - clacgv_(&i__2, &w[i__ + w_dim1], ldw); - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + a_dim1], lda, - &w[i__ + w_dim1], ldw, &c_b56, &a[i__ + i__ * a_dim1], & - c__1); - i__2 = i__ - 1; - clacgv_(&i__2, &w[i__ + w_dim1], ldw); - i__2 = i__ - 1; - clacgv_(&i__2, &a[i__ + a_dim1], lda); - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &w[i__ + w_dim1], ldw, - &a[i__ + a_dim1], lda, &c_b56, &a[i__ + i__ * a_dim1], & - c__1); - i__2 = i__ - 1; - clacgv_(&i__2, &a[i__ + a_dim1], lda); - i__2 = i__ + i__ * a_dim1; - i__3 = i__ + i__ * a_dim1; - r__1 = a[i__3].r; - a[i__2].r = r__1, a[i__2].i = 0.f; - if (i__ < *n) { - -/* - Generate elementary reflector H(i) to annihilate - A(i+2:n,i) -*/ - - i__2 = i__ + 1 + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *n - i__; -/* Computing MIN */ - i__3 = i__ + 2; - clarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, - &tau[i__]); - i__2 = i__; - e[i__2] = alpha.r; - i__2 = i__ + 1 + i__ * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - -/* Compute W(i+1:n,i) */ - - i__2 = *n - i__; - chemv_("Lower", &i__2, &c_b56, &a[i__ + 1 + (i__ + 1) * - a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b55, &w[i__ + 1 + i__ * w_dim1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &w[i__ + - 1 + w_dim1], ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b55, &w[i__ * w_dim1 + 1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + 1 + - a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b56, &w[ - i__ + 1 + i__ * w_dim1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + - 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b55, &w[i__ * w_dim1 + 1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &w[i__ + 1 + - w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b56, &w[ - i__ + 1 + i__ * w_dim1], &c__1); - i__2 = *n - i__; - cscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1); - q__3.r = -.5f, q__3.i = -0.f; - i__2 = i__; - q__2.r = q__3.r * tau[i__2].r - q__3.i * tau[i__2].i, q__2.i = - q__3.r * tau[i__2].i + q__3.i * tau[i__2].r; - i__3 = *n - i__; - cdotc_(&q__4, &i__3, &w[i__ + 1 + i__ * w_dim1], &c__1, &a[ - i__ + 1 + i__ * a_dim1], &c__1); - q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * - q__4.i + q__2.i * q__4.r; - alpha.r = q__1.r, alpha.i = q__1.i; - i__2 = *n - i__; - caxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[ - i__ + 1 + i__ * w_dim1], &c__1); - } - -/* L20: */ - } - } - - return 0; - -/* End of CLATRD */ - -} /* clatrd_ */ - -/* Subroutine */ int clatrs_(char *uplo, char *trans, char *diag, char * - normin, integer *n, complex *a, integer *lda, complex *x, real *scale, - real *cnorm, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - real r__1, r__2, r__3, r__4; - complex q__1, q__2, q__3, q__4; - - /* Builtin functions */ - double r_imag(complex *); - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, j; - static real xj, rec, tjj; - static integer jinc; - static real xbnd; - static integer imax; - static real tmax; - static complex tjjs; - static real xmax, grow; - extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer - *, complex *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); - static real tscal; - static complex uscal; - static integer jlast; - extern /* Complex */ VOID cdotu_(complex *, integer *, complex *, integer - *, complex *, integer *); - static complex csumj; - extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, - integer *, complex *, integer *); - static logical upper; - extern /* Subroutine */ int ctrsv_(char *, char *, char *, integer *, - complex *, integer *, complex *, integer *), slabad_(real *, real *); - extern integer icamax_(integer *, complex *, integer *); - extern /* Complex */ VOID cladiv_(complex *, complex *, complex *); - extern doublereal slamch_(char *); - extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer - *), xerbla_(char *, integer *); - static real bignum; - extern integer isamax_(integer *, real *, integer *); - extern doublereal scasum_(integer *, complex *, integer *); - static logical notran; - static integer jfirst; - static real smlnum; - static logical nounit; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1992 - - - Purpose - ======= - - CLATRS solves one of the triangular systems - - A * x = s*b, A**T * x = s*b, or A**H * x = s*b, - - with scaling to prevent overflow. Here A is an upper or lower - triangular matrix, A**T denotes the transpose of A, A**H denotes the - conjugate transpose of A, x and b are n-element vectors, and s is a - scaling factor, usually less than or equal to 1, chosen so that the - components of x will be less than the overflow threshold. If the - unscaled problem will not cause overflow, the Level 2 BLAS routine - CTRSV is called. If the matrix A is singular (A(j,j) = 0 for some j), - then s is set to 0 and a non-trivial solution to A*x = 0 is returned. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the matrix A is upper or lower triangular. - = 'U': Upper triangular - = 'L': Lower triangular - - TRANS (input) CHARACTER*1 - Specifies the operation applied to A. - = 'N': Solve A * x = s*b (No transpose) - = 'T': Solve A**T * x = s*b (Transpose) - = 'C': Solve A**H * x = s*b (Conjugate transpose) - - DIAG (input) CHARACTER*1 - Specifies whether or not the matrix A is unit triangular. - = 'N': Non-unit triangular - = 'U': Unit triangular - - NORMIN (input) CHARACTER*1 - Specifies whether CNORM has been set or not. - = 'Y': CNORM contains the column norms on entry - = 'N': CNORM is not set on entry. On exit, the norms will - be computed and stored in CNORM. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input) COMPLEX array, dimension (LDA,N) - The triangular matrix A. If UPLO = 'U', the leading n by n - upper triangular part of the array A contains the upper - triangular matrix, and the strictly lower triangular part of - A is not referenced. If UPLO = 'L', the leading n by n lower - triangular part of the array A contains the lower triangular - matrix, and the strictly upper triangular part of A is not - referenced. If DIAG = 'U', the diagonal elements of A are - also not referenced and are assumed to be 1. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max (1,N). - - X (input/output) COMPLEX array, dimension (N) - On entry, the right hand side b of the triangular system. - On exit, X is overwritten by the solution vector x. - - SCALE (output) REAL - The scaling factor s for the triangular system - A * x = s*b, A**T * x = s*b, or A**H * x = s*b. - If SCALE = 0, the matrix A is singular or badly scaled, and - the vector x is an exact or approximate solution to A*x = 0. - - CNORM (input or output) REAL array, dimension (N) - - If NORMIN = 'Y', CNORM is an input argument and CNORM(j) - contains the norm of the off-diagonal part of the j-th column - of A. If TRANS = 'N', CNORM(j) must be greater than or equal - to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) - must be greater than or equal to the 1-norm. - - If NORMIN = 'N', CNORM is an output argument and CNORM(j) - returns the 1-norm of the offdiagonal part of the j-th column - of A. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - - Further Details - ======= ======= - - A rough bound on x is computed; if that is less than overflow, CTRSV - is called, otherwise, specific code is used which checks for possible - overflow or divide-by-zero at every operation. - - A columnwise scheme is used for solving A*x = b. The basic algorithm - if A is lower triangular is - - x[1:n] := b[1:n] - for j = 1, ..., n - x(j) := x(j) / A(j,j) - x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] - end - - Define bounds on the components of x after j iterations of the loop: - M(j) = bound on x[1:j] - G(j) = bound on x[j+1:n] - Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. - - Then for iteration j+1 we have - M(j+1) <= G(j) / | A(j+1,j+1) | - G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | - <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) - - where CNORM(j+1) is greater than or equal to the infinity-norm of - column j+1 of A, not counting the diagonal. Hence - - G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) - 1<=i<=j - and - - |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) - 1<=i< j - - Since |x(j)| <= M(j), we use the Level 2 BLAS routine CTRSV if the - reciprocal of the largest M(j), j=1,..,n, is larger than - max(underflow, 1/overflow). - - The bound on x(j) is also used to determine when a step in the - columnwise method can be performed without fear of overflow. If - the computed bound is greater than a large constant, x is scaled to - prevent overflow, but if the bound overflows, x is set to 0, x(j) to - 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. - - Similarly, a row-wise scheme is used to solve A**T *x = b or - A**H *x = b. The basic algorithm for A upper triangular is - - for j = 1, ..., n - x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) - end - - We simultaneously compute two bounds - G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j - M(j) = bound on x(i), 1<=i<=j - - The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we - add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. - Then the bound on x(j) is - - M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | - - <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) - 1<=i<=j - - and we can safely call CTRSV if 1/M(n) and 1/G(n) are both greater - than max(underflow, 1/overflow). - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - --cnorm; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - notran = lsame_(trans, "N"); - nounit = lsame_(diag, "N"); - -/* Test the input parameters. */ - - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "T") && ! - lsame_(trans, "C")) { - *info = -2; - } else if (! nounit && ! lsame_(diag, "U")) { - *info = -3; - } else if (! lsame_(normin, "Y") && ! lsame_(normin, - "N")) { - *info = -4; - } else if (*n < 0) { - *info = -5; - } else if (*lda < max(1,*n)) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CLATRS", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Determine machine dependent parameters to control overflow. */ - - smlnum = slamch_("Safe minimum"); - bignum = 1.f / smlnum; - slabad_(&smlnum, &bignum); - smlnum /= slamch_("Precision"); - bignum = 1.f / smlnum; - *scale = 1.f; - - if (lsame_(normin, "N")) { - -/* Compute the 1-norm of each column, not including the diagonal. */ - - if (upper) { - -/* A is upper triangular. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j - 1; - cnorm[j] = scasum_(&i__2, &a[j * a_dim1 + 1], &c__1); -/* L10: */ - } - } else { - -/* A is lower triangular. */ - - i__1 = *n - 1; - for (j = 1; j <= i__1; ++j) { - i__2 = *n - j; - cnorm[j] = scasum_(&i__2, &a[j + 1 + j * a_dim1], &c__1); -/* L20: */ - } - cnorm[*n] = 0.f; - } - } - -/* - Scale the column norms by TSCAL if the maximum element in CNORM is - greater than BIGNUM/2. -*/ - - imax = isamax_(n, &cnorm[1], &c__1); - tmax = cnorm[imax]; - if (tmax <= bignum * .5f) { - tscal = 1.f; - } else { - tscal = .5f / (smlnum * tmax); - sscal_(n, &tscal, &cnorm[1], &c__1); - } - -/* - Compute a bound on the computed solution vector to see if the - Level 2 BLAS routine CTRSV can be used. -*/ - - xmax = 0.f; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MAX */ - i__2 = j; - r__3 = xmax, r__4 = (r__1 = x[i__2].r / 2.f, dabs(r__1)) + (r__2 = - r_imag(&x[j]) / 2.f, dabs(r__2)); - xmax = dmax(r__3,r__4); -/* L30: */ - } - xbnd = xmax; - - if (notran) { - -/* Compute the growth in A * x = b. */ - - if (upper) { - jfirst = *n; - jlast = 1; - jinc = -1; - } else { - jfirst = 1; - jlast = *n; - jinc = 1; - } - - if (tscal != 1.f) { - grow = 0.f; - goto L60; - } - - if (nounit) { - -/* - A is non-unit triangular. - - Compute GROW = 1/G(j) and XBND = 1/M(j). - Initially, G(0) = max{x(i), i=1,...,n}. -*/ - - grow = .5f / dmax(xbnd,smlnum); - xbnd = grow; - i__1 = jlast; - i__2 = jinc; - for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { - -/* Exit the loop if the growth factor is too small. */ - - if (grow <= smlnum) { - goto L60; - } - - i__3 = j + j * a_dim1; - tjjs.r = a[i__3].r, tjjs.i = a[i__3].i; - tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), - dabs(r__2)); - - if (tjj >= smlnum) { - -/* - M(j) = G(j-1) / abs(A(j,j)) - - Computing MIN -*/ - r__1 = xbnd, r__2 = dmin(1.f,tjj) * grow; - xbnd = dmin(r__1,r__2); - } else { - -/* M(j) could overflow, set XBND to 0. */ - - xbnd = 0.f; - } - - if (tjj + cnorm[j] >= smlnum) { - -/* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */ - - grow *= tjj / (tjj + cnorm[j]); - } else { - -/* G(j) could overflow, set GROW to 0. */ - - grow = 0.f; - } -/* L40: */ - } - grow = xbnd; - } else { - -/* - A is unit triangular. - - Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. - - Computing MIN -*/ - r__1 = 1.f, r__2 = .5f / dmax(xbnd,smlnum); - grow = dmin(r__1,r__2); - i__2 = jlast; - i__1 = jinc; - for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { - -/* Exit the loop if the growth factor is too small. */ - - if (grow <= smlnum) { - goto L60; - } - -/* G(j) = G(j-1)*( 1 + CNORM(j) ) */ - - grow *= 1.f / (cnorm[j] + 1.f); -/* L50: */ - } - } -L60: - - ; - } else { - -/* Compute the growth in A**T * x = b or A**H * x = b. */ - - if (upper) { - jfirst = 1; - jlast = *n; - jinc = 1; - } else { - jfirst = *n; - jlast = 1; - jinc = -1; - } - - if (tscal != 1.f) { - grow = 0.f; - goto L90; - } - - if (nounit) { - -/* - A is non-unit triangular. - - Compute GROW = 1/G(j) and XBND = 1/M(j). - Initially, M(0) = max{x(i), i=1,...,n}. -*/ - - grow = .5f / dmax(xbnd,smlnum); - xbnd = grow; - i__1 = jlast; - i__2 = jinc; - for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { - -/* Exit the loop if the growth factor is too small. */ - - if (grow <= smlnum) { - goto L90; - } - -/* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */ - - xj = cnorm[j] + 1.f; -/* Computing MIN */ - r__1 = grow, r__2 = xbnd / xj; - grow = dmin(r__1,r__2); - - i__3 = j + j * a_dim1; - tjjs.r = a[i__3].r, tjjs.i = a[i__3].i; - tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), - dabs(r__2)); - - if (tjj >= smlnum) { - -/* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */ - - if (xj > tjj) { - xbnd *= tjj / xj; - } - } else { - -/* M(j) could overflow, set XBND to 0. */ - - xbnd = 0.f; - } -/* L70: */ - } - grow = dmin(grow,xbnd); - } else { - -/* - A is unit triangular. - - Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. - - Computing MIN -*/ - r__1 = 1.f, r__2 = .5f / dmax(xbnd,smlnum); - grow = dmin(r__1,r__2); - i__2 = jlast; - i__1 = jinc; - for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { - -/* Exit the loop if the growth factor is too small. */ - - if (grow <= smlnum) { - goto L90; - } - -/* G(j) = ( 1 + CNORM(j) )*G(j-1) */ - - xj = cnorm[j] + 1.f; - grow /= xj; -/* L80: */ - } - } -L90: - ; - } - - if (grow * tscal > smlnum) { - -/* - Use the Level 2 BLAS solve if the reciprocal of the bound on - elements of X is not too small. -*/ - - ctrsv_(uplo, trans, diag, n, &a[a_offset], lda, &x[1], &c__1); - } else { - -/* Use a Level 1 BLAS solve, scaling intermediate results. */ - - if (xmax > bignum * .5f) { - -/* - Scale X so that its components are less than or equal to - BIGNUM in absolute value. -*/ - - *scale = bignum * .5f / xmax; - csscal_(n, scale, &x[1], &c__1); - xmax = bignum; - } else { - xmax *= 2.f; - } - - if (notran) { - -/* Solve A * x = b */ - - i__1 = jlast; - i__2 = jinc; - for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { - -/* Compute x(j) = b(j) / A(j,j), scaling x if necessary. */ - - i__3 = j; - xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]), - dabs(r__2)); - if (nounit) { - i__3 = j + j * a_dim1; - q__1.r = tscal * a[i__3].r, q__1.i = tscal * a[i__3].i; - tjjs.r = q__1.r, tjjs.i = q__1.i; - } else { - tjjs.r = tscal, tjjs.i = 0.f; - if (tscal == 1.f) { - goto L105; - } - } - tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), - dabs(r__2)); - if (tjj > smlnum) { - -/* abs(A(j,j)) > SMLNUM: */ - - if (tjj < 1.f) { - if (xj > tjj * bignum) { - -/* Scale x by 1/b(j). */ - - rec = 1.f / xj; - csscal_(n, &rec, &x[1], &c__1); - *scale *= rec; - xmax *= rec; - } - } - i__3 = j; - cladiv_(&q__1, &x[j], &tjjs); - x[i__3].r = q__1.r, x[i__3].i = q__1.i; - i__3 = j; - xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j] - ), dabs(r__2)); - } else if (tjj > 0.f) { - -/* 0 < abs(A(j,j)) <= SMLNUM: */ - - if (xj > tjj * bignum) { - -/* - Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM - to avoid overflow when dividing by A(j,j). -*/ - - rec = tjj * bignum / xj; - if (cnorm[j] > 1.f) { - -/* - Scale by 1/CNORM(j) to avoid overflow when - multiplying x(j) times column j. -*/ - - rec /= cnorm[j]; - } - csscal_(n, &rec, &x[1], &c__1); - *scale *= rec; - xmax *= rec; - } - i__3 = j; - cladiv_(&q__1, &x[j], &tjjs); - x[i__3].r = q__1.r, x[i__3].i = q__1.i; - i__3 = j; - xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j] - ), dabs(r__2)); - } else { - -/* - A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and - scale = 0, and compute a solution to A*x = 0. -*/ - - i__3 = *n; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__; - x[i__4].r = 0.f, x[i__4].i = 0.f; -/* L100: */ - } - i__3 = j; - x[i__3].r = 1.f, x[i__3].i = 0.f; - xj = 1.f; - *scale = 0.f; - xmax = 0.f; - } -L105: - -/* - Scale x if necessary to avoid overflow when adding a - multiple of column j of A. -*/ - - if (xj > 1.f) { - rec = 1.f / xj; - if (cnorm[j] > (bignum - xmax) * rec) { - -/* Scale x by 1/(2*abs(x(j))). */ - - rec *= .5f; - csscal_(n, &rec, &x[1], &c__1); - *scale *= rec; - } - } else if (xj * cnorm[j] > bignum - xmax) { - -/* Scale x by 1/2. */ - - csscal_(n, &c_b1794, &x[1], &c__1); - *scale *= .5f; - } - - if (upper) { - if (j > 1) { - -/* - Compute the update - x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) -*/ - - i__3 = j - 1; - i__4 = j; - q__2.r = -x[i__4].r, q__2.i = -x[i__4].i; - q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i; - caxpy_(&i__3, &q__1, &a[j * a_dim1 + 1], &c__1, &x[1], - &c__1); - i__3 = j - 1; - i__ = icamax_(&i__3, &x[1], &c__1); - i__3 = i__; - xmax = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = - r_imag(&x[i__]), dabs(r__2)); - } - } else { - if (j < *n) { - -/* - Compute the update - x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) -*/ - - i__3 = *n - j; - i__4 = j; - q__2.r = -x[i__4].r, q__2.i = -x[i__4].i; - q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i; - caxpy_(&i__3, &q__1, &a[j + 1 + j * a_dim1], &c__1, & - x[j + 1], &c__1); - i__3 = *n - j; - i__ = j + icamax_(&i__3, &x[j + 1], &c__1); - i__3 = i__; - xmax = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = - r_imag(&x[i__]), dabs(r__2)); - } - } -/* L110: */ - } - - } else if (lsame_(trans, "T")) { - -/* Solve A**T * x = b */ - - i__2 = jlast; - i__1 = jinc; - for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { - -/* - Compute x(j) = b(j) - sum A(k,j)*x(k). - k<>j -*/ - - i__3 = j; - xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]), - dabs(r__2)); - uscal.r = tscal, uscal.i = 0.f; - rec = 1.f / dmax(xmax,1.f); - if (cnorm[j] > (bignum - xj) * rec) { - -/* If x(j) could overflow, scale x by 1/(2*XMAX). */ - - rec *= .5f; - if (nounit) { - i__3 = j + j * a_dim1; - q__1.r = tscal * a[i__3].r, q__1.i = tscal * a[i__3] - .i; - tjjs.r = q__1.r, tjjs.i = q__1.i; - } else { - tjjs.r = tscal, tjjs.i = 0.f; - } - tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), - dabs(r__2)); - if (tjj > 1.f) { - -/* - Divide by A(j,j) when scaling x if A(j,j) > 1. - - Computing MIN -*/ - r__1 = 1.f, r__2 = rec * tjj; - rec = dmin(r__1,r__2); - cladiv_(&q__1, &uscal, &tjjs); - uscal.r = q__1.r, uscal.i = q__1.i; - } - if (rec < 1.f) { - csscal_(n, &rec, &x[1], &c__1); - *scale *= rec; - xmax *= rec; - } - } - - csumj.r = 0.f, csumj.i = 0.f; - if (uscal.r == 1.f && uscal.i == 0.f) { - -/* - If the scaling needed for A in the dot product is 1, - call CDOTU to perform the dot product. -*/ - - if (upper) { - i__3 = j - 1; - cdotu_(&q__1, &i__3, &a[j * a_dim1 + 1], &c__1, &x[1], - &c__1); - csumj.r = q__1.r, csumj.i = q__1.i; - } else if (j < *n) { - i__3 = *n - j; - cdotu_(&q__1, &i__3, &a[j + 1 + j * a_dim1], &c__1, & - x[j + 1], &c__1); - csumj.r = q__1.r, csumj.i = q__1.i; - } - } else { - -/* Otherwise, use in-line code for the dot product. */ - - if (upper) { - i__3 = j - 1; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * a_dim1; - q__3.r = a[i__4].r * uscal.r - a[i__4].i * - uscal.i, q__3.i = a[i__4].r * uscal.i + a[ - i__4].i * uscal.r; - i__5 = i__; - q__2.r = q__3.r * x[i__5].r - q__3.i * x[i__5].i, - q__2.i = q__3.r * x[i__5].i + q__3.i * x[ - i__5].r; - q__1.r = csumj.r + q__2.r, q__1.i = csumj.i + - q__2.i; - csumj.r = q__1.r, csumj.i = q__1.i; -/* L120: */ - } - } else if (j < *n) { - i__3 = *n; - for (i__ = j + 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * a_dim1; - q__3.r = a[i__4].r * uscal.r - a[i__4].i * - uscal.i, q__3.i = a[i__4].r * uscal.i + a[ - i__4].i * uscal.r; - i__5 = i__; - q__2.r = q__3.r * x[i__5].r - q__3.i * x[i__5].i, - q__2.i = q__3.r * x[i__5].i + q__3.i * x[ - i__5].r; - q__1.r = csumj.r + q__2.r, q__1.i = csumj.i + - q__2.i; - csumj.r = q__1.r, csumj.i = q__1.i; -/* L130: */ - } - } - } - - q__1.r = tscal, q__1.i = 0.f; - if (uscal.r == q__1.r && uscal.i == q__1.i) { - -/* - Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) - was not used to scale the dotproduct. -*/ - - i__3 = j; - i__4 = j; - q__1.r = x[i__4].r - csumj.r, q__1.i = x[i__4].i - - csumj.i; - x[i__3].r = q__1.r, x[i__3].i = q__1.i; - i__3 = j; - xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j] - ), dabs(r__2)); - if (nounit) { - i__3 = j + j * a_dim1; - q__1.r = tscal * a[i__3].r, q__1.i = tscal * a[i__3] - .i; - tjjs.r = q__1.r, tjjs.i = q__1.i; - } else { - tjjs.r = tscal, tjjs.i = 0.f; - if (tscal == 1.f) { - goto L145; - } - } - -/* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ - - tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), - dabs(r__2)); - if (tjj > smlnum) { - -/* abs(A(j,j)) > SMLNUM: */ - - if (tjj < 1.f) { - if (xj > tjj * bignum) { - -/* Scale X by 1/abs(x(j)). */ - - rec = 1.f / xj; - csscal_(n, &rec, &x[1], &c__1); - *scale *= rec; - xmax *= rec; - } - } - i__3 = j; - cladiv_(&q__1, &x[j], &tjjs); - x[i__3].r = q__1.r, x[i__3].i = q__1.i; - } else if (tjj > 0.f) { - -/* 0 < abs(A(j,j)) <= SMLNUM: */ - - if (xj > tjj * bignum) { - -/* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ - - rec = tjj * bignum / xj; - csscal_(n, &rec, &x[1], &c__1); - *scale *= rec; - xmax *= rec; - } - i__3 = j; - cladiv_(&q__1, &x[j], &tjjs); - x[i__3].r = q__1.r, x[i__3].i = q__1.i; - } else { - -/* - A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and - scale = 0 and compute a solution to A**T *x = 0. -*/ - - i__3 = *n; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__; - x[i__4].r = 0.f, x[i__4].i = 0.f; -/* L140: */ - } - i__3 = j; - x[i__3].r = 1.f, x[i__3].i = 0.f; - *scale = 0.f; - xmax = 0.f; - } -L145: - ; - } else { - -/* - Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot - product has already been divided by 1/A(j,j). -*/ - - i__3 = j; - cladiv_(&q__2, &x[j], &tjjs); - q__1.r = q__2.r - csumj.r, q__1.i = q__2.i - csumj.i; - x[i__3].r = q__1.r, x[i__3].i = q__1.i; - } -/* Computing MAX */ - i__3 = j; - r__3 = xmax, r__4 = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = - r_imag(&x[j]), dabs(r__2)); - xmax = dmax(r__3,r__4); -/* L150: */ - } - - } else { - -/* Solve A**H * x = b */ - - i__1 = jlast; - i__2 = jinc; - for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { - -/* - Compute x(j) = b(j) - sum A(k,j)*x(k). - k<>j -*/ - - i__3 = j; - xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]), - dabs(r__2)); - uscal.r = tscal, uscal.i = 0.f; - rec = 1.f / dmax(xmax,1.f); - if (cnorm[j] > (bignum - xj) * rec) { - -/* If x(j) could overflow, scale x by 1/(2*XMAX). */ - - rec *= .5f; - if (nounit) { - r_cnjg(&q__2, &a[j + j * a_dim1]); - q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i; - tjjs.r = q__1.r, tjjs.i = q__1.i; - } else { - tjjs.r = tscal, tjjs.i = 0.f; - } - tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), - dabs(r__2)); - if (tjj > 1.f) { - -/* - Divide by A(j,j) when scaling x if A(j,j) > 1. - - Computing MIN -*/ - r__1 = 1.f, r__2 = rec * tjj; - rec = dmin(r__1,r__2); - cladiv_(&q__1, &uscal, &tjjs); - uscal.r = q__1.r, uscal.i = q__1.i; - } - if (rec < 1.f) { - csscal_(n, &rec, &x[1], &c__1); - *scale *= rec; - xmax *= rec; - } - } - - csumj.r = 0.f, csumj.i = 0.f; - if (uscal.r == 1.f && uscal.i == 0.f) { - -/* - If the scaling needed for A in the dot product is 1, - call CDOTC to perform the dot product. -*/ - - if (upper) { - i__3 = j - 1; - cdotc_(&q__1, &i__3, &a[j * a_dim1 + 1], &c__1, &x[1], - &c__1); - csumj.r = q__1.r, csumj.i = q__1.i; - } else if (j < *n) { - i__3 = *n - j; - cdotc_(&q__1, &i__3, &a[j + 1 + j * a_dim1], &c__1, & - x[j + 1], &c__1); - csumj.r = q__1.r, csumj.i = q__1.i; - } - } else { - -/* Otherwise, use in-line code for the dot product. */ - - if (upper) { - i__3 = j - 1; - for (i__ = 1; i__ <= i__3; ++i__) { - r_cnjg(&q__4, &a[i__ + j * a_dim1]); - q__3.r = q__4.r * uscal.r - q__4.i * uscal.i, - q__3.i = q__4.r * uscal.i + q__4.i * - uscal.r; - i__4 = i__; - q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, - q__2.i = q__3.r * x[i__4].i + q__3.i * x[ - i__4].r; - q__1.r = csumj.r + q__2.r, q__1.i = csumj.i + - q__2.i; - csumj.r = q__1.r, csumj.i = q__1.i; -/* L160: */ - } - } else if (j < *n) { - i__3 = *n; - for (i__ = j + 1; i__ <= i__3; ++i__) { - r_cnjg(&q__4, &a[i__ + j * a_dim1]); - q__3.r = q__4.r * uscal.r - q__4.i * uscal.i, - q__3.i = q__4.r * uscal.i + q__4.i * - uscal.r; - i__4 = i__; - q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, - q__2.i = q__3.r * x[i__4].i + q__3.i * x[ - i__4].r; - q__1.r = csumj.r + q__2.r, q__1.i = csumj.i + - q__2.i; - csumj.r = q__1.r, csumj.i = q__1.i; -/* L170: */ - } - } - } - - q__1.r = tscal, q__1.i = 0.f; - if (uscal.r == q__1.r && uscal.i == q__1.i) { - -/* - Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) - was not used to scale the dotproduct. -*/ - - i__3 = j; - i__4 = j; - q__1.r = x[i__4].r - csumj.r, q__1.i = x[i__4].i - - csumj.i; - x[i__3].r = q__1.r, x[i__3].i = q__1.i; - i__3 = j; - xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j] - ), dabs(r__2)); - if (nounit) { - r_cnjg(&q__2, &a[j + j * a_dim1]); - q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i; - tjjs.r = q__1.r, tjjs.i = q__1.i; - } else { - tjjs.r = tscal, tjjs.i = 0.f; - if (tscal == 1.f) { - goto L185; - } - } - -/* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ - - tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), - dabs(r__2)); - if (tjj > smlnum) { - -/* abs(A(j,j)) > SMLNUM: */ - - if (tjj < 1.f) { - if (xj > tjj * bignum) { - -/* Scale X by 1/abs(x(j)). */ - - rec = 1.f / xj; - csscal_(n, &rec, &x[1], &c__1); - *scale *= rec; - xmax *= rec; - } - } - i__3 = j; - cladiv_(&q__1, &x[j], &tjjs); - x[i__3].r = q__1.r, x[i__3].i = q__1.i; - } else if (tjj > 0.f) { - -/* 0 < abs(A(j,j)) <= SMLNUM: */ - - if (xj > tjj * bignum) { - -/* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ - - rec = tjj * bignum / xj; - csscal_(n, &rec, &x[1], &c__1); - *scale *= rec; - xmax *= rec; - } - i__3 = j; - cladiv_(&q__1, &x[j], &tjjs); - x[i__3].r = q__1.r, x[i__3].i = q__1.i; - } else { - -/* - A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and - scale = 0 and compute a solution to A**H *x = 0. -*/ - - i__3 = *n; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__; - x[i__4].r = 0.f, x[i__4].i = 0.f; -/* L180: */ - } - i__3 = j; - x[i__3].r = 1.f, x[i__3].i = 0.f; - *scale = 0.f; - xmax = 0.f; - } -L185: - ; - } else { - -/* - Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot - product has already been divided by 1/A(j,j). -*/ - - i__3 = j; - cladiv_(&q__2, &x[j], &tjjs); - q__1.r = q__2.r - csumj.r, q__1.i = q__2.i - csumj.i; - x[i__3].r = q__1.r, x[i__3].i = q__1.i; - } -/* Computing MAX */ - i__3 = j; - r__3 = xmax, r__4 = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = - r_imag(&x[j]), dabs(r__2)); - xmax = dmax(r__3,r__4); -/* L190: */ - } - } - *scale /= tscal; - } - -/* Scale the column norms by 1/TSCAL for return. */ - - if (tscal != 1.f) { - r__1 = 1.f / tscal; - sscal_(n, &r__1, &cnorm[1], &c__1); - } - - return 0; - -/* End of CLATRS */ - -} /* clatrs_ */ - -/* Subroutine */ int clauu2_(char *uplo, integer *n, complex *a, integer *lda, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - real r__1; - complex q__1; - - /* Local variables */ - static integer i__; - static real aii; - extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer - *, complex *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * - , complex *, integer *, complex *, integer *, complex *, complex * - , integer *); - static logical upper; - extern /* Subroutine */ int clacgv_(integer *, complex *, integer *), - csscal_(integer *, real *, complex *, integer *), xerbla_(char *, - integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CLAUU2 computes the product U * U' or L' * L, where the triangular - factor U or L is stored in the upper or lower triangular part of - the array A. - - If UPLO = 'U' or 'u' then the upper triangle of the result is stored, - overwriting the factor U in A. - If UPLO = 'L' or 'l' then the lower triangle of the result is stored, - overwriting the factor L in A. - - This is the unblocked form of the algorithm, calling Level 2 BLAS. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the triangular factor stored in the array A - is upper or lower triangular: - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the triangular factor U or L. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the triangular factor U or L. - On exit, if UPLO = 'U', the upper triangle of A is - overwritten with the upper triangle of the product U * U'; - if UPLO = 'L', the lower triangle of A is overwritten with - the lower triangle of the product L' * L. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CLAUU2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - if (upper) { - -/* Compute the product U * U'. */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + i__ * a_dim1; - aii = a[i__2].r; - if (i__ < *n) { - i__2 = i__ + i__ * a_dim1; - i__3 = *n - i__; - cdotc_(&q__1, &i__3, &a[i__ + (i__ + 1) * a_dim1], lda, &a[ - i__ + (i__ + 1) * a_dim1], lda); - r__1 = aii * aii + q__1.r; - a[i__2].r = r__1, a[i__2].i = 0.f; - i__2 = *n - i__; - clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); - i__2 = i__ - 1; - i__3 = *n - i__; - q__1.r = aii, q__1.i = 0.f; - cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[(i__ + 1) * - a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & - q__1, &a[i__ * a_dim1 + 1], &c__1); - i__2 = *n - i__; - clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); - } else { - csscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1); - } -/* L10: */ - } - - } else { - -/* Compute the product L' * L. */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + i__ * a_dim1; - aii = a[i__2].r; - if (i__ < *n) { - i__2 = i__ + i__ * a_dim1; - i__3 = *n - i__; - cdotc_(&q__1, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &a[ - i__ + 1 + i__ * a_dim1], &c__1); - r__1 = aii * aii + q__1.r; - a[i__2].r = r__1, a[i__2].i = 0.f; - i__2 = i__ - 1; - clacgv_(&i__2, &a[i__ + a_dim1], lda); - i__2 = *n - i__; - i__3 = i__ - 1; - q__1.r = aii, q__1.i = 0.f; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + - 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - q__1, &a[i__ + a_dim1], lda); - i__2 = i__ - 1; - clacgv_(&i__2, &a[i__ + a_dim1], lda); - } else { - csscal_(&i__, &aii, &a[i__ + a_dim1], lda); - } -/* L20: */ - } - } - - return 0; - -/* End of CLAUU2 */ - -} /* clauu2_ */ - -/* Subroutine */ int clauum_(char *uplo, integer *n, complex *a, integer *lda, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, ib, nb; - extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, - integer *, complex *, complex *, integer *, complex *, integer *, - complex *, complex *, integer *), cherk_(char *, - char *, integer *, integer *, real *, complex *, integer *, real * - , complex *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, - integer *, integer *, complex *, complex *, integer *, complex *, - integer *); - static logical upper; - extern /* Subroutine */ int clauu2_(char *, integer *, complex *, integer - *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CLAUUM computes the product U * U' or L' * L, where the triangular - factor U or L is stored in the upper or lower triangular part of - the array A. - - If UPLO = 'U' or 'u' then the upper triangle of the result is stored, - overwriting the factor U in A. - If UPLO = 'L' or 'l' then the lower triangle of the result is stored, - overwriting the factor L in A. - - This is the blocked form of the algorithm, calling Level 3 BLAS. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the triangular factor stored in the array A - is upper or lower triangular: - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the triangular factor U or L. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the triangular factor U or L. - On exit, if UPLO = 'U', the upper triangle of A is - overwritten with the upper triangle of the product U * U'; - if UPLO = 'L', the lower triangle of A is overwritten with - the lower triangle of the product L' * L. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CLAUUM", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Determine the block size for this environment. */ - - nb = ilaenv_(&c__1, "CLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - - if (nb <= 1 || nb >= *n) { - -/* Use unblocked code */ - - clauu2_(uplo, n, &a[a_offset], lda, info); - } else { - -/* Use blocked code */ - - if (upper) { - -/* Compute the product U * U'. */ - - i__1 = *n; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = nb, i__4 = *n - i__ + 1; - ib = min(i__3,i__4); - i__3 = i__ - 1; - ctrmm_("Right", "Upper", "Conjugate transpose", "Non-unit", & - i__3, &ib, &c_b56, &a[i__ + i__ * a_dim1], lda, &a[ - i__ * a_dim1 + 1], lda); - clauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info); - if (i__ + ib <= *n) { - i__3 = i__ - 1; - i__4 = *n - i__ - ib + 1; - cgemm_("No transpose", "Conjugate transpose", &i__3, &ib, - &i__4, &c_b56, &a[(i__ + ib) * a_dim1 + 1], lda, & - a[i__ + (i__ + ib) * a_dim1], lda, &c_b56, &a[i__ - * a_dim1 + 1], lda); - i__3 = *n - i__ - ib + 1; - cherk_("Upper", "No transpose", &ib, &i__3, &c_b871, &a[ - i__ + (i__ + ib) * a_dim1], lda, &c_b871, &a[i__ - + i__ * a_dim1], lda); - } -/* L10: */ - } - } else { - -/* Compute the product L' * L. */ - - i__2 = *n; - i__1 = nb; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { -/* Computing MIN */ - i__3 = nb, i__4 = *n - i__ + 1; - ib = min(i__3,i__4); - i__3 = i__ - 1; - ctrmm_("Left", "Lower", "Conjugate transpose", "Non-unit", & - ib, &i__3, &c_b56, &a[i__ + i__ * a_dim1], lda, &a[ - i__ + a_dim1], lda); - clauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info); - if (i__ + ib <= *n) { - i__3 = i__ - 1; - i__4 = *n - i__ - ib + 1; - cgemm_("Conjugate transpose", "No transpose", &ib, &i__3, - &i__4, &c_b56, &a[i__ + ib + i__ * a_dim1], lda, & - a[i__ + ib + a_dim1], lda, &c_b56, &a[i__ + - a_dim1], lda); - i__3 = *n - i__ - ib + 1; - cherk_("Lower", "Conjugate transpose", &ib, &i__3, & - c_b871, &a[i__ + ib + i__ * a_dim1], lda, &c_b871, - &a[i__ + i__ * a_dim1], lda); - } -/* L20: */ - } - } - } - - return 0; - -/* End of CLAUUM */ - -} /* clauum_ */ - -/* Subroutine */ int cpotf2_(char *uplo, integer *n, complex *a, integer *lda, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - real r__1; - complex q__1, q__2; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer j; - static real ajj; - extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer - *, complex *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * - , complex *, integer *, complex *, integer *, complex *, complex * - , integer *); - static logical upper; - extern /* Subroutine */ int clacgv_(integer *, complex *, integer *), - csscal_(integer *, real *, complex *, integer *), xerbla_(char *, - integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CPOTF2 computes the Cholesky factorization of a complex Hermitian - positive definite matrix A. - - The factorization has the form - A = U' * U , if UPLO = 'U', or - A = L * L', if UPLO = 'L', - where U is an upper triangular matrix and L is lower triangular. - - This is the unblocked version of the algorithm, calling Level 2 BLAS. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the upper or lower triangular part of the - Hermitian matrix A is stored. - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the Hermitian matrix A. If UPLO = 'U', the leading - n by n upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading n by n lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - - On exit, if INFO = 0, the factor U or L from the Cholesky - factorization A = U'*U or A = L*L'. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - > 0: if INFO = k, the leading minor of order k is not - positive definite, and the factorization could not be - completed. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CPOTF2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - if (upper) { - -/* Compute the Cholesky factorization A = U'*U. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - -/* Compute U(J,J) and test for non-positive-definiteness. */ - - i__2 = j + j * a_dim1; - r__1 = a[i__2].r; - i__3 = j - 1; - cdotc_(&q__2, &i__3, &a[j * a_dim1 + 1], &c__1, &a[j * a_dim1 + 1] - , &c__1); - q__1.r = r__1 - q__2.r, q__1.i = -q__2.i; - ajj = q__1.r; - if (ajj <= 0.f) { - i__2 = j + j * a_dim1; - a[i__2].r = ajj, a[i__2].i = 0.f; - goto L30; - } - ajj = sqrt(ajj); - i__2 = j + j * a_dim1; - a[i__2].r = ajj, a[i__2].i = 0.f; - -/* Compute elements J+1:N of row J. */ - - if (j < *n) { - i__2 = j - 1; - clacgv_(&i__2, &a[j * a_dim1 + 1], &c__1); - i__2 = j - 1; - i__3 = *n - j; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("Transpose", &i__2, &i__3, &q__1, &a[(j + 1) * a_dim1 - + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b56, &a[j + ( - j + 1) * a_dim1], lda); - i__2 = j - 1; - clacgv_(&i__2, &a[j * a_dim1 + 1], &c__1); - i__2 = *n - j; - r__1 = 1.f / ajj; - csscal_(&i__2, &r__1, &a[j + (j + 1) * a_dim1], lda); - } -/* L10: */ - } - } else { - -/* Compute the Cholesky factorization A = L*L'. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - -/* Compute L(J,J) and test for non-positive-definiteness. */ - - i__2 = j + j * a_dim1; - r__1 = a[i__2].r; - i__3 = j - 1; - cdotc_(&q__2, &i__3, &a[j + a_dim1], lda, &a[j + a_dim1], lda); - q__1.r = r__1 - q__2.r, q__1.i = -q__2.i; - ajj = q__1.r; - if (ajj <= 0.f) { - i__2 = j + j * a_dim1; - a[i__2].r = ajj, a[i__2].i = 0.f; - goto L30; - } - ajj = sqrt(ajj); - i__2 = j + j * a_dim1; - a[i__2].r = ajj, a[i__2].i = 0.f; - -/* Compute elements J+1:N of column J. */ - - if (j < *n) { - i__2 = j - 1; - clacgv_(&i__2, &a[j + a_dim1], lda); - i__2 = *n - j; - i__3 = j - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &a[j + 1 + a_dim1] - , lda, &a[j + a_dim1], lda, &c_b56, &a[j + 1 + j * - a_dim1], &c__1); - i__2 = j - 1; - clacgv_(&i__2, &a[j + a_dim1], lda); - i__2 = *n - j; - r__1 = 1.f / ajj; - csscal_(&i__2, &r__1, &a[j + 1 + j * a_dim1], &c__1); - } -/* L20: */ - } - } - goto L40; - -L30: - *info = j; - -L40: - return 0; - -/* End of CPOTF2 */ - -} /* cpotf2_ */ - -/* Subroutine */ int cpotrf_(char *uplo, integer *n, complex *a, integer *lda, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - complex q__1; - - /* Local variables */ - static integer j, jb, nb; - extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, - integer *, complex *, complex *, integer *, complex *, integer *, - complex *, complex *, integer *), cherk_(char *, - char *, integer *, integer *, real *, complex *, integer *, real * - , complex *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, - integer *, integer *, complex *, complex *, integer *, complex *, - integer *); - static logical upper; - extern /* Subroutine */ int cpotf2_(char *, integer *, complex *, integer - *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CPOTRF computes the Cholesky factorization of a complex Hermitian - positive definite matrix A. - - The factorization has the form - A = U**H * U, if UPLO = 'U', or - A = L * L**H, if UPLO = 'L', - where U is an upper triangular matrix and L is lower triangular. - - This is the block version of the algorithm, calling Level 3 BLAS. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the Hermitian matrix A. If UPLO = 'U', the leading - N-by-N upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading N-by-N lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - - On exit, if INFO = 0, the factor U or L from the Cholesky - factorization A = U**H*U or A = L*L**H. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the leading minor of order i is not - positive definite, and the factorization could not be - completed. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CPOTRF", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Determine the block size for this environment. */ - - nb = ilaenv_(&c__1, "CPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - if (nb <= 1 || nb >= *n) { - -/* Use unblocked code. */ - - cpotf2_(uplo, n, &a[a_offset], lda, info); - } else { - -/* Use blocked code. */ - - if (upper) { - -/* Compute the Cholesky factorization A = U'*U. */ - - i__1 = *n; - i__2 = nb; - for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { - -/* - Update and factorize the current diagonal block and test - for non-positive-definiteness. - - Computing MIN -*/ - i__3 = nb, i__4 = *n - j + 1; - jb = min(i__3,i__4); - i__3 = j - 1; - cherk_("Upper", "Conjugate transpose", &jb, &i__3, &c_b1150, & - a[j * a_dim1 + 1], lda, &c_b871, &a[j + j * a_dim1], - lda); - cpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); - if (*info != 0) { - goto L30; - } - if (j + jb <= *n) { - -/* Compute the current block row. */ - - i__3 = *n - j - jb + 1; - i__4 = j - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("Conjugate transpose", "No transpose", &jb, &i__3, - &i__4, &q__1, &a[j * a_dim1 + 1], lda, &a[(j + jb) - * a_dim1 + 1], lda, &c_b56, &a[j + (j + jb) * - a_dim1], lda); - i__3 = *n - j - jb + 1; - ctrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", - &jb, &i__3, &c_b56, &a[j + j * a_dim1], lda, &a[ - j + (j + jb) * a_dim1], lda); - } -/* L10: */ - } - - } else { - -/* Compute the Cholesky factorization A = L*L'. */ - - i__2 = *n; - i__1 = nb; - for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { - -/* - Update and factorize the current diagonal block and test - for non-positive-definiteness. - - Computing MIN -*/ - i__3 = nb, i__4 = *n - j + 1; - jb = min(i__3,i__4); - i__3 = j - 1; - cherk_("Lower", "No transpose", &jb, &i__3, &c_b1150, &a[j + - a_dim1], lda, &c_b871, &a[j + j * a_dim1], lda); - cpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); - if (*info != 0) { - goto L30; - } - if (j + jb <= *n) { - -/* Compute the current block column. */ - - i__3 = *n - j - jb + 1; - i__4 = j - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "Conjugate transpose", &i__3, &jb, - &i__4, &q__1, &a[j + jb + a_dim1], lda, &a[j + - a_dim1], lda, &c_b56, &a[j + jb + j * a_dim1], - lda); - i__3 = *n - j - jb + 1; - ctrsm_("Right", "Lower", "Conjugate transpose", "Non-unit" - , &i__3, &jb, &c_b56, &a[j + j * a_dim1], lda, &a[ - j + jb + j * a_dim1], lda); - } -/* L20: */ - } - } - } - goto L40; - -L30: - *info = *info + j - 1; - -L40: - return 0; - -/* End of CPOTRF */ - -} /* cpotrf_ */ - -/* Subroutine */ int cpotri_(char *uplo, integer *n, complex *a, integer *lda, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1; - - /* Local variables */ - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *), clauum_( - char *, integer *, complex *, integer *, integer *), - ctrtri_(char *, char *, integer *, complex *, integer *, integer * - ); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - CPOTRI computes the inverse of a complex Hermitian positive definite - matrix A using the Cholesky factorization A = U**H*U or A = L*L**H - computed by CPOTRF. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the triangular factor U or L from the Cholesky - factorization A = U**H*U or A = L*L**H, as computed by - CPOTRF. - On exit, the upper or lower triangle of the (Hermitian) - inverse of A, overwriting the input factor U or L. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the (i,i) element of the factor U or L is - zero, and the inverse could not be computed. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CPOTRI", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Invert the triangular Cholesky factor U or L. */ - - ctrtri_(uplo, "Non-unit", n, &a[a_offset], lda, info); - if (*info > 0) { - return 0; - } - -/* Form inv(U)*inv(U)' or inv(L)'*inv(L). */ - - clauum_(uplo, n, &a[a_offset], lda, info); - - return 0; - -/* End of CPOTRI */ - -} /* cpotri_ */ - -/* Subroutine */ int cpotrs_(char *uplo, integer *n, integer *nrhs, complex * - a, integer *lda, complex *b, integer *ldb, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1; - - /* Local variables */ - extern logical lsame_(char *, char *); - extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, - integer *, integer *, complex *, complex *, integer *, complex *, - integer *); - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CPOTRS solves a system of linear equations A*X = B with a Hermitian - positive definite matrix A using the Cholesky factorization - A = U**H*U or A = L*L**H computed by CPOTRF. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - NRHS (input) INTEGER - The number of right hand sides, i.e., the number of columns - of the matrix B. NRHS >= 0. - - A (input) COMPLEX array, dimension (LDA,N) - The triangular factor U or L from the Cholesky factorization - A = U**H*U or A = L*L**H, as computed by CPOTRF. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - B (input/output) COMPLEX array, dimension (LDB,NRHS) - On entry, the right hand side matrix B. - On exit, the solution matrix X. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*nrhs < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*ldb < max(1,*n)) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CPOTRS", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0 || *nrhs == 0) { - return 0; - } - - if (upper) { - -/* - Solve A*X = B where A = U'*U. - - Solve U'*X = B, overwriting B with X. -*/ - - ctrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", n, nrhs, & - c_b56, &a[a_offset], lda, &b[b_offset], ldb); - -/* Solve U*X = B, overwriting B with X. */ - - ctrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b56, & - a[a_offset], lda, &b[b_offset], ldb); - } else { - -/* - Solve A*X = B where A = L*L'. - - Solve L*X = B, overwriting B with X. -*/ - - ctrsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b56, & - a[a_offset], lda, &b[b_offset], ldb); - -/* Solve L'*X = B, overwriting B with X. */ - - ctrsm_("Left", "Lower", "Conjugate transpose", "Non-unit", n, nrhs, & - c_b56, &a[a_offset], lda, &b[b_offset], ldb); - } - - return 0; - -/* End of CPOTRS */ - -} /* cpotrs_ */ - -/* Subroutine */ int csrot_(integer *n, complex *cx, integer *incx, complex * - cy, integer *incy, real *c__, real *s) -{ - /* System generated locals */ - integer i__1, i__2, i__3, i__4; - complex q__1, q__2, q__3; - - /* Local variables */ - static integer i__, ix, iy; - static complex ctemp; - - -/* - applies a plane rotation, where the cos and sin (c and s) are real - and the vectors cx and cy are complex. - jack dongarra, linpack, 3/11/78. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --cy; - --cx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments not equal - to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = ix; - q__2.r = *c__ * cx[i__2].r, q__2.i = *c__ * cx[i__2].i; - i__3 = iy; - q__3.r = *s * cy[i__3].r, q__3.i = *s * cy[i__3].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - ctemp.r = q__1.r, ctemp.i = q__1.i; - i__2 = iy; - i__3 = iy; - q__2.r = *c__ * cy[i__3].r, q__2.i = *c__ * cy[i__3].i; - i__4 = ix; - q__3.r = *s * cx[i__4].r, q__3.i = *s * cx[i__4].i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i; - cy[i__2].r = q__1.r, cy[i__2].i = q__1.i; - i__2 = ix; - cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* code for both increments equal to 1 */ - -L20: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - q__2.r = *c__ * cx[i__2].r, q__2.i = *c__ * cx[i__2].i; - i__3 = i__; - q__3.r = *s * cy[i__3].r, q__3.i = *s * cy[i__3].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - ctemp.r = q__1.r, ctemp.i = q__1.i; - i__2 = i__; - i__3 = i__; - q__2.r = *c__ * cy[i__3].r, q__2.i = *c__ * cy[i__3].i; - i__4 = i__; - q__3.r = *s * cx[i__4].r, q__3.i = *s * cx[i__4].i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i; - cy[i__2].r = q__1.r, cy[i__2].i = q__1.i; - i__2 = i__; - cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i; -/* L30: */ - } - return 0; -} /* csrot_ */ - -/* Subroutine */ int cstedc_(char *compz, integer *n, real *d__, real *e, - complex *z__, integer *ldz, complex *work, integer *lwork, real * - rwork, integer *lrwork, integer *iwork, integer *liwork, integer * - info) -{ - /* System generated locals */ - integer z_dim1, z_offset, i__1, i__2, i__3, i__4; - real r__1, r__2; - - /* Builtin functions */ - double log(doublereal); - integer pow_ii(integer *, integer *); - double sqrt(doublereal); - - /* Local variables */ - static integer i__, j, k, m; - static real p; - static integer ii, ll, end, lgn; - static real eps, tiny; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int cswap_(integer *, complex *, integer *, - complex *, integer *); - static integer lwmin; - extern /* Subroutine */ int claed0_(integer *, integer *, real *, real *, - complex *, integer *, complex *, integer *, real *, integer *, - integer *); - static integer start; - extern /* Subroutine */ int clacrm_(integer *, integer *, complex *, - integer *, real *, integer *, complex *, integer *, real *); - extern doublereal slamch_(char *); - extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex - *, integer *, complex *, integer *), xerbla_(char *, - integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, - real *, integer *, integer *, real *, integer *, integer *), sstedc_(char *, integer *, real *, real *, real *, - integer *, real *, integer *, integer *, integer *, integer *), slaset_(char *, integer *, integer *, real *, real *, - real *, integer *); - static integer liwmin, icompz; - extern /* Subroutine */ int csteqr_(char *, integer *, real *, real *, - complex *, integer *, real *, integer *); - static real orgnrm; - extern doublereal slanst_(char *, integer *, real *, real *); - extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *); - static integer lrwmin; - static logical lquery; - static integer smlsiz; - extern /* Subroutine */ int ssteqr_(char *, integer *, real *, real *, - real *, integer *, real *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CSTEDC computes all eigenvalues and, optionally, eigenvectors of a - symmetric tridiagonal matrix using the divide and conquer method. - The eigenvectors of a full or band complex Hermitian matrix can also - be found if CHETRD or CHPTRD or CHBTRD has been used to reduce this - matrix to tridiagonal form. - - This code makes very mild assumptions about floating point - arithmetic. It will work on machines with a guard digit in - add/subtract, or on those binary machines without guard digits - which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. - It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. See SLAED3 for details. - - Arguments - ========= - - COMPZ (input) CHARACTER*1 - = 'N': Compute eigenvalues only. - = 'I': Compute eigenvectors of tridiagonal matrix also. - = 'V': Compute eigenvectors of original Hermitian matrix - also. On entry, Z contains the unitary matrix used - to reduce the original matrix to tridiagonal form. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - D (input/output) REAL array, dimension (N) - On entry, the diagonal elements of the tridiagonal matrix. - On exit, if INFO = 0, the eigenvalues in ascending order. - - E (input/output) REAL array, dimension (N-1) - On entry, the subdiagonal elements of the tridiagonal matrix. - On exit, E has been destroyed. - - Z (input/output) COMPLEX array, dimension (LDZ,N) - On entry, if COMPZ = 'V', then Z contains the unitary - matrix used in the reduction to tridiagonal form. - On exit, if INFO = 0, then if COMPZ = 'V', Z contains the - orthonormal eigenvectors of the original Hermitian matrix, - and if COMPZ = 'I', Z contains the orthonormal eigenvectors - of the symmetric tridiagonal matrix. - If COMPZ = 'N', then Z is not referenced. - - LDZ (input) INTEGER - The leading dimension of the array Z. LDZ >= 1. - If eigenvectors are desired, then LDZ >= max(1,N). - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If COMPZ = 'N' or 'I', or N <= 1, LWORK must be at least 1. - If COMPZ = 'V' and N > 1, LWORK must be at least N*N. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - RWORK (workspace/output) REAL array, - dimension (LRWORK) - On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. - - LRWORK (input) INTEGER - The dimension of the array RWORK. - If COMPZ = 'N' or N <= 1, LRWORK must be at least 1. - If COMPZ = 'V' and N > 1, LRWORK must be at least - 1 + 3*N + 2*N*lg N + 3*N**2 , - where lg( N ) = smallest integer k such - that 2**k >= N. - If COMPZ = 'I' and N > 1, LRWORK must be at least - 1 + 4*N + 2*N**2 . - - If LRWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the RWORK array, - returns this value as the first entry of the RWORK array, and - no error message related to LRWORK is issued by XERBLA. - - IWORK (workspace/output) INTEGER array, dimension (LIWORK) - On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. - - LIWORK (input) INTEGER - The dimension of the array IWORK. - If COMPZ = 'N' or N <= 1, LIWORK must be at least 1. - If COMPZ = 'V' or N > 1, LIWORK must be at least - 6 + 6*N + 5*N*lg N. - If COMPZ = 'I' or N > 1, LIWORK must be at least - 3 + 5*N . - - If LIWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the IWORK array, - returns this value as the first entry of the IWORK array, and - no error message related to LIWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an eigenvalue while - working on the submatrix lying in rows and columns - INFO/(N+1) through mod(INFO,N+1). - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - --work; - --rwork; - --iwork; - - /* Function Body */ - *info = 0; - lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; - - if (lsame_(compz, "N")) { - icompz = 0; - } else if (lsame_(compz, "V")) { - icompz = 1; - } else if (lsame_(compz, "I")) { - icompz = 2; - } else { - icompz = -1; - } - if (*n <= 1 || icompz <= 0) { - lwmin = 1; - liwmin = 1; - lrwmin = 1; - } else { - lgn = (integer) (log((real) (*n)) / log(2.f)); - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (icompz == 1) { - lwmin = *n * *n; -/* Computing 2nd power */ - i__1 = *n; - lrwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3; - liwmin = *n * 6 + 6 + *n * 5 * lgn; - } else if (icompz == 2) { - lwmin = 1; -/* Computing 2nd power */ - i__1 = *n; - lrwmin = (*n << 2) + 1 + (i__1 * i__1 << 1); - liwmin = *n * 5 + 3; - } - } - if (icompz < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { - *info = -6; - } else if (*lwork < lwmin && ! lquery) { - *info = -8; - } else if (*lrwork < lrwmin && ! lquery) { - *info = -10; - } else if (*liwork < liwmin && ! lquery) { - *info = -12; - } - - if (*info == 0) { - work[1].r = (real) lwmin, work[1].i = 0.f; - rwork[1] = (real) lrwmin; - iwork[1] = liwmin; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("CSTEDC", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - if (*n == 1) { - if (icompz != 0) { - i__1 = z_dim1 + 1; - z__[i__1].r = 1.f, z__[i__1].i = 0.f; - } - return 0; - } - - smlsiz = ilaenv_(&c__9, "CSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); - -/* - If the following conditional clause is removed, then the routine - will use the Divide and Conquer routine to compute only the - eigenvalues, which requires (3N + 3N**2) real workspace and - (2 + 5N + 2N lg(N)) integer workspace. - Since on many architectures SSTERF is much faster than any other - algorithm for finding eigenvalues only, it is used here - as the default. - - If COMPZ = 'N', use SSTERF to compute the eigenvalues. -*/ - - if (icompz == 0) { - ssterf_(n, &d__[1], &e[1], info); - return 0; - } - -/* - If N is smaller than the minimum divide size (SMLSIZ+1), then - solve the problem with another solver. -*/ - - if (*n <= smlsiz) { - if (icompz == 0) { - ssterf_(n, &d__[1], &e[1], info); - return 0; - } else if (icompz == 2) { - csteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], - info); - return 0; - } else { - csteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], - info); - return 0; - } - } - -/* If COMPZ = 'I', we simply call SSTEDC instead. */ - - if (icompz == 2) { - slaset_("Full", n, n, &c_b1101, &c_b871, &rwork[1], n); - ll = *n * *n + 1; - i__1 = *lrwork - ll + 1; - sstedc_("I", n, &d__[1], &e[1], &rwork[1], n, &rwork[ll], &i__1, & - iwork[1], liwork, info); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * z_dim1; - i__4 = (j - 1) * *n + i__; - z__[i__3].r = rwork[i__4], z__[i__3].i = 0.f; -/* L10: */ - } -/* L20: */ - } - return 0; - } - -/* - From now on, only option left to be handled is COMPZ = 'V', - i.e. ICOMPZ = 1. - - Scale. -*/ - - orgnrm = slanst_("M", n, &d__[1], &e[1]); - if (orgnrm == 0.f) { - return 0; - } - - eps = slamch_("Epsilon"); - - start = 1; - -/* while ( START <= N ) */ - -L30: - if (start <= *n) { - -/* - Let END be the position of the next subdiagonal entry such that - E( END ) <= TINY or END = N if no such subdiagonal exists. The - matrix identified by the elements between START and END - constitutes an independent sub-problem. -*/ - - end = start; -L40: - if (end < *n) { - tiny = eps * sqrt((r__1 = d__[end], dabs(r__1))) * sqrt((r__2 = - d__[end + 1], dabs(r__2))); - if ((r__1 = e[end], dabs(r__1)) > tiny) { - ++end; - goto L40; - } - } - -/* (Sub) Problem determined. Compute its size and solve it. */ - - m = end - start + 1; - if (m > smlsiz) { - *info = smlsiz; - -/* Scale. */ - - orgnrm = slanst_("M", &m, &d__[start], &e[start]); - slascl_("G", &c__0, &c__0, &orgnrm, &c_b871, &m, &c__1, &d__[ - start], &m, info); - i__1 = m - 1; - i__2 = m - 1; - slascl_("G", &c__0, &c__0, &orgnrm, &c_b871, &i__1, &c__1, &e[ - start], &i__2, info); - - claed0_(n, &m, &d__[start], &e[start], &z__[start * z_dim1 + 1], - ldz, &work[1], n, &rwork[1], &iwork[1], info); - if (*info > 0) { - *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m - + 1) + start - 1; - return 0; - } - -/* Scale back. */ - - slascl_("G", &c__0, &c__0, &c_b871, &orgnrm, &m, &c__1, &d__[ - start], &m, info); - - } else { - ssteqr_("I", &m, &d__[start], &e[start], &rwork[1], &m, &rwork[m * - m + 1], info); - clacrm_(n, &m, &z__[start * z_dim1 + 1], ldz, &rwork[1], &m, & - work[1], n, &rwork[m * m + 1]); - clacpy_("A", n, &m, &work[1], n, &z__[start * z_dim1 + 1], ldz); - if (*info > 0) { - *info = start * (*n + 1) + end; - return 0; - } - } - - start = end + 1; - goto L30; - } - -/* - endwhile - - If the problem split any number of times, then the eigenvalues - will not be properly ordered. Here we permute the eigenvalues - (and the associated eigenvectors) into ascending order. -*/ - - if (m != *n) { - -/* Use Selection Sort to minimize swaps of eigenvectors */ - - i__1 = *n; - for (ii = 2; ii <= i__1; ++ii) { - i__ = ii - 1; - k = i__; - p = d__[i__]; - i__2 = *n; - for (j = ii; j <= i__2; ++j) { - if (d__[j] < p) { - k = j; - p = d__[j]; - } -/* L50: */ - } - if (k != i__) { - d__[k] = d__[i__]; - d__[i__] = p; - cswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], - &c__1); - } -/* L60: */ - } - } - - work[1].r = (real) lwmin, work[1].i = 0.f; - rwork[1] = (real) lrwmin; - iwork[1] = liwmin; - - return 0; - -/* End of CSTEDC */ - -} /* cstedc_ */ - -/* Subroutine */ int csteqr_(char *compz, integer *n, real *d__, real *e, - complex *z__, integer *ldz, real *work, integer *info) -{ - /* System generated locals */ - integer z_dim1, z_offset, i__1, i__2; - real r__1, r__2; - - /* Builtin functions */ - double sqrt(doublereal), r_sign(real *, real *); - - /* Local variables */ - static real b, c__, f, g; - static integer i__, j, k, l, m; - static real p, r__, s; - static integer l1, ii, mm, lm1, mm1, nm1; - static real rt1, rt2, eps; - static integer lsv; - static real tst, eps2; - static integer lend, jtot; - extern /* Subroutine */ int slae2_(real *, real *, real *, real *, real *) - ; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int clasr_(char *, char *, char *, integer *, - integer *, real *, real *, complex *, integer *); - static real anorm; - extern /* Subroutine */ int cswap_(integer *, complex *, integer *, - complex *, integer *); - static integer lendm1, lendp1; - extern /* Subroutine */ int slaev2_(real *, real *, real *, real *, real * - , real *, real *); - extern doublereal slapy2_(real *, real *); - static integer iscale; - extern doublereal slamch_(char *); - extern /* Subroutine */ int claset_(char *, integer *, integer *, complex - *, complex *, complex *, integer *); - static real safmin; - extern /* Subroutine */ int xerbla_(char *, integer *); - static real safmax; - extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, - real *, integer *, integer *, real *, integer *, integer *); - static integer lendsv; - extern /* Subroutine */ int slartg_(real *, real *, real *, real *, real * - ); - static real ssfmin; - static integer nmaxit, icompz; - static real ssfmax; - extern doublereal slanst_(char *, integer *, real *, real *); - extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CSTEQR computes all eigenvalues and, optionally, eigenvectors of a - symmetric tridiagonal matrix using the implicit QL or QR method. - The eigenvectors of a full or band complex Hermitian matrix can also - be found if CHETRD or CHPTRD or CHBTRD has been used to reduce this - matrix to tridiagonal form. - - Arguments - ========= - - COMPZ (input) CHARACTER*1 - = 'N': Compute eigenvalues only. - = 'V': Compute eigenvalues and eigenvectors of the original - Hermitian matrix. On entry, Z must contain the - unitary matrix used to reduce the original matrix - to tridiagonal form. - = 'I': Compute eigenvalues and eigenvectors of the - tridiagonal matrix. Z is initialized to the identity - matrix. - - N (input) INTEGER - The order of the matrix. N >= 0. - - D (input/output) REAL array, dimension (N) - On entry, the diagonal elements of the tridiagonal matrix. - On exit, if INFO = 0, the eigenvalues in ascending order. - - E (input/output) REAL array, dimension (N-1) - On entry, the (n-1) subdiagonal elements of the tridiagonal - matrix. - On exit, E has been destroyed. - - Z (input/output) COMPLEX array, dimension (LDZ, N) - On entry, if COMPZ = 'V', then Z contains the unitary - matrix used in the reduction to tridiagonal form. - On exit, if INFO = 0, then if COMPZ = 'V', Z contains the - orthonormal eigenvectors of the original Hermitian matrix, - and if COMPZ = 'I', Z contains the orthonormal eigenvectors - of the symmetric tridiagonal matrix. - If COMPZ = 'N', then Z is not referenced. - - LDZ (input) INTEGER - The leading dimension of the array Z. LDZ >= 1, and if - eigenvectors are desired, then LDZ >= max(1,N). - - WORK (workspace) REAL array, dimension (max(1,2*N-2)) - If COMPZ = 'N', then WORK is not referenced. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: the algorithm has failed to find all the eigenvalues in - a total of 30*N iterations; if INFO = i, then i - elements of E have not converged to zero; on exit, D - and E contain the elements of a symmetric tridiagonal - matrix which is unitarily similar to the original - matrix. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - --work; - - /* Function Body */ - *info = 0; - - if (lsame_(compz, "N")) { - icompz = 0; - } else if (lsame_(compz, "V")) { - icompz = 1; - } else if (lsame_(compz, "I")) { - icompz = 2; - } else { - icompz = -1; - } - if (icompz < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { - *info = -6; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CSTEQR", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - if (*n == 1) { - if (icompz == 2) { - i__1 = z_dim1 + 1; - z__[i__1].r = 1.f, z__[i__1].i = 0.f; - } - return 0; - } - -/* Determine the unit roundoff and over/underflow thresholds. */ - - eps = slamch_("E"); -/* Computing 2nd power */ - r__1 = eps; - eps2 = r__1 * r__1; - safmin = slamch_("S"); - safmax = 1.f / safmin; - ssfmax = sqrt(safmax) / 3.f; - ssfmin = sqrt(safmin) / eps2; - -/* - Compute the eigenvalues and eigenvectors of the tridiagonal - matrix. -*/ - - if (icompz == 2) { - claset_("Full", n, n, &c_b55, &c_b56, &z__[z_offset], ldz); - } - - nmaxit = *n * 30; - jtot = 0; - -/* - Determine where the matrix splits and choose QL or QR iteration - for each block, according to whether top or bottom diagonal - element is smaller. -*/ - - l1 = 1; - nm1 = *n - 1; - -L10: - if (l1 > *n) { - goto L160; - } - if (l1 > 1) { - e[l1 - 1] = 0.f; - } - if (l1 <= nm1) { - i__1 = nm1; - for (m = l1; m <= i__1; ++m) { - tst = (r__1 = e[m], dabs(r__1)); - if (tst == 0.f) { - goto L30; - } - if (tst <= sqrt((r__1 = d__[m], dabs(r__1))) * sqrt((r__2 = d__[m - + 1], dabs(r__2))) * eps) { - e[m] = 0.f; - goto L30; - } -/* L20: */ - } - } - m = *n; - -L30: - l = l1; - lsv = l; - lend = m; - lendsv = lend; - l1 = m + 1; - if (lend == l) { - goto L10; - } - -/* Scale submatrix in rows and columns L to LEND */ - - i__1 = lend - l + 1; - anorm = slanst_("I", &i__1, &d__[l], &e[l]); - iscale = 0; - if (anorm == 0.f) { - goto L10; - } - if (anorm > ssfmax) { - iscale = 1; - i__1 = lend - l + 1; - slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, - info); - i__1 = lend - l; - slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, - info); - } else if (anorm < ssfmin) { - iscale = 2; - i__1 = lend - l + 1; - slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, - info); - i__1 = lend - l; - slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, - info); - } - -/* Choose between QL and QR iteration */ - - if ((r__1 = d__[lend], dabs(r__1)) < (r__2 = d__[l], dabs(r__2))) { - lend = lsv; - l = lendsv; - } - - if (lend > l) { - -/* - QL Iteration - - Look for small subdiagonal element. -*/ - -L40: - if (l != lend) { - lendm1 = lend - 1; - i__1 = lendm1; - for (m = l; m <= i__1; ++m) { -/* Computing 2nd power */ - r__2 = (r__1 = e[m], dabs(r__1)); - tst = r__2 * r__2; - if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m - + 1], dabs(r__2)) + safmin) { - goto L60; - } -/* L50: */ - } - } - - m = lend; - -L60: - if (m < lend) { - e[m] = 0.f; - } - p = d__[l]; - if (m == l) { - goto L80; - } - -/* - If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 - to compute its eigensystem. -*/ - - if (m == l + 1) { - if (icompz > 0) { - slaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); - work[l] = c__; - work[*n - 1 + l] = s; - clasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & - z__[l * z_dim1 + 1], ldz); - } else { - slae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); - } - d__[l] = rt1; - d__[l + 1] = rt2; - e[l] = 0.f; - l += 2; - if (l <= lend) { - goto L40; - } - goto L140; - } - - if (jtot == nmaxit) { - goto L140; - } - ++jtot; - -/* Form shift. */ - - g = (d__[l + 1] - p) / (e[l] * 2.f); - r__ = slapy2_(&g, &c_b871); - g = d__[m] - p + e[l] / (g + r_sign(&r__, &g)); - - s = 1.f; - c__ = 1.f; - p = 0.f; - -/* Inner loop */ - - mm1 = m - 1; - i__1 = l; - for (i__ = mm1; i__ >= i__1; --i__) { - f = s * e[i__]; - b = c__ * e[i__]; - slartg_(&g, &f, &c__, &s, &r__); - if (i__ != m - 1) { - e[i__ + 1] = r__; - } - g = d__[i__ + 1] - p; - r__ = (d__[i__] - g) * s + c__ * 2.f * b; - p = s * r__; - d__[i__ + 1] = g + p; - g = c__ * r__ - b; - -/* If eigenvectors are desired, then save rotations. */ - - if (icompz > 0) { - work[i__] = c__; - work[*n - 1 + i__] = -s; - } - -/* L70: */ - } - -/* If eigenvectors are desired, then apply saved rotations. */ - - if (icompz > 0) { - mm = m - l + 1; - clasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l - * z_dim1 + 1], ldz); - } - - d__[l] -= p; - e[l] = g; - goto L40; - -/* Eigenvalue found. */ - -L80: - d__[l] = p; - - ++l; - if (l <= lend) { - goto L40; - } - goto L140; - - } else { - -/* - QR Iteration - - Look for small superdiagonal element. -*/ - -L90: - if (l != lend) { - lendp1 = lend + 1; - i__1 = lendp1; - for (m = l; m >= i__1; --m) { -/* Computing 2nd power */ - r__2 = (r__1 = e[m - 1], dabs(r__1)); - tst = r__2 * r__2; - if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m - - 1], dabs(r__2)) + safmin) { - goto L110; - } -/* L100: */ - } - } - - m = lend; - -L110: - if (m > lend) { - e[m - 1] = 0.f; - } - p = d__[l]; - if (m == l) { - goto L130; - } - -/* - If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 - to compute its eigensystem. -*/ - - if (m == l - 1) { - if (icompz > 0) { - slaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) - ; - work[m] = c__; - work[*n - 1 + m] = s; - clasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & - z__[(l - 1) * z_dim1 + 1], ldz); - } else { - slae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); - } - d__[l - 1] = rt1; - d__[l] = rt2; - e[l - 1] = 0.f; - l += -2; - if (l >= lend) { - goto L90; - } - goto L140; - } - - if (jtot == nmaxit) { - goto L140; - } - ++jtot; - -/* Form shift. */ - - g = (d__[l - 1] - p) / (e[l - 1] * 2.f); - r__ = slapy2_(&g, &c_b871); - g = d__[m] - p + e[l - 1] / (g + r_sign(&r__, &g)); - - s = 1.f; - c__ = 1.f; - p = 0.f; - -/* Inner loop */ - - lm1 = l - 1; - i__1 = lm1; - for (i__ = m; i__ <= i__1; ++i__) { - f = s * e[i__]; - b = c__ * e[i__]; - slartg_(&g, &f, &c__, &s, &r__); - if (i__ != m) { - e[i__ - 1] = r__; - } - g = d__[i__] - p; - r__ = (d__[i__ + 1] - g) * s + c__ * 2.f * b; - p = s * r__; - d__[i__] = g + p; - g = c__ * r__ - b; - -/* If eigenvectors are desired, then save rotations. */ - - if (icompz > 0) { - work[i__] = c__; - work[*n - 1 + i__] = s; - } - -/* L120: */ - } - -/* If eigenvectors are desired, then apply saved rotations. */ - - if (icompz > 0) { - mm = l - m + 1; - clasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m - * z_dim1 + 1], ldz); - } - - d__[l] -= p; - e[lm1] = g; - goto L90; - -/* Eigenvalue found. */ - -L130: - d__[l] = p; - - --l; - if (l >= lend) { - goto L90; - } - goto L140; - - } - -/* Undo scaling if necessary */ - -L140: - if (iscale == 1) { - i__1 = lendsv - lsv + 1; - slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], - n, info); - i__1 = lendsv - lsv; - slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, - info); - } else if (iscale == 2) { - i__1 = lendsv - lsv + 1; - slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], - n, info); - i__1 = lendsv - lsv; - slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, - info); - } - -/* - Check for no convergence to an eigenvalue after a total - of N*MAXIT iterations. -*/ - - if (jtot == nmaxit) { - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - if (e[i__] != 0.f) { - ++(*info); - } -/* L150: */ - } - return 0; - } - goto L10; - -/* Order eigenvalues and eigenvectors. */ - -L160: - if (icompz == 0) { - -/* Use Quick Sort */ - - slasrt_("I", n, &d__[1], info); - - } else { - -/* Use Selection Sort to minimize swaps of eigenvectors */ - - i__1 = *n; - for (ii = 2; ii <= i__1; ++ii) { - i__ = ii - 1; - k = i__; - p = d__[i__]; - i__2 = *n; - for (j = ii; j <= i__2; ++j) { - if (d__[j] < p) { - k = j; - p = d__[j]; - } -/* L170: */ - } - if (k != i__) { - d__[k] = d__[i__]; - d__[i__] = p; - cswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], - &c__1); - } -/* L180: */ - } - } - return 0; - -/* End of CSTEQR */ - -} /* csteqr_ */ - -/* Subroutine */ int ctrevc_(char *side, char *howmny, logical *select, - integer *n, complex *t, integer *ldt, complex *vl, integer *ldvl, - complex *vr, integer *ldvr, integer *mm, integer *m, complex *work, - real *rwork, integer *info) -{ - /* System generated locals */ - integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, - i__2, i__3, i__4, i__5; - real r__1, r__2, r__3; - complex q__1, q__2; - - /* Builtin functions */ - double r_imag(complex *); - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, j, k, ii, ki, is; - static real ulp; - static logical allv; - static real unfl, ovfl, smin; - static logical over; - static real scale; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * - , complex *, integer *, complex *, integer *, complex *, complex * - , integer *); - static real remax; - extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, - complex *, integer *); - static logical leftv, bothv, somev; - extern /* Subroutine */ int slabad_(real *, real *); - extern integer icamax_(integer *, complex *, integer *); - extern doublereal slamch_(char *); - extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer - *), xerbla_(char *, integer *), clatrs_(char *, char *, - char *, char *, integer *, complex *, integer *, complex *, real * - , real *, integer *); - extern doublereal scasum_(integer *, complex *, integer *); - static logical rightv; - static real smlnum; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CTREVC computes some or all of the right and/or left eigenvectors of - a complex upper triangular matrix T. - - The right eigenvector x and the left eigenvector y of T corresponding - to an eigenvalue w are defined by: - - T*x = w*x, y'*T = w*y' - - where y' denotes the conjugate transpose of the vector y. - - If all eigenvectors are requested, the routine may either return the - matrices X and/or Y of right or left eigenvectors of T, or the - products Q*X and/or Q*Y, where Q is an input unitary - matrix. If T was obtained from the Schur factorization of an - original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of - right or left eigenvectors of A. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'R': compute right eigenvectors only; - = 'L': compute left eigenvectors only; - = 'B': compute both right and left eigenvectors. - - HOWMNY (input) CHARACTER*1 - = 'A': compute all right and/or left eigenvectors; - = 'B': compute all right and/or left eigenvectors, - and backtransform them using the input matrices - supplied in VR and/or VL; - = 'S': compute selected right and/or left eigenvectors, - specified by the logical array SELECT. - - SELECT (input) LOGICAL array, dimension (N) - If HOWMNY = 'S', SELECT specifies the eigenvectors to be - computed. - If HOWMNY = 'A' or 'B', SELECT is not referenced. - To select the eigenvector corresponding to the j-th - eigenvalue, SELECT(j) must be set to .TRUE.. - - N (input) INTEGER - The order of the matrix T. N >= 0. - - T (input/output) COMPLEX array, dimension (LDT,N) - The upper triangular matrix T. T is modified, but restored - on exit. - - LDT (input) INTEGER - The leading dimension of the array T. LDT >= max(1,N). - - VL (input/output) COMPLEX array, dimension (LDVL,MM) - On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must - contain an N-by-N matrix Q (usually the unitary matrix Q of - Schur vectors returned by CHSEQR). - On exit, if SIDE = 'L' or 'B', VL contains: - if HOWMNY = 'A', the matrix Y of left eigenvectors of T; - VL is lower triangular. The i-th column - VL(i) of VL is the eigenvector corresponding - to T(i,i). - if HOWMNY = 'B', the matrix Q*Y; - if HOWMNY = 'S', the left eigenvectors of T specified by - SELECT, stored consecutively in the columns - of VL, in the same order as their - eigenvalues. - If SIDE = 'R', VL is not referenced. - - LDVL (input) INTEGER - The leading dimension of the array VL. LDVL >= max(1,N) if - SIDE = 'L' or 'B'; LDVL >= 1 otherwise. - - VR (input/output) COMPLEX array, dimension (LDVR,MM) - On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must - contain an N-by-N matrix Q (usually the unitary matrix Q of - Schur vectors returned by CHSEQR). - On exit, if SIDE = 'R' or 'B', VR contains: - if HOWMNY = 'A', the matrix X of right eigenvectors of T; - VR is upper triangular. The i-th column - VR(i) of VR is the eigenvector corresponding - to T(i,i). - if HOWMNY = 'B', the matrix Q*X; - if HOWMNY = 'S', the right eigenvectors of T specified by - SELECT, stored consecutively in the columns - of VR, in the same order as their - eigenvalues. - If SIDE = 'L', VR is not referenced. - - LDVR (input) INTEGER - The leading dimension of the array VR. LDVR >= max(1,N) if - SIDE = 'R' or 'B'; LDVR >= 1 otherwise. - - MM (input) INTEGER - The number of columns in the arrays VL and/or VR. MM >= M. - - M (output) INTEGER - The number of columns in the arrays VL and/or VR actually - used to store the eigenvectors. If HOWMNY = 'A' or 'B', M - is set to N. Each selected eigenvector occupies one - column. - - WORK (workspace) COMPLEX array, dimension (2*N) - - RWORK (workspace) REAL array, dimension (N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The algorithm used in this program is basically backward (forward) - substitution, with scaling to make the the code robust against - possible overflow. - - Each eigenvector is normalized so that the element of largest - magnitude has magnitude 1; here the magnitude of a complex number - (x,y) is taken to be |x| + |y|. - - ===================================================================== - - - Decode and test the input parameters -*/ - - /* Parameter adjustments */ - --select; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - vl_dim1 = *ldvl; - vl_offset = 1 + vl_dim1; - vl -= vl_offset; - vr_dim1 = *ldvr; - vr_offset = 1 + vr_dim1; - vr -= vr_offset; - --work; - --rwork; - - /* Function Body */ - bothv = lsame_(side, "B"); - rightv = lsame_(side, "R") || bothv; - leftv = lsame_(side, "L") || bothv; - - allv = lsame_(howmny, "A"); - over = lsame_(howmny, "B"); - somev = lsame_(howmny, "S"); - -/* - Set M to the number of columns required to store the selected - eigenvectors. -*/ - - if (somev) { - *m = 0; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (select[j]) { - ++(*m); - } -/* L10: */ - } - } else { - *m = *n; - } - - *info = 0; - if (! rightv && ! leftv) { - *info = -1; - } else if (! allv && ! over && ! somev) { - *info = -2; - } else if (*n < 0) { - *info = -4; - } else if (*ldt < max(1,*n)) { - *info = -6; - } else if (*ldvl < 1 || leftv && *ldvl < *n) { - *info = -8; - } else if (*ldvr < 1 || rightv && *ldvr < *n) { - *info = -10; - } else if (*mm < *m) { - *info = -11; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CTREVC", &i__1); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - -/* Set the constants to control overflow. */ - - unfl = slamch_("Safe minimum"); - ovfl = 1.f / unfl; - slabad_(&unfl, &ovfl); - ulp = slamch_("Precision"); - smlnum = unfl * (*n / ulp); - -/* Store the diagonal elements of T in working array WORK. */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + *n; - i__3 = i__ + i__ * t_dim1; - work[i__2].r = t[i__3].r, work[i__2].i = t[i__3].i; -/* L20: */ - } - -/* - Compute 1-norm of each column of strictly upper triangular - part of T to control overflow in triangular solver. -*/ - - rwork[1] = 0.f; - i__1 = *n; - for (j = 2; j <= i__1; ++j) { - i__2 = j - 1; - rwork[j] = scasum_(&i__2, &t[j * t_dim1 + 1], &c__1); -/* L30: */ - } - - if (rightv) { - -/* Compute right eigenvectors. */ - - is = *m; - for (ki = *n; ki >= 1; --ki) { - - if (somev) { - if (! select[ki]) { - goto L80; - } - } -/* Computing MAX */ - i__1 = ki + ki * t_dim1; - r__3 = ulp * ((r__1 = t[i__1].r, dabs(r__1)) + (r__2 = r_imag(&t[ - ki + ki * t_dim1]), dabs(r__2))); - smin = dmax(r__3,smlnum); - - work[1].r = 1.f, work[1].i = 0.f; - -/* Form right-hand side. */ - - i__1 = ki - 1; - for (k = 1; k <= i__1; ++k) { - i__2 = k; - i__3 = k + ki * t_dim1; - q__1.r = -t[i__3].r, q__1.i = -t[i__3].i; - work[i__2].r = q__1.r, work[i__2].i = q__1.i; -/* L40: */ - } - -/* - Solve the triangular system: - (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK. -*/ - - i__1 = ki - 1; - for (k = 1; k <= i__1; ++k) { - i__2 = k + k * t_dim1; - i__3 = k + k * t_dim1; - i__4 = ki + ki * t_dim1; - q__1.r = t[i__3].r - t[i__4].r, q__1.i = t[i__3].i - t[i__4] - .i; - t[i__2].r = q__1.r, t[i__2].i = q__1.i; - i__2 = k + k * t_dim1; - if ((r__1 = t[i__2].r, dabs(r__1)) + (r__2 = r_imag(&t[k + k * - t_dim1]), dabs(r__2)) < smin) { - i__3 = k + k * t_dim1; - t[i__3].r = smin, t[i__3].i = 0.f; - } -/* L50: */ - } - - if (ki > 1) { - i__1 = ki - 1; - clatrs_("Upper", "No transpose", "Non-unit", "Y", &i__1, &t[ - t_offset], ldt, &work[1], &scale, &rwork[1], info); - i__1 = ki; - work[i__1].r = scale, work[i__1].i = 0.f; - } - -/* Copy the vector x or Q*x to VR and normalize. */ - - if (! over) { - ccopy_(&ki, &work[1], &c__1, &vr[is * vr_dim1 + 1], &c__1); - - ii = icamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); - i__1 = ii + is * vr_dim1; - remax = 1.f / ((r__1 = vr[i__1].r, dabs(r__1)) + (r__2 = - r_imag(&vr[ii + is * vr_dim1]), dabs(r__2))); - csscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); - - i__1 = *n; - for (k = ki + 1; k <= i__1; ++k) { - i__2 = k + is * vr_dim1; - vr[i__2].r = 0.f, vr[i__2].i = 0.f; -/* L60: */ - } - } else { - if (ki > 1) { - i__1 = ki - 1; - q__1.r = scale, q__1.i = 0.f; - cgemv_("N", n, &i__1, &c_b56, &vr[vr_offset], ldvr, &work[ - 1], &c__1, &q__1, &vr[ki * vr_dim1 + 1], &c__1); - } - - ii = icamax_(n, &vr[ki * vr_dim1 + 1], &c__1); - i__1 = ii + ki * vr_dim1; - remax = 1.f / ((r__1 = vr[i__1].r, dabs(r__1)) + (r__2 = - r_imag(&vr[ii + ki * vr_dim1]), dabs(r__2))); - csscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); - } - -/* Set back the original diagonal elements of T. */ - - i__1 = ki - 1; - for (k = 1; k <= i__1; ++k) { - i__2 = k + k * t_dim1; - i__3 = k + *n; - t[i__2].r = work[i__3].r, t[i__2].i = work[i__3].i; -/* L70: */ - } - - --is; -L80: - ; - } - } - - if (leftv) { - -/* Compute left eigenvectors. */ - - is = 1; - i__1 = *n; - for (ki = 1; ki <= i__1; ++ki) { - - if (somev) { - if (! select[ki]) { - goto L130; - } - } -/* Computing MAX */ - i__2 = ki + ki * t_dim1; - r__3 = ulp * ((r__1 = t[i__2].r, dabs(r__1)) + (r__2 = r_imag(&t[ - ki + ki * t_dim1]), dabs(r__2))); - smin = dmax(r__3,smlnum); - - i__2 = *n; - work[i__2].r = 1.f, work[i__2].i = 0.f; - -/* Form right-hand side. */ - - i__2 = *n; - for (k = ki + 1; k <= i__2; ++k) { - i__3 = k; - r_cnjg(&q__2, &t[ki + k * t_dim1]); - q__1.r = -q__2.r, q__1.i = -q__2.i; - work[i__3].r = q__1.r, work[i__3].i = q__1.i; -/* L90: */ - } - -/* - Solve the triangular system: - (T(KI+1:N,KI+1:N) - T(KI,KI))'*X = SCALE*WORK. -*/ - - i__2 = *n; - for (k = ki + 1; k <= i__2; ++k) { - i__3 = k + k * t_dim1; - i__4 = k + k * t_dim1; - i__5 = ki + ki * t_dim1; - q__1.r = t[i__4].r - t[i__5].r, q__1.i = t[i__4].i - t[i__5] - .i; - t[i__3].r = q__1.r, t[i__3].i = q__1.i; - i__3 = k + k * t_dim1; - if ((r__1 = t[i__3].r, dabs(r__1)) + (r__2 = r_imag(&t[k + k * - t_dim1]), dabs(r__2)) < smin) { - i__4 = k + k * t_dim1; - t[i__4].r = smin, t[i__4].i = 0.f; - } -/* L100: */ - } - - if (ki < *n) { - i__2 = *n - ki; - clatrs_("Upper", "Conjugate transpose", "Non-unit", "Y", & - i__2, &t[ki + 1 + (ki + 1) * t_dim1], ldt, &work[ki + - 1], &scale, &rwork[1], info); - i__2 = ki; - work[i__2].r = scale, work[i__2].i = 0.f; - } - -/* Copy the vector x or Q*x to VL and normalize. */ - - if (! over) { - i__2 = *n - ki + 1; - ccopy_(&i__2, &work[ki], &c__1, &vl[ki + is * vl_dim1], &c__1) - ; - - i__2 = *n - ki + 1; - ii = icamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - 1; - i__2 = ii + is * vl_dim1; - remax = 1.f / ((r__1 = vl[i__2].r, dabs(r__1)) + (r__2 = - r_imag(&vl[ii + is * vl_dim1]), dabs(r__2))); - i__2 = *n - ki + 1; - csscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); - - i__2 = ki - 1; - for (k = 1; k <= i__2; ++k) { - i__3 = k + is * vl_dim1; - vl[i__3].r = 0.f, vl[i__3].i = 0.f; -/* L110: */ - } - } else { - if (ki < *n) { - i__2 = *n - ki; - q__1.r = scale, q__1.i = 0.f; - cgemv_("N", n, &i__2, &c_b56, &vl[(ki + 1) * vl_dim1 + 1], - ldvl, &work[ki + 1], &c__1, &q__1, &vl[ki * - vl_dim1 + 1], &c__1); - } - - ii = icamax_(n, &vl[ki * vl_dim1 + 1], &c__1); - i__2 = ii + ki * vl_dim1; - remax = 1.f / ((r__1 = vl[i__2].r, dabs(r__1)) + (r__2 = - r_imag(&vl[ii + ki * vl_dim1]), dabs(r__2))); - csscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); - } - -/* Set back the original diagonal elements of T. */ - - i__2 = *n; - for (k = ki + 1; k <= i__2; ++k) { - i__3 = k + k * t_dim1; - i__4 = k + *n; - t[i__3].r = work[i__4].r, t[i__3].i = work[i__4].i; -/* L120: */ - } - - ++is; -L130: - ; - } - } - - return 0; - -/* End of CTREVC */ - -} /* ctrevc_ */ - -/* Subroutine */ int ctrti2_(char *uplo, char *diag, integer *n, complex *a, - integer *lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - complex q__1; - - /* Builtin functions */ - void c_div(complex *, complex *, complex *); - - /* Local variables */ - static integer j; - static complex ajj; - extern /* Subroutine */ int cscal_(integer *, complex *, complex *, - integer *); - extern logical lsame_(char *, char *); - static logical upper; - extern /* Subroutine */ int ctrmv_(char *, char *, char *, integer *, - complex *, integer *, complex *, integer *), xerbla_(char *, integer *); - static logical nounit; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CTRTI2 computes the inverse of a complex upper or lower triangular - matrix. - - This is the Level 2 BLAS version of the algorithm. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the matrix A is upper or lower triangular. - = 'U': Upper triangular - = 'L': Lower triangular - - DIAG (input) CHARACTER*1 - Specifies whether or not the matrix A is unit triangular. - = 'N': Non-unit triangular - = 'U': Unit triangular - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the triangular matrix A. If UPLO = 'U', the - leading n by n upper triangular part of the array A contains - the upper triangular matrix, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading n by n lower triangular part of the array A contains - the lower triangular matrix, and the strictly upper - triangular part of A is not referenced. If DIAG = 'U', the - diagonal elements of A are also not referenced and are - assumed to be 1. - - On exit, the (triangular) inverse of the original matrix, in - the same storage format. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - nounit = lsame_(diag, "N"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (! nounit && ! lsame_(diag, "U")) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CTRTI2", &i__1); - return 0; - } - - if (upper) { - -/* Compute inverse of upper triangular matrix. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (nounit) { - i__2 = j + j * a_dim1; - c_div(&q__1, &c_b56, &a[j + j * a_dim1]); - a[i__2].r = q__1.r, a[i__2].i = q__1.i; - i__2 = j + j * a_dim1; - q__1.r = -a[i__2].r, q__1.i = -a[i__2].i; - ajj.r = q__1.r, ajj.i = q__1.i; - } else { - q__1.r = -1.f, q__1.i = -0.f; - ajj.r = q__1.r, ajj.i = q__1.i; - } - -/* Compute elements 1:j-1 of j-th column. */ - - i__2 = j - 1; - ctrmv_("Upper", "No transpose", diag, &i__2, &a[a_offset], lda, & - a[j * a_dim1 + 1], &c__1); - i__2 = j - 1; - cscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1); -/* L10: */ - } - } else { - -/* Compute inverse of lower triangular matrix. */ - - for (j = *n; j >= 1; --j) { - if (nounit) { - i__1 = j + j * a_dim1; - c_div(&q__1, &c_b56, &a[j + j * a_dim1]); - a[i__1].r = q__1.r, a[i__1].i = q__1.i; - i__1 = j + j * a_dim1; - q__1.r = -a[i__1].r, q__1.i = -a[i__1].i; - ajj.r = q__1.r, ajj.i = q__1.i; - } else { - q__1.r = -1.f, q__1.i = -0.f; - ajj.r = q__1.r, ajj.i = q__1.i; - } - if (j < *n) { - -/* Compute elements j+1:n of j-th column. */ - - i__1 = *n - j; - ctrmv_("Lower", "No transpose", diag, &i__1, &a[j + 1 + (j + - 1) * a_dim1], lda, &a[j + 1 + j * a_dim1], &c__1); - i__1 = *n - j; - cscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1); - } -/* L20: */ - } - } - - return 0; - -/* End of CTRTI2 */ - -} /* ctrti2_ */ - -/* Subroutine */ int ctrtri_(char *uplo, char *diag, integer *n, complex *a, - integer *lda, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, i__1, i__2, i__3[2], i__4, i__5; - complex q__1; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer j, jb, nb, nn; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, - integer *, integer *, complex *, complex *, integer *, complex *, - integer *), ctrsm_(char *, char *, - char *, char *, integer *, integer *, complex *, complex *, - integer *, complex *, integer *); - static logical upper; - extern /* Subroutine */ int ctrti2_(char *, char *, integer *, complex *, - integer *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static logical nounit; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CTRTRI computes the inverse of a complex upper or lower triangular - matrix A. - - This is the Level 3 BLAS version of the algorithm. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': A is upper triangular; - = 'L': A is lower triangular. - - DIAG (input) CHARACTER*1 - = 'N': A is non-unit triangular; - = 'U': A is unit triangular. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the triangular matrix A. If UPLO = 'U', the - leading N-by-N upper triangular part of the array A contains - the upper triangular matrix, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading N-by-N lower triangular part of the array A contains - the lower triangular matrix, and the strictly upper - triangular part of A is not referenced. If DIAG = 'U', the - diagonal elements of A are also not referenced and are - assumed to be 1. - On exit, the (triangular) inverse of the original matrix, in - the same storage format. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, A(i,i) is exactly zero. The triangular - matrix is singular and its inverse can not be computed. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - nounit = lsame_(diag, "N"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (! nounit && ! lsame_(diag, "U")) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CTRTRI", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Check for singularity if non-unit. */ - - if (nounit) { - i__1 = *n; - for (*info = 1; *info <= i__1; ++(*info)) { - i__2 = *info + *info * a_dim1; - if (a[i__2].r == 0.f && a[i__2].i == 0.f) { - return 0; - } -/* L10: */ - } - *info = 0; - } - -/* - Determine the block size for this environment. - - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = uplo; - i__3[1] = 1, a__1[1] = diag; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - nb = ilaenv_(&c__1, "CTRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)2); - if (nb <= 1 || nb >= *n) { - -/* Use unblocked code */ - - ctrti2_(uplo, diag, n, &a[a_offset], lda, info); - } else { - -/* Use blocked code */ - - if (upper) { - -/* Compute inverse of upper triangular matrix */ - - i__1 = *n; - i__2 = nb; - for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { -/* Computing MIN */ - i__4 = nb, i__5 = *n - j + 1; - jb = min(i__4,i__5); - -/* Compute rows 1:j-1 of current block column */ - - i__4 = j - 1; - ctrmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, & - c_b56, &a[a_offset], lda, &a[j * a_dim1 + 1], lda); - i__4 = j - 1; - q__1.r = -1.f, q__1.i = -0.f; - ctrsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, & - q__1, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], - lda); - -/* Compute inverse of current diagonal block */ - - ctrti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info); -/* L20: */ - } - } else { - -/* Compute inverse of lower triangular matrix */ - - nn = (*n - 1) / nb * nb + 1; - i__2 = -nb; - for (j = nn; i__2 < 0 ? j >= 1 : j <= 1; j += i__2) { -/* Computing MIN */ - i__1 = nb, i__4 = *n - j + 1; - jb = min(i__1,i__4); - if (j + jb <= *n) { - -/* Compute rows j+jb:n of current block column */ - - i__1 = *n - j - jb + 1; - ctrmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, - &c_b56, &a[j + jb + (j + jb) * a_dim1], lda, &a[j - + jb + j * a_dim1], lda); - i__1 = *n - j - jb + 1; - q__1.r = -1.f, q__1.i = -0.f; - ctrsm_("Right", "Lower", "No transpose", diag, &i__1, &jb, - &q__1, &a[j + j * a_dim1], lda, &a[j + jb + j * - a_dim1], lda); - } - -/* Compute inverse of current diagonal block */ - - ctrti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info); -/* L30: */ - } - } - } - - return 0; - -/* End of CTRTRI */ - -} /* ctrtri_ */ - -/* Subroutine */ int cung2r_(integer *m, integer *n, integer *k, complex *a, - integer *lda, complex *tau, complex *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - complex q__1; - - /* Local variables */ - static integer i__, j, l; - extern /* Subroutine */ int cscal_(integer *, complex *, complex *, - integer *), clarf_(char *, integer *, integer *, complex *, - integer *, complex *, complex *, integer *, complex *), - xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CUNG2R generates an m by n complex matrix Q with orthonormal columns, - which is defined as the first n columns of a product of k elementary - reflectors of order m - - Q = H(1) H(2) . . . H(k) - - as returned by CGEQRF. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix Q. M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q. M >= N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines the - matrix Q. N >= K >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the i-th column must contain the vector which - defines the elementary reflector H(i), for i = 1,2,...,k, as - returned by CGEQRF in the first k columns of its array - argument A. - On exit, the m by n matrix Q. - - LDA (input) INTEGER - The first dimension of the array A. LDA >= max(1,M). - - TAU (input) COMPLEX array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by CGEQRF. - - WORK (workspace) COMPLEX array, dimension (N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument has an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0 || *n > *m) { - *info = -2; - } else if (*k < 0 || *k > *n) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CUNG2R", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n <= 0) { - return 0; - } - -/* Initialise columns k+1:n to columns of the unit matrix */ - - i__1 = *n; - for (j = *k + 1; j <= i__1; ++j) { - i__2 = *m; - for (l = 1; l <= i__2; ++l) { - i__3 = l + j * a_dim1; - a[i__3].r = 0.f, a[i__3].i = 0.f; -/* L10: */ - } - i__2 = j + j * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; -/* L20: */ - } - - for (i__ = *k; i__ >= 1; --i__) { - -/* Apply H(i) to A(i:m,i:n) from the left */ - - if (i__ < *n) { - i__1 = i__ + i__ * a_dim1; - a[i__1].r = 1.f, a[i__1].i = 0.f; - i__1 = *m - i__ + 1; - i__2 = *n - i__; - clarf_("Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[ - i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); - } - if (i__ < *m) { - i__1 = *m - i__; - i__2 = i__; - q__1.r = -tau[i__2].r, q__1.i = -tau[i__2].i; - cscal_(&i__1, &q__1, &a[i__ + 1 + i__ * a_dim1], &c__1); - } - i__1 = i__ + i__ * a_dim1; - i__2 = i__; - q__1.r = 1.f - tau[i__2].r, q__1.i = 0.f - tau[i__2].i; - a[i__1].r = q__1.r, a[i__1].i = q__1.i; - -/* Set A(1:i-1,i) to zero */ - - i__1 = i__ - 1; - for (l = 1; l <= i__1; ++l) { - i__2 = l + i__ * a_dim1; - a[i__2].r = 0.f, a[i__2].i = 0.f; -/* L30: */ - } -/* L40: */ - } - return 0; - -/* End of CUNG2R */ - -} /* cung2r_ */ - -/* Subroutine */ int cungbr_(char *vect, integer *m, integer *n, integer *k, - complex *a, integer *lda, complex *tau, complex *work, integer *lwork, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j, nb, mn; - extern logical lsame_(char *, char *); - static integer iinfo; - static logical wantq; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int cunglq_(integer *, integer *, integer *, - complex *, integer *, complex *, complex *, integer *, integer *), - cungqr_(integer *, integer *, integer *, complex *, integer *, - complex *, complex *, integer *, integer *); - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CUNGBR generates one of the complex unitary matrices Q or P**H - determined by CGEBRD when reducing a complex matrix A to bidiagonal - form: A = Q * B * P**H. Q and P**H are defined as products of - elementary reflectors H(i) or G(i) respectively. - - If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q - is of order M: - if m >= k, Q = H(1) H(2) . . . H(k) and CUNGBR returns the first n - columns of Q, where m >= n >= k; - if m < k, Q = H(1) H(2) . . . H(m-1) and CUNGBR returns Q as an - M-by-M matrix. - - If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H - is of order N: - if k < n, P**H = G(k) . . . G(2) G(1) and CUNGBR returns the first m - rows of P**H, where n >= m >= k; - if k >= n, P**H = G(n-1) . . . G(2) G(1) and CUNGBR returns P**H as - an N-by-N matrix. - - Arguments - ========= - - VECT (input) CHARACTER*1 - Specifies whether the matrix Q or the matrix P**H is - required, as defined in the transformation applied by CGEBRD: - = 'Q': generate Q; - = 'P': generate P**H. - - M (input) INTEGER - The number of rows of the matrix Q or P**H to be returned. - M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q or P**H to be returned. - N >= 0. - If VECT = 'Q', M >= N >= min(M,K); - if VECT = 'P', N >= M >= min(N,K). - - K (input) INTEGER - If VECT = 'Q', the number of columns in the original M-by-K - matrix reduced by CGEBRD. - If VECT = 'P', the number of rows in the original K-by-N - matrix reduced by CGEBRD. - K >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the vectors which define the elementary reflectors, - as returned by CGEBRD. - On exit, the M-by-N matrix Q or P**H. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= M. - - TAU (input) COMPLEX array, dimension - (min(M,K)) if VECT = 'Q' - (min(N,K)) if VECT = 'P' - TAU(i) must contain the scalar factor of the elementary - reflector H(i) or G(i), which determines Q or P**H, as - returned by CGEBRD in its array argument TAUQ or TAUP. - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,min(M,N)). - For optimum performance LWORK >= min(M,N)*NB, where NB - is the optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - wantq = lsame_(vect, "Q"); - mn = min(*m,*n); - lquery = *lwork == -1; - if (! wantq && ! lsame_(vect, "P")) { - *info = -1; - } else if (*m < 0) { - *info = -2; - } else if (*n < 0 || wantq && (*n > *m || *n < min(*m,*k)) || ! wantq && ( - *m > *n || *m < min(*n,*k))) { - *info = -3; - } else if (*k < 0) { - *info = -4; - } else if (*lda < max(1,*m)) { - *info = -6; - } else if (*lwork < max(1,mn) && ! lquery) { - *info = -9; - } - - if (*info == 0) { - if (wantq) { - nb = ilaenv_(&c__1, "CUNGQR", " ", m, n, k, &c_n1, (ftnlen)6, ( - ftnlen)1); - } else { - nb = ilaenv_(&c__1, "CUNGLQ", " ", m, n, k, &c_n1, (ftnlen)6, ( - ftnlen)1); - } - lwkopt = max(1,mn) * nb; - work[1].r = (real) lwkopt, work[1].i = 0.f; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("CUNGBR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0) { - work[1].r = 1.f, work[1].i = 0.f; - return 0; - } - - if (wantq) { - -/* - Form Q, determined by a call to CGEBRD to reduce an m-by-k - matrix -*/ - - if (*m >= *k) { - -/* If m >= k, assume m >= n >= k */ - - cungqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & - iinfo); - - } else { - -/* - If m < k, assume m = n - - Shift the vectors which define the elementary reflectors one - column to the right, and set the first row and column of Q - to those of the unit matrix -*/ - - for (j = *m; j >= 2; --j) { - i__1 = j * a_dim1 + 1; - a[i__1].r = 0.f, a[i__1].i = 0.f; - i__1 = *m; - for (i__ = j + 1; i__ <= i__1; ++i__) { - i__2 = i__ + j * a_dim1; - i__3 = i__ + (j - 1) * a_dim1; - a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i; -/* L10: */ - } -/* L20: */ - } - i__1 = a_dim1 + 1; - a[i__1].r = 1.f, a[i__1].i = 0.f; - i__1 = *m; - for (i__ = 2; i__ <= i__1; ++i__) { - i__2 = i__ + a_dim1; - a[i__2].r = 0.f, a[i__2].i = 0.f; -/* L30: */ - } - if (*m > 1) { - -/* Form Q(2:m,2:m) */ - - i__1 = *m - 1; - i__2 = *m - 1; - i__3 = *m - 1; - cungqr_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ - 1], &work[1], lwork, &iinfo); - } - } - } else { - -/* - Form P', determined by a call to CGEBRD to reduce a k-by-n - matrix -*/ - - if (*k < *n) { - -/* If k < n, assume k <= m <= n */ - - cunglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & - iinfo); - - } else { - -/* - If k >= n, assume m = n - - Shift the vectors which define the elementary reflectors one - row downward, and set the first row and column of P' to - those of the unit matrix -*/ - - i__1 = a_dim1 + 1; - a[i__1].r = 1.f, a[i__1].i = 0.f; - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - i__2 = i__ + a_dim1; - a[i__2].r = 0.f, a[i__2].i = 0.f; -/* L40: */ - } - i__1 = *n; - for (j = 2; j <= i__1; ++j) { - for (i__ = j - 1; i__ >= 2; --i__) { - i__2 = i__ + j * a_dim1; - i__3 = i__ - 1 + j * a_dim1; - a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i; -/* L50: */ - } - i__2 = j * a_dim1 + 1; - a[i__2].r = 0.f, a[i__2].i = 0.f; -/* L60: */ - } - if (*n > 1) { - -/* Form P'(2:n,2:n) */ - - i__1 = *n - 1; - i__2 = *n - 1; - i__3 = *n - 1; - cunglq_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ - 1], &work[1], lwork, &iinfo); - } - } - } - work[1].r = (real) lwkopt, work[1].i = 0.f; - return 0; - -/* End of CUNGBR */ - -} /* cungbr_ */ - -/* Subroutine */ int cunghr_(integer *n, integer *ilo, integer *ihi, complex * - a, integer *lda, complex *tau, complex *work, integer *lwork, integer - *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, j, nb, nh, iinfo; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int cungqr_(integer *, integer *, integer *, - complex *, integer *, complex *, complex *, integer *, integer *); - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CUNGHR generates a complex unitary matrix Q which is defined as the - product of IHI-ILO elementary reflectors of order N, as returned by - CGEHRD: - - Q = H(ilo) H(ilo+1) . . . H(ihi-1). - - Arguments - ========= - - N (input) INTEGER - The order of the matrix Q. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - ILO and IHI must have the same values as in the previous call - of CGEHRD. Q is equal to the unit matrix except in the - submatrix Q(ilo+1:ihi,ilo+1:ihi). - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the vectors which define the elementary reflectors, - as returned by CGEHRD. - On exit, the N-by-N unitary matrix Q. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - TAU (input) COMPLEX array, dimension (N-1) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by CGEHRD. - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= IHI-ILO. - For optimum performance LWORK >= (IHI-ILO)*NB, where NB is - the optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nh = *ihi - *ilo; - lquery = *lwork == -1; - if (*n < 0) { - *info = -1; - } else if (*ilo < 1 || *ilo > max(1,*n)) { - *info = -2; - } else if (*ihi < min(*ilo,*n) || *ihi > *n) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*lwork < max(1,nh) && ! lquery) { - *info = -8; - } - - if (*info == 0) { - nb = ilaenv_(&c__1, "CUNGQR", " ", &nh, &nh, &nh, &c_n1, (ftnlen)6, ( - ftnlen)1); - lwkopt = max(1,nh) * nb; - work[1].r = (real) lwkopt, work[1].i = 0.f; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("CUNGHR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - work[1].r = 1.f, work[1].i = 0.f; - return 0; - } - -/* - Shift the vectors which define the elementary reflectors one - column to the right, and set the first ilo and the last n-ihi - rows and columns to those of the unit matrix -*/ - - i__1 = *ilo + 1; - for (j = *ihi; j >= i__1; --j) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - a[i__3].r = 0.f, a[i__3].i = 0.f; -/* L10: */ - } - i__2 = *ihi; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + (j - 1) * a_dim1; - a[i__3].r = a[i__4].r, a[i__3].i = a[i__4].i; -/* L20: */ - } - i__2 = *n; - for (i__ = *ihi + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - a[i__3].r = 0.f, a[i__3].i = 0.f; -/* L30: */ - } -/* L40: */ - } - i__1 = *ilo; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - a[i__3].r = 0.f, a[i__3].i = 0.f; -/* L50: */ - } - i__2 = j + j * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; -/* L60: */ - } - i__1 = *n; - for (j = *ihi + 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - a[i__3].r = 0.f, a[i__3].i = 0.f; -/* L70: */ - } - i__2 = j + j * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; -/* L80: */ - } - - if (nh > 0) { - -/* Generate Q(ilo+1:ihi,ilo+1:ihi) */ - - cungqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[* - ilo], &work[1], lwork, &iinfo); - } - work[1].r = (real) lwkopt, work[1].i = 0.f; - return 0; - -/* End of CUNGHR */ - -} /* cunghr_ */ - -/* Subroutine */ int cungl2_(integer *m, integer *n, integer *k, complex *a, - integer *lda, complex *tau, complex *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - complex q__1, q__2; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, j, l; - extern /* Subroutine */ int cscal_(integer *, complex *, complex *, - integer *), clarf_(char *, integer *, integer *, complex *, - integer *, complex *, complex *, integer *, complex *), - clacgv_(integer *, complex *, integer *), xerbla_(char *, integer - *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CUNGL2 generates an m-by-n complex matrix Q with orthonormal rows, - which is defined as the first m rows of a product of k elementary - reflectors of order n - - Q = H(k)' . . . H(2)' H(1)' - - as returned by CGELQF. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix Q. M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q. N >= M. - - K (input) INTEGER - The number of elementary reflectors whose product defines the - matrix Q. M >= K >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the i-th row must contain the vector which defines - the elementary reflector H(i), for i = 1,2,...,k, as returned - by CGELQF in the first k rows of its array argument A. - On exit, the m by n matrix Q. - - LDA (input) INTEGER - The first dimension of the array A. LDA >= max(1,M). - - TAU (input) COMPLEX array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by CGELQF. - - WORK (workspace) COMPLEX array, dimension (M) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument has an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < *m) { - *info = -2; - } else if (*k < 0 || *k > *m) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CUNGL2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m <= 0) { - return 0; - } - - if (*k < *m) { - -/* Initialise rows k+1:m to rows of the unit matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (l = *k + 1; l <= i__2; ++l) { - i__3 = l + j * a_dim1; - a[i__3].r = 0.f, a[i__3].i = 0.f; -/* L10: */ - } - if (j > *k && j <= *m) { - i__2 = j + j * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - } -/* L20: */ - } - } - - for (i__ = *k; i__ >= 1; --i__) { - -/* Apply H(i)' to A(i:m,i:n) from the right */ - - if (i__ < *n) { - i__1 = *n - i__; - clacgv_(&i__1, &a[i__ + (i__ + 1) * a_dim1], lda); - if (i__ < *m) { - i__1 = i__ + i__ * a_dim1; - a[i__1].r = 1.f, a[i__1].i = 0.f; - i__1 = *m - i__; - i__2 = *n - i__ + 1; - r_cnjg(&q__1, &tau[i__]); - clarf_("Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, & - q__1, &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); - } - i__1 = *n - i__; - i__2 = i__; - q__1.r = -tau[i__2].r, q__1.i = -tau[i__2].i; - cscal_(&i__1, &q__1, &a[i__ + (i__ + 1) * a_dim1], lda); - i__1 = *n - i__; - clacgv_(&i__1, &a[i__ + (i__ + 1) * a_dim1], lda); - } - i__1 = i__ + i__ * a_dim1; - r_cnjg(&q__2, &tau[i__]); - q__1.r = 1.f - q__2.r, q__1.i = 0.f - q__2.i; - a[i__1].r = q__1.r, a[i__1].i = q__1.i; - -/* Set A(i,1:i-1,i) to zero */ - - i__1 = i__ - 1; - for (l = 1; l <= i__1; ++l) { - i__2 = i__ + l * a_dim1; - a[i__2].r = 0.f, a[i__2].i = 0.f; -/* L30: */ - } -/* L40: */ - } - return 0; - -/* End of CUNGL2 */ - -} /* cungl2_ */ - -/* Subroutine */ int cunglq_(integer *m, integer *n, integer *k, complex *a, - integer *lda, complex *tau, complex *work, integer *lwork, integer * - info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int cungl2_(integer *, integer *, integer *, - complex *, integer *, complex *, complex *, integer *), clarfb_( - char *, char *, char *, char *, integer *, integer *, integer *, - complex *, integer *, complex *, integer *, complex *, integer *, - complex *, integer *), clarft_( - char *, char *, integer *, integer *, complex *, integer *, - complex *, complex *, integer *), xerbla_(char *, - integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CUNGLQ generates an M-by-N complex matrix Q with orthonormal rows, - which is defined as the first M rows of a product of K elementary - reflectors of order N - - Q = H(k)' . . . H(2)' H(1)' - - as returned by CGELQF. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix Q. M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q. N >= M. - - K (input) INTEGER - The number of elementary reflectors whose product defines the - matrix Q. M >= K >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the i-th row must contain the vector which defines - the elementary reflector H(i), for i = 1,2,...,k, as returned - by CGELQF in the first k rows of its array argument A. - On exit, the M-by-N matrix Q. - - LDA (input) INTEGER - The first dimension of the array A. LDA >= max(1,M). - - TAU (input) COMPLEX array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by CGELQF. - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,M). - For optimum performance LWORK >= M*NB, where NB is - the optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit; - < 0: if INFO = -i, the i-th argument has an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nb = ilaenv_(&c__1, "CUNGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); - lwkopt = max(1,*m) * nb; - work[1].r = (real) lwkopt, work[1].i = 0.f; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < *m) { - *info = -2; - } else if (*k < 0 || *k > *m) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } else if (*lwork < max(1,*m) && ! lquery) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CUNGLQ", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m <= 0) { - work[1].r = 1.f, work[1].i = 0.f; - return 0; - } - - nbmin = 2; - nx = 0; - iws = *m; - if (nb > 1 && nb < *k) { - -/* - Determine when to cross over from blocked to unblocked code. - - Computing MAX -*/ - i__1 = 0, i__2 = ilaenv_(&c__3, "CUNGLQ", " ", m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < *k) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *m; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: reduce NB and - determine the minimum value of NB. -*/ - - nb = *lwork / ldwork; -/* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "CUNGLQ", " ", m, n, k, &c_n1, - (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - } - } - } - - if (nb >= nbmin && nb < *k && nx < *k) { - -/* - Use blocked code after the last block. - The first kk rows are handled by the block method. -*/ - - ki = (*k - nx - 1) / nb * nb; -/* Computing MIN */ - i__1 = *k, i__2 = ki + nb; - kk = min(i__1,i__2); - -/* Set A(kk+1:m,1:kk) to zero. */ - - i__1 = kk; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = kk + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - a[i__3].r = 0.f, a[i__3].i = 0.f; -/* L10: */ - } -/* L20: */ - } - } else { - kk = 0; - } - -/* Use unblocked code for the last or only block. */ - - if (kk < *m) { - i__1 = *m - kk; - i__2 = *n - kk; - i__3 = *k - kk; - cungl2_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & - tau[kk + 1], &work[1], &iinfo); - } - - if (kk > 0) { - -/* Use blocked code */ - - i__1 = -nb; - for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { -/* Computing MIN */ - i__2 = nb, i__3 = *k - i__ + 1; - ib = min(i__2,i__3); - if (i__ + ib <= *m) { - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__2 = *n - i__ + 1; - clarft_("Forward", "Rowwise", &i__2, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], &work[1], &ldwork); - -/* Apply H' to A(i+ib:m,i:n) from the right */ - - i__2 = *m - i__ - ib + 1; - i__3 = *n - i__ + 1; - clarfb_("Right", "Conjugate transpose", "Forward", "Rowwise", - &i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ - 1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ - ib + 1], &ldwork); - } - -/* Apply H' to columns i:n of current block */ - - i__2 = *n - i__ + 1; - cungl2_(&ib, &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & - work[1], &iinfo); - -/* Set columns 1:i-1 of current block to zero */ - - i__2 = i__ - 1; - for (j = 1; j <= i__2; ++j) { - i__3 = i__ + ib - 1; - for (l = i__; l <= i__3; ++l) { - i__4 = l + j * a_dim1; - a[i__4].r = 0.f, a[i__4].i = 0.f; -/* L30: */ - } -/* L40: */ - } -/* L50: */ - } - } - - work[1].r = (real) iws, work[1].i = 0.f; - return 0; - -/* End of CUNGLQ */ - -} /* cunglq_ */ - -/* Subroutine */ int cungqr_(integer *m, integer *n, integer *k, complex *a, - integer *lda, complex *tau, complex *work, integer *lwork, integer * - info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int cung2r_(integer *, integer *, integer *, - complex *, integer *, complex *, complex *, integer *), clarfb_( - char *, char *, char *, char *, integer *, integer *, integer *, - complex *, integer *, complex *, integer *, complex *, integer *, - complex *, integer *), clarft_( - char *, char *, integer *, integer *, complex *, integer *, - complex *, complex *, integer *), xerbla_(char *, - integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CUNGQR generates an M-by-N complex matrix Q with orthonormal columns, - which is defined as the first N columns of a product of K elementary - reflectors of order M - - Q = H(1) H(2) . . . H(k) - - as returned by CGEQRF. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix Q. M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q. M >= N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines the - matrix Q. N >= K >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the i-th column must contain the vector which - defines the elementary reflector H(i), for i = 1,2,...,k, as - returned by CGEQRF in the first k columns of its array - argument A. - On exit, the M-by-N matrix Q. - - LDA (input) INTEGER - The first dimension of the array A. LDA >= max(1,M). - - TAU (input) COMPLEX array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by CGEQRF. - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,N). - For optimum performance LWORK >= N*NB, where NB is the - optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument has an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nb = ilaenv_(&c__1, "CUNGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); - lwkopt = max(1,*n) * nb; - work[1].r = (real) lwkopt, work[1].i = 0.f; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0 || *n > *m) { - *info = -2; - } else if (*k < 0 || *k > *n) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } else if (*lwork < max(1,*n) && ! lquery) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CUNGQR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n <= 0) { - work[1].r = 1.f, work[1].i = 0.f; - return 0; - } - - nbmin = 2; - nx = 0; - iws = *n; - if (nb > 1 && nb < *k) { - -/* - Determine when to cross over from blocked to unblocked code. - - Computing MAX -*/ - i__1 = 0, i__2 = ilaenv_(&c__3, "CUNGQR", " ", m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < *k) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *n; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: reduce NB and - determine the minimum value of NB. -*/ - - nb = *lwork / ldwork; -/* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "CUNGQR", " ", m, n, k, &c_n1, - (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - } - } - } - - if (nb >= nbmin && nb < *k && nx < *k) { - -/* - Use blocked code after the last block. - The first kk columns are handled by the block method. -*/ - - ki = (*k - nx - 1) / nb * nb; -/* Computing MIN */ - i__1 = *k, i__2 = ki + nb; - kk = min(i__1,i__2); - -/* Set A(1:kk,kk+1:n) to zero. */ - - i__1 = *n; - for (j = kk + 1; j <= i__1; ++j) { - i__2 = kk; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - a[i__3].r = 0.f, a[i__3].i = 0.f; -/* L10: */ - } -/* L20: */ - } - } else { - kk = 0; - } - -/* Use unblocked code for the last or only block. */ - - if (kk < *n) { - i__1 = *m - kk; - i__2 = *n - kk; - i__3 = *k - kk; - cung2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & - tau[kk + 1], &work[1], &iinfo); - } - - if (kk > 0) { - -/* Use blocked code */ - - i__1 = -nb; - for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { -/* Computing MIN */ - i__2 = nb, i__3 = *k - i__ + 1; - ib = min(i__2,i__3); - if (i__ + ib <= *n) { - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__2 = *m - i__ + 1; - clarft_("Forward", "Columnwise", &i__2, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], &work[1], &ldwork); - -/* Apply H to A(i:m,i+ib:n) from the left */ - - i__2 = *m - i__ + 1; - i__3 = *n - i__ - ib + 1; - clarfb_("Left", "No transpose", "Forward", "Columnwise", & - i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ - 1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, & - work[ib + 1], &ldwork); - } - -/* Apply H to rows i:m of current block */ - - i__2 = *m - i__ + 1; - cung2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & - work[1], &iinfo); - -/* Set rows 1:i-1 of current block to zero */ - - i__2 = i__ + ib - 1; - for (j = i__; j <= i__2; ++j) { - i__3 = i__ - 1; - for (l = 1; l <= i__3; ++l) { - i__4 = l + j * a_dim1; - a[i__4].r = 0.f, a[i__4].i = 0.f; -/* L30: */ - } -/* L40: */ - } -/* L50: */ - } - } - - work[1].r = (real) iws, work[1].i = 0.f; - return 0; - -/* End of CUNGQR */ - -} /* cungqr_ */ - -/* Subroutine */ int cunm2l_(char *side, char *trans, integer *m, integer *n, - integer *k, complex *a, integer *lda, complex *tau, complex *c__, - integer *ldc, complex *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; - complex q__1; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, i1, i2, i3, mi, ni, nq; - static complex aii; - static logical left; - static complex taui; - extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * - , integer *, complex *, complex *, integer *, complex *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical notran; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CUNM2L overwrites the general complex m-by-n matrix C with - - Q * C if SIDE = 'L' and TRANS = 'N', or - - Q'* C if SIDE = 'L' and TRANS = 'C', or - - C * Q if SIDE = 'R' and TRANS = 'N', or - - C * Q' if SIDE = 'R' and TRANS = 'C', - - where Q is a complex unitary matrix defined as the product of k - elementary reflectors - - Q = H(k) . . . H(2) H(1) - - as returned by CGEQLF. Q is of order m if SIDE = 'L' and of order n - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q' from the Left - = 'R': apply Q or Q' from the Right - - TRANS (input) CHARACTER*1 - = 'N': apply Q (No transpose) - = 'C': apply Q' (Conjugate transpose) - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) COMPLEX array, dimension (LDA,K) - The i-th column must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - CGEQLF in the last k columns of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. - If SIDE = 'L', LDA >= max(1,M); - if SIDE = 'R', LDA >= max(1,N). - - TAU (input) COMPLEX array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by CGEQLF. - - C (input/output) COMPLEX array, dimension (LDC,N) - On entry, the m-by-n matrix C. - On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace) COMPLEX array, dimension - (N) if SIDE = 'L', - (M) if SIDE = 'R' - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - -/* NQ is the order of Q */ - - if (left) { - nq = *m; - } else { - nq = *n; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "C")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if (*k < 0 || *k > nq) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CUNM2L", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || *k == 0) { - return 0; - } - - if (left && notran || ! left && ! notran) { - i1 = 1; - i2 = *k; - i3 = 1; - } else { - i1 = *k; - i2 = 1; - i3 = -1; - } - - if (left) { - ni = *n; - } else { - mi = *m; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - if (left) { - -/* H(i) or H(i)' is applied to C(1:m-k+i,1:n) */ - - mi = *m - *k + i__; - } else { - -/* H(i) or H(i)' is applied to C(1:m,1:n-k+i) */ - - ni = *n - *k + i__; - } - -/* Apply H(i) or H(i)' */ - - if (notran) { - i__3 = i__; - taui.r = tau[i__3].r, taui.i = tau[i__3].i; - } else { - r_cnjg(&q__1, &tau[i__]); - taui.r = q__1.r, taui.i = q__1.i; - } - i__3 = nq - *k + i__ + i__ * a_dim1; - aii.r = a[i__3].r, aii.i = a[i__3].i; - i__3 = nq - *k + i__ + i__ * a_dim1; - a[i__3].r = 1.f, a[i__3].i = 0.f; - clarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &taui, &c__[ - c_offset], ldc, &work[1]); - i__3 = nq - *k + i__ + i__ * a_dim1; - a[i__3].r = aii.r, a[i__3].i = aii.i; -/* L10: */ - } - return 0; - -/* End of CUNM2L */ - -} /* cunm2l_ */ - -/* Subroutine */ int cunm2r_(char *side, char *trans, integer *m, integer *n, - integer *k, complex *a, integer *lda, complex *tau, complex *c__, - integer *ldc, complex *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; - complex q__1; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; - static complex aii; - static logical left; - static complex taui; - extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * - , integer *, complex *, complex *, integer *, complex *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical notran; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CUNM2R overwrites the general complex m-by-n matrix C with - - Q * C if SIDE = 'L' and TRANS = 'N', or - - Q'* C if SIDE = 'L' and TRANS = 'C', or - - C * Q if SIDE = 'R' and TRANS = 'N', or - - C * Q' if SIDE = 'R' and TRANS = 'C', - - where Q is a complex unitary matrix defined as the product of k - elementary reflectors - - Q = H(1) H(2) . . . H(k) - - as returned by CGEQRF. Q is of order m if SIDE = 'L' and of order n - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q' from the Left - = 'R': apply Q or Q' from the Right - - TRANS (input) CHARACTER*1 - = 'N': apply Q (No transpose) - = 'C': apply Q' (Conjugate transpose) - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) COMPLEX array, dimension (LDA,K) - The i-th column must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - CGEQRF in the first k columns of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. - If SIDE = 'L', LDA >= max(1,M); - if SIDE = 'R', LDA >= max(1,N). - - TAU (input) COMPLEX array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by CGEQRF. - - C (input/output) COMPLEX array, dimension (LDC,N) - On entry, the m-by-n matrix C. - On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace) COMPLEX array, dimension - (N) if SIDE = 'L', - (M) if SIDE = 'R' - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - -/* NQ is the order of Q */ - - if (left) { - nq = *m; - } else { - nq = *n; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "C")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if (*k < 0 || *k > nq) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CUNM2R", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || *k == 0) { - return 0; - } - - if (left && ! notran || ! left && notran) { - i1 = 1; - i2 = *k; - i3 = 1; - } else { - i1 = *k; - i2 = 1; - i3 = -1; - } - - if (left) { - ni = *n; - jc = 1; - } else { - mi = *m; - ic = 1; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - if (left) { - -/* H(i) or H(i)' is applied to C(i:m,1:n) */ - - mi = *m - i__ + 1; - ic = i__; - } else { - -/* H(i) or H(i)' is applied to C(1:m,i:n) */ - - ni = *n - i__ + 1; - jc = i__; - } - -/* Apply H(i) or H(i)' */ - - if (notran) { - i__3 = i__; - taui.r = tau[i__3].r, taui.i = tau[i__3].i; - } else { - r_cnjg(&q__1, &tau[i__]); - taui.r = q__1.r, taui.i = q__1.i; - } - i__3 = i__ + i__ * a_dim1; - aii.r = a[i__3].r, aii.i = a[i__3].i; - i__3 = i__ + i__ * a_dim1; - a[i__3].r = 1.f, a[i__3].i = 0.f; - clarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &taui, &c__[ic - + jc * c_dim1], ldc, &work[1]); - i__3 = i__ + i__ * a_dim1; - a[i__3].r = aii.r, a[i__3].i = aii.i; -/* L10: */ - } - return 0; - -/* End of CUNM2R */ - -} /* cunm2r_ */ - -/* Subroutine */ int cunmbr_(char *vect, char *side, char *trans, integer *m, - integer *n, integer *k, complex *a, integer *lda, complex *tau, - complex *c__, integer *ldc, complex *work, integer *lwork, integer * - info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2]; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i1, i2, nb, mi, ni, nq, nw; - static logical left; - extern logical lsame_(char *, char *); - static integer iinfo; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int cunmlq_(char *, char *, integer *, integer *, - integer *, complex *, integer *, complex *, complex *, integer *, - complex *, integer *, integer *); - static logical notran; - extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, - integer *, complex *, integer *, complex *, complex *, integer *, - complex *, integer *, integer *); - static logical applyq; - static char transt[1]; - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - If VECT = 'Q', CUNMBR overwrites the general complex M-by-N matrix C - with - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'C': Q**H * C C * Q**H - - If VECT = 'P', CUNMBR overwrites the general complex M-by-N matrix C - with - SIDE = 'L' SIDE = 'R' - TRANS = 'N': P * C C * P - TRANS = 'C': P**H * C C * P**H - - Here Q and P**H are the unitary matrices determined by CGEBRD when - reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q - and P**H are defined as products of elementary reflectors H(i) and - G(i) respectively. - - Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the - order of the unitary matrix Q or P**H that is applied. - - If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: - if nq >= k, Q = H(1) H(2) . . . H(k); - if nq < k, Q = H(1) H(2) . . . H(nq-1). - - If VECT = 'P', A is assumed to have been a K-by-NQ matrix: - if k < nq, P = G(1) G(2) . . . G(k); - if k >= nq, P = G(1) G(2) . . . G(nq-1). - - Arguments - ========= - - VECT (input) CHARACTER*1 - = 'Q': apply Q or Q**H; - = 'P': apply P or P**H. - - SIDE (input) CHARACTER*1 - = 'L': apply Q, Q**H, P or P**H from the Left; - = 'R': apply Q, Q**H, P or P**H from the Right. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q or P; - = 'C': Conjugate transpose, apply Q**H or P**H. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - If VECT = 'Q', the number of columns in the original - matrix reduced by CGEBRD. - If VECT = 'P', the number of rows in the original - matrix reduced by CGEBRD. - K >= 0. - - A (input) COMPLEX array, dimension - (LDA,min(nq,K)) if VECT = 'Q' - (LDA,nq) if VECT = 'P' - The vectors which define the elementary reflectors H(i) and - G(i), whose products determine the matrices Q and P, as - returned by CGEBRD. - - LDA (input) INTEGER - The leading dimension of the array A. - If VECT = 'Q', LDA >= max(1,nq); - if VECT = 'P', LDA >= max(1,min(nq,K)). - - TAU (input) COMPLEX array, dimension (min(nq,K)) - TAU(i) must contain the scalar factor of the elementary - reflector H(i) or G(i) which determines Q or P, as returned - by CGEBRD in the array argument TAUQ or TAUP. - - C (input/output) COMPLEX array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q - or P*C or P**H*C or C*P or C*P**H. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - applyq = lsame_(vect, "Q"); - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - lquery = *lwork == -1; - -/* NQ is the order of Q or P and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! applyq && ! lsame_(vect, "P")) { - *info = -1; - } else if (! left && ! lsame_(side, "R")) { - *info = -2; - } else if (! notran && ! lsame_(trans, "C")) { - *info = -3; - } else if (*m < 0) { - *info = -4; - } else if (*n < 0) { - *info = -5; - } else if (*k < 0) { - *info = -6; - } else /* if(complicated condition) */ { -/* Computing MAX */ - i__1 = 1, i__2 = min(nq,*k); - if (applyq && *lda < max(1,nq) || ! applyq && *lda < max(i__1,i__2)) { - *info = -8; - } else if (*ldc < max(1,*m)) { - *info = -11; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -13; - } - } - - if (*info == 0) { - if (applyq) { - if (left) { -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *m - 1; - i__2 = *m - 1; - nb = ilaenv_(&c__1, "CUNMQR", ch__1, &i__1, n, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } else { -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *n - 1; - i__2 = *n - 1; - nb = ilaenv_(&c__1, "CUNMQR", ch__1, m, &i__1, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } - } else { - if (left) { -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *m - 1; - i__2 = *m - 1; - nb = ilaenv_(&c__1, "CUNMLQ", ch__1, &i__1, n, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } else { -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *n - 1; - i__2 = *n - 1; - nb = ilaenv_(&c__1, "CUNMLQ", ch__1, m, &i__1, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } - } - lwkopt = max(1,nw) * nb; - work[1].r = (real) lwkopt, work[1].i = 0.f; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("CUNMBR", &i__1); - return 0; - } else if (lquery) { - } - -/* Quick return if possible */ - - work[1].r = 1.f, work[1].i = 0.f; - if (*m == 0 || *n == 0) { - return 0; - } - - if (applyq) { - -/* Apply Q */ - - if (nq >= *k) { - -/* Q was determined by a call to CGEBRD with nq >= k */ - - cunmqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], lwork, &iinfo); - } else if (nq > 1) { - -/* Q was determined by a call to CGEBRD with nq < k */ - - if (left) { - mi = *m - 1; - ni = *n; - i1 = 2; - i2 = 1; - } else { - mi = *m; - ni = *n - 1; - i1 = 1; - i2 = 2; - } - i__1 = nq - 1; - cunmqr_(side, trans, &mi, &ni, &i__1, &a[a_dim1 + 2], lda, &tau[1] - , &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); - } - } else { - -/* Apply P */ - - if (notran) { - *(unsigned char *)transt = 'C'; - } else { - *(unsigned char *)transt = 'N'; - } - if (nq > *k) { - -/* P was determined by a call to CGEBRD with nq > k */ - - cunmlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], lwork, &iinfo); - } else if (nq > 1) { - -/* P was determined by a call to CGEBRD with nq <= k */ - - if (left) { - mi = *m - 1; - ni = *n; - i1 = 2; - i2 = 1; - } else { - mi = *m; - ni = *n - 1; - i1 = 1; - i2 = 2; - } - i__1 = nq - 1; - cunmlq_(side, transt, &mi, &ni, &i__1, &a[(a_dim1 << 1) + 1], lda, - &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, & - iinfo); - } - } - work[1].r = (real) lwkopt, work[1].i = 0.f; - return 0; - -/* End of CUNMBR */ - -} /* cunmbr_ */ - -/* Subroutine */ int cunml2_(char *side, char *trans, integer *m, integer *n, - integer *k, complex *a, integer *lda, complex *tau, complex *c__, - integer *ldc, complex *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; - complex q__1; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; - static complex aii; - static logical left; - static complex taui; - extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * - , integer *, complex *, complex *, integer *, complex *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int clacgv_(integer *, complex *, integer *), - xerbla_(char *, integer *); - static logical notran; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CUNML2 overwrites the general complex m-by-n matrix C with - - Q * C if SIDE = 'L' and TRANS = 'N', or - - Q'* C if SIDE = 'L' and TRANS = 'C', or - - C * Q if SIDE = 'R' and TRANS = 'N', or - - C * Q' if SIDE = 'R' and TRANS = 'C', - - where Q is a complex unitary matrix defined as the product of k - elementary reflectors - - Q = H(k)' . . . H(2)' H(1)' - - as returned by CGELQF. Q is of order m if SIDE = 'L' and of order n - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q' from the Left - = 'R': apply Q or Q' from the Right - - TRANS (input) CHARACTER*1 - = 'N': apply Q (No transpose) - = 'C': apply Q' (Conjugate transpose) - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) COMPLEX array, dimension - (LDA,M) if SIDE = 'L', - (LDA,N) if SIDE = 'R' - The i-th row must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - CGELQF in the first k rows of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,K). - - TAU (input) COMPLEX array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by CGELQF. - - C (input/output) COMPLEX array, dimension (LDC,N) - On entry, the m-by-n matrix C. - On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace) COMPLEX array, dimension - (N) if SIDE = 'L', - (M) if SIDE = 'R' - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - -/* NQ is the order of Q */ - - if (left) { - nq = *m; - } else { - nq = *n; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "C")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if (*k < 0 || *k > nq) { - *info = -5; - } else if (*lda < max(1,*k)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CUNML2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || *k == 0) { - return 0; - } - - if (left && notran || ! left && ! notran) { - i1 = 1; - i2 = *k; - i3 = 1; - } else { - i1 = *k; - i2 = 1; - i3 = -1; - } - - if (left) { - ni = *n; - jc = 1; - } else { - mi = *m; - ic = 1; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - if (left) { - -/* H(i) or H(i)' is applied to C(i:m,1:n) */ - - mi = *m - i__ + 1; - ic = i__; - } else { - -/* H(i) or H(i)' is applied to C(1:m,i:n) */ - - ni = *n - i__ + 1; - jc = i__; - } - -/* Apply H(i) or H(i)' */ - - if (notran) { - r_cnjg(&q__1, &tau[i__]); - taui.r = q__1.r, taui.i = q__1.i; - } else { - i__3 = i__; - taui.r = tau[i__3].r, taui.i = tau[i__3].i; - } - if (i__ < nq) { - i__3 = nq - i__; - clacgv_(&i__3, &a[i__ + (i__ + 1) * a_dim1], lda); - } - i__3 = i__ + i__ * a_dim1; - aii.r = a[i__3].r, aii.i = a[i__3].i; - i__3 = i__ + i__ * a_dim1; - a[i__3].r = 1.f, a[i__3].i = 0.f; - clarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &taui, &c__[ic + - jc * c_dim1], ldc, &work[1]); - i__3 = i__ + i__ * a_dim1; - a[i__3].r = aii.r, a[i__3].i = aii.i; - if (i__ < nq) { - i__3 = nq - i__; - clacgv_(&i__3, &a[i__ + (i__ + 1) * a_dim1], lda); - } -/* L10: */ - } - return 0; - -/* End of CUNML2 */ - -} /* cunml2_ */ - -/* Subroutine */ int cunmlq_(char *side, char *trans, integer *m, integer *n, - integer *k, complex *a, integer *lda, complex *tau, complex *c__, - integer *ldc, complex *work, integer *lwork, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, - i__5; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i__; - static complex t[4160] /* was [65][64] */; - static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; - static logical left; - extern logical lsame_(char *, char *); - static integer nbmin, iinfo; - extern /* Subroutine */ int cunml2_(char *, char *, integer *, integer *, - integer *, complex *, integer *, complex *, complex *, integer *, - complex *, integer *), clarfb_(char *, char *, - char *, char *, integer *, integer *, integer *, complex *, - integer *, complex *, integer *, complex *, integer *, complex *, - integer *), clarft_(char *, char * - , integer *, integer *, complex *, integer *, complex *, complex * - , integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static logical notran; - static integer ldwork; - static char transt[1]; - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CUNMLQ overwrites the general complex M-by-N matrix C with - - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'C': Q**H * C C * Q**H - - where Q is a complex unitary matrix defined as the product of k - elementary reflectors - - Q = H(k)' . . . H(2)' H(1)' - - as returned by CGELQF. Q is of order M if SIDE = 'L' and of order N - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q**H from the Left; - = 'R': apply Q or Q**H from the Right. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q; - = 'C': Conjugate transpose, apply Q**H. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) COMPLEX array, dimension - (LDA,M) if SIDE = 'L', - (LDA,N) if SIDE = 'R' - The i-th row must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - CGELQF in the first k rows of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,K). - - TAU (input) COMPLEX array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by CGELQF. - - C (input/output) COMPLEX array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - lquery = *lwork == -1; - -/* NQ is the order of Q and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "C")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if (*k < 0 || *k > nq) { - *info = -5; - } else if (*lda < max(1,*k)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -12; - } - - if (*info == 0) { - -/* - Determine the block size. NB may be at most NBMAX, where NBMAX - is used to define the local array T. - - Computing MIN - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "CUNMLQ", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nb = min(i__1,i__2); - lwkopt = max(1,nw) * nb; - work[1].r = (real) lwkopt, work[1].i = 0.f; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("CUNMLQ", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || *k == 0) { - work[1].r = 1.f, work[1].i = 0.f; - return 0; - } - - nbmin = 2; - ldwork = nw; - if (nb > 1 && nb < *k) { - iws = nw * nb; - if (*lwork < iws) { - nb = *lwork / ldwork; -/* - Computing MAX - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 2, i__2 = ilaenv_(&c__2, "CUNMLQ", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nbmin = max(i__1,i__2); - } - } else { - iws = nw; - } - - if (nb < nbmin || nb >= *k) { - -/* Use unblocked code */ - - cunml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], &iinfo); - } else { - -/* Use blocked code */ - - if (left && notran || ! left && ! notran) { - i1 = 1; - i2 = *k; - i3 = nb; - } else { - i1 = (*k - 1) / nb * nb + 1; - i2 = 1; - i3 = -nb; - } - - if (left) { - ni = *n; - jc = 1; - } else { - mi = *m; - ic = 1; - } - - if (notran) { - *(unsigned char *)transt = 'C'; - } else { - *(unsigned char *)transt = 'N'; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__4 = nb, i__5 = *k - i__ + 1; - ib = min(i__4,i__5); - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__4 = nq - i__ + 1; - clarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1], - lda, &tau[i__], t, &c__65); - if (left) { - -/* H or H' is applied to C(i:m,1:n) */ - - mi = *m - i__ + 1; - ic = i__; - } else { - -/* H or H' is applied to C(1:m,i:n) */ - - ni = *n - i__ + 1; - jc = i__; - } - -/* Apply H or H' */ - - clarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__ - + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1], - ldc, &work[1], &ldwork); -/* L10: */ - } - } - work[1].r = (real) lwkopt, work[1].i = 0.f; - return 0; - -/* End of CUNMLQ */ - -} /* cunmlq_ */ - -/* Subroutine */ int cunmql_(char *side, char *trans, integer *m, integer *n, - integer *k, complex *a, integer *lda, complex *tau, complex *c__, - integer *ldc, complex *work, integer *lwork, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, - i__5; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i__; - static complex t[4160] /* was [65][64] */; - static integer i1, i2, i3, ib, nb, mi, ni, nq, nw, iws; - static logical left; - extern logical lsame_(char *, char *); - static integer nbmin, iinfo; - extern /* Subroutine */ int cunm2l_(char *, char *, integer *, integer *, - integer *, complex *, integer *, complex *, complex *, integer *, - complex *, integer *), clarfb_(char *, char *, - char *, char *, integer *, integer *, integer *, complex *, - integer *, complex *, integer *, complex *, integer *, complex *, - integer *), clarft_(char *, char * - , integer *, integer *, complex *, integer *, complex *, complex * - , integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static logical notran; - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CUNMQL overwrites the general complex M-by-N matrix C with - - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'C': Q**H * C C * Q**H - - where Q is a complex unitary matrix defined as the product of k - elementary reflectors - - Q = H(k) . . . H(2) H(1) - - as returned by CGEQLF. Q is of order M if SIDE = 'L' and of order N - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q**H from the Left; - = 'R': apply Q or Q**H from the Right. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q; - = 'C': Transpose, apply Q**H. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) COMPLEX array, dimension (LDA,K) - The i-th column must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - CGEQLF in the last k columns of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. - If SIDE = 'L', LDA >= max(1,M); - if SIDE = 'R', LDA >= max(1,N). - - TAU (input) COMPLEX array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by CGEQLF. - - C (input/output) COMPLEX array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - lquery = *lwork == -1; - -/* NQ is the order of Q and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "C")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if (*k < 0 || *k > nq) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -12; - } - - if (*info == 0) { - -/* - Determine the block size. NB may be at most NBMAX, where NBMAX - is used to define the local array T. - - Computing MIN - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "CUNMQL", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nb = min(i__1,i__2); - lwkopt = max(1,nw) * nb; - work[1].r = (real) lwkopt, work[1].i = 0.f; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("CUNMQL", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || *k == 0) { - work[1].r = 1.f, work[1].i = 0.f; - return 0; - } - - nbmin = 2; - ldwork = nw; - if (nb > 1 && nb < *k) { - iws = nw * nb; - if (*lwork < iws) { - nb = *lwork / ldwork; -/* - Computing MAX - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 2, i__2 = ilaenv_(&c__2, "CUNMQL", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nbmin = max(i__1,i__2); - } - } else { - iws = nw; - } - - if (nb < nbmin || nb >= *k) { - -/* Use unblocked code */ - - cunm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], &iinfo); - } else { - -/* Use blocked code */ - - if (left && notran || ! left && ! notran) { - i1 = 1; - i2 = *k; - i3 = nb; - } else { - i1 = (*k - 1) / nb * nb + 1; - i2 = 1; - i3 = -nb; - } - - if (left) { - ni = *n; - } else { - mi = *m; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__4 = nb, i__5 = *k - i__ + 1; - ib = min(i__4,i__5); - -/* - Form the triangular factor of the block reflector - H = H(i+ib-1) . . . H(i+1) H(i) -*/ - - i__4 = nq - *k + i__ + ib - 1; - clarft_("Backward", "Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1] - , lda, &tau[i__], t, &c__65); - if (left) { - -/* H or H' is applied to C(1:m-k+i+ib-1,1:n) */ - - mi = *m - *k + i__ + ib - 1; - } else { - -/* H or H' is applied to C(1:m,1:n-k+i+ib-1) */ - - ni = *n - *k + i__ + ib - 1; - } - -/* Apply H or H' */ - - clarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, &a[ - i__ * a_dim1 + 1], lda, t, &c__65, &c__[c_offset], ldc, & - work[1], &ldwork); -/* L10: */ - } - } - work[1].r = (real) lwkopt, work[1].i = 0.f; - return 0; - -/* End of CUNMQL */ - -} /* cunmql_ */ - -/* Subroutine */ int cunmqr_(char *side, char *trans, integer *m, integer *n, - integer *k, complex *a, integer *lda, complex *tau, complex *c__, - integer *ldc, complex *work, integer *lwork, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, - i__5; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i__; - static complex t[4160] /* was [65][64] */; - static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; - static logical left; - extern logical lsame_(char *, char *); - static integer nbmin, iinfo; - extern /* Subroutine */ int cunm2r_(char *, char *, integer *, integer *, - integer *, complex *, integer *, complex *, complex *, integer *, - complex *, integer *), clarfb_(char *, char *, - char *, char *, integer *, integer *, integer *, complex *, - integer *, complex *, integer *, complex *, integer *, complex *, - integer *), clarft_(char *, char * - , integer *, integer *, complex *, integer *, complex *, complex * - , integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static logical notran; - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CUNMQR overwrites the general complex M-by-N matrix C with - - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'C': Q**H * C C * Q**H - - where Q is a complex unitary matrix defined as the product of k - elementary reflectors - - Q = H(1) H(2) . . . H(k) - - as returned by CGEQRF. Q is of order M if SIDE = 'L' and of order N - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q**H from the Left; - = 'R': apply Q or Q**H from the Right. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q; - = 'C': Conjugate transpose, apply Q**H. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) COMPLEX array, dimension (LDA,K) - The i-th column must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - CGEQRF in the first k columns of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. - If SIDE = 'L', LDA >= max(1,M); - if SIDE = 'R', LDA >= max(1,N). - - TAU (input) COMPLEX array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by CGEQRF. - - C (input/output) COMPLEX array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - lquery = *lwork == -1; - -/* NQ is the order of Q and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "C")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if (*k < 0 || *k > nq) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -12; - } - - if (*info == 0) { - -/* - Determine the block size. NB may be at most NBMAX, where NBMAX - is used to define the local array T. - - Computing MIN - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "CUNMQR", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nb = min(i__1,i__2); - lwkopt = max(1,nw) * nb; - work[1].r = (real) lwkopt, work[1].i = 0.f; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("CUNMQR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || *k == 0) { - work[1].r = 1.f, work[1].i = 0.f; - return 0; - } - - nbmin = 2; - ldwork = nw; - if (nb > 1 && nb < *k) { - iws = nw * nb; - if (*lwork < iws) { - nb = *lwork / ldwork; -/* - Computing MAX - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 2, i__2 = ilaenv_(&c__2, "CUNMQR", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nbmin = max(i__1,i__2); - } - } else { - iws = nw; - } - - if (nb < nbmin || nb >= *k) { - -/* Use unblocked code */ - - cunm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], &iinfo); - } else { - -/* Use blocked code */ - - if (left && ! notran || ! left && notran) { - i1 = 1; - i2 = *k; - i3 = nb; - } else { - i1 = (*k - 1) / nb * nb + 1; - i2 = 1; - i3 = -nb; - } - - if (left) { - ni = *n; - jc = 1; - } else { - mi = *m; - ic = 1; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__4 = nb, i__5 = *k - i__ + 1; - ib = min(i__4,i__5); - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__4 = nq - i__ + 1; - clarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], t, &c__65) - ; - if (left) { - -/* H or H' is applied to C(i:m,1:n) */ - - mi = *m - i__ + 1; - ic = i__; - } else { - -/* H or H' is applied to C(1:m,i:n) */ - - ni = *n - i__ + 1; - jc = i__; - } - -/* Apply H or H' */ - - clarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[ - i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * - c_dim1], ldc, &work[1], &ldwork); -/* L10: */ - } - } - work[1].r = (real) lwkopt, work[1].i = 0.f; - return 0; - -/* End of CUNMQR */ - -} /* cunmqr_ */ - -/* Subroutine */ int cunmtr_(char *side, char *uplo, char *trans, integer *m, - integer *n, complex *a, integer *lda, complex *tau, complex *c__, - integer *ldc, complex *work, integer *lwork, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2, i__3; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i1, i2, nb, mi, ni, nq, nw; - static logical left; - extern logical lsame_(char *, char *); - static integer iinfo; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int cunmql_(char *, char *, integer *, integer *, - integer *, complex *, integer *, complex *, complex *, integer *, - complex *, integer *, integer *), cunmqr_(char *, - char *, integer *, integer *, integer *, complex *, integer *, - complex *, complex *, integer *, complex *, integer *, integer *); - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CUNMTR overwrites the general complex M-by-N matrix C with - - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'C': Q**H * C C * Q**H - - where Q is a complex unitary matrix of order nq, with nq = m if - SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of - nq-1 elementary reflectors, as returned by CHETRD: - - if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); - - if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q**H from the Left; - = 'R': apply Q or Q**H from the Right. - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A contains elementary reflectors - from CHETRD; - = 'L': Lower triangle of A contains elementary reflectors - from CHETRD. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q; - = 'C': Conjugate transpose, apply Q**H. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - A (input) COMPLEX array, dimension - (LDA,M) if SIDE = 'L' - (LDA,N) if SIDE = 'R' - The vectors which define the elementary reflectors, as - returned by CHETRD. - - LDA (input) INTEGER - The leading dimension of the array A. - LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. - - TAU (input) COMPLEX array, dimension - (M-1) if SIDE = 'L' - (N-1) if SIDE = 'R' - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by CHETRD. - - C (input/output) COMPLEX array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >=M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - upper = lsame_(uplo, "U"); - lquery = *lwork == -1; - -/* NQ is the order of Q and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! upper && ! lsame_(uplo, "L")) { - *info = -2; - } else if (! lsame_(trans, "N") && ! lsame_(trans, - "C")) { - *info = -3; - } else if (*m < 0) { - *info = -4; - } else if (*n < 0) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -12; - } - - if (*info == 0) { - if (upper) { - if (left) { -/* Writing concatenation */ - i__1[0] = 1, a__1[0] = side; - i__1[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); - i__2 = *m - 1; - i__3 = *m - 1; - nb = ilaenv_(&c__1, "CUNMQL", ch__1, &i__2, n, &i__3, &c_n1, ( - ftnlen)6, (ftnlen)2); - } else { -/* Writing concatenation */ - i__1[0] = 1, a__1[0] = side; - i__1[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); - i__2 = *n - 1; - i__3 = *n - 1; - nb = ilaenv_(&c__1, "CUNMQL", ch__1, m, &i__2, &i__3, &c_n1, ( - ftnlen)6, (ftnlen)2); - } - } else { - if (left) { -/* Writing concatenation */ - i__1[0] = 1, a__1[0] = side; - i__1[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); - i__2 = *m - 1; - i__3 = *m - 1; - nb = ilaenv_(&c__1, "CUNMQR", ch__1, &i__2, n, &i__3, &c_n1, ( - ftnlen)6, (ftnlen)2); - } else { -/* Writing concatenation */ - i__1[0] = 1, a__1[0] = side; - i__1[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); - i__2 = *n - 1; - i__3 = *n - 1; - nb = ilaenv_(&c__1, "CUNMQR", ch__1, m, &i__2, &i__3, &c_n1, ( - ftnlen)6, (ftnlen)2); - } - } - lwkopt = max(1,nw) * nb; - work[1].r = (real) lwkopt, work[1].i = 0.f; - } - - if (*info != 0) { - i__2 = -(*info); - xerbla_("CUNMTR", &i__2); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || nq == 1) { - work[1].r = 1.f, work[1].i = 0.f; - return 0; - } - - if (left) { - mi = *m - 1; - ni = *n; - } else { - mi = *m; - ni = *n - 1; - } - - if (upper) { - -/* Q was determined by a call to CHETRD with UPLO = 'U' */ - - i__2 = nq - 1; - cunmql_(side, trans, &mi, &ni, &i__2, &a[(a_dim1 << 1) + 1], lda, & - tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo); - } else { - -/* Q was determined by a call to CHETRD with UPLO = 'L' */ - - if (left) { - i1 = 2; - i2 = 1; - } else { - i1 = 1; - i2 = 2; - } - i__2 = nq - 1; - cunmqr_(side, trans, &mi, &ni, &i__2, &a[a_dim1 + 2], lda, &tau[1], & - c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); - } - work[1].r = (real) lwkopt, work[1].i = 0.f; - return 0; - -/* End of CUNMTR */ - -} /* cunmtr_ */ - -/* Subroutine */ int dbdsdc_(char *uplo, char *compq, integer *n, doublereal * - d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, - integer *ldvt, doublereal *q, integer *iq, doublereal *work, integer * - iwork, integer *info) -{ - /* System generated locals */ - integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; - doublereal d__1; - - /* Builtin functions */ - double d_sign(doublereal *, doublereal *), log(doublereal); - - /* Local variables */ - static integer i__, j, k; - static doublereal p, r__; - static integer z__, ic, ii, kk; - static doublereal cs; - static integer is, iu; - static doublereal sn; - static integer nm1; - static doublereal eps; - static integer ivt, difl, difr, ierr, perm, mlvl, sqre; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, - integer *, doublereal *, doublereal *, doublereal *, integer *), dcopy_(integer *, doublereal *, integer * - , doublereal *, integer *), dswap_(integer *, doublereal *, - integer *, doublereal *, integer *); - static integer poles, iuplo, nsize, start; - extern /* Subroutine */ int dlasd0_(integer *, integer *, doublereal *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - integer *, integer *, doublereal *, integer *); - - extern /* Subroutine */ int dlasda_(integer *, integer *, integer *, - integer *, doublereal *, doublereal *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *, integer *, integer *, integer *, - doublereal *, doublereal *, doublereal *, doublereal *, integer *, - integer *), dlascl_(char *, integer *, integer *, doublereal *, - doublereal *, integer *, integer *, doublereal *, integer *, - integer *), dlasdq_(char *, integer *, integer *, integer - *, integer *, integer *, doublereal *, doublereal *, doublereal *, - integer *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *), dlaset_(char *, integer *, - integer *, doublereal *, doublereal *, doublereal *, integer *), dlartg_(doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int xerbla_(char *, integer *); - static integer givcol; - extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); - static integer icompq; - static doublereal orgnrm; - static integer givnum, givptr, qstart, smlsiz, wstart, smlszp; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - December 1, 1999 - - - Purpose - ======= - - DBDSDC computes the singular value decomposition (SVD) of a real - N-by-N (upper or lower) bidiagonal matrix B: B = U * S * VT, - using a divide and conquer method, where S is a diagonal matrix - with non-negative diagonal elements (the singular values of B), and - U and VT are orthogonal matrices of left and right singular vectors, - respectively. DBDSDC can be used to compute all singular values, - and optionally, singular vectors or singular vectors in compact form. - - This code makes very mild assumptions about floating point - arithmetic. It will work on machines with a guard digit in - add/subtract, or on those binary machines without guard digits - which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. - It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. See DLASD3 for details. - - The code currently call DLASDQ if singular values only are desired. - However, it can be slightly modified to compute singular values - using the divide and conquer method. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': B is upper bidiagonal. - = 'L': B is lower bidiagonal. - - COMPQ (input) CHARACTER*1 - Specifies whether singular vectors are to be computed - as follows: - = 'N': Compute singular values only; - = 'P': Compute singular values and compute singular - vectors in compact form; - = 'I': Compute singular values and singular vectors. - - N (input) INTEGER - The order of the matrix B. N >= 0. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the n diagonal elements of the bidiagonal matrix B. - On exit, if INFO=0, the singular values of B. - - E (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the elements of E contain the offdiagonal - elements of the bidiagonal matrix whose SVD is desired. - On exit, E has been destroyed. - - U (output) DOUBLE PRECISION array, dimension (LDU,N) - If COMPQ = 'I', then: - On exit, if INFO = 0, U contains the left singular vectors - of the bidiagonal matrix. - For other values of COMPQ, U is not referenced. - - LDU (input) INTEGER - The leading dimension of the array U. LDU >= 1. - If singular vectors are desired, then LDU >= max( 1, N ). - - VT (output) DOUBLE PRECISION array, dimension (LDVT,N) - If COMPQ = 'I', then: - On exit, if INFO = 0, VT' contains the right singular - vectors of the bidiagonal matrix. - For other values of COMPQ, VT is not referenced. - - LDVT (input) INTEGER - The leading dimension of the array VT. LDVT >= 1. - If singular vectors are desired, then LDVT >= max( 1, N ). - - Q (output) DOUBLE PRECISION array, dimension (LDQ) - If COMPQ = 'P', then: - On exit, if INFO = 0, Q and IQ contain the left - and right singular vectors in a compact form, - requiring O(N log N) space instead of 2*N**2. - In particular, Q contains all the DOUBLE PRECISION data in - LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1)))) - words of memory, where SMLSIZ is returned by ILAENV and - is equal to the maximum size of the subproblems at the - bottom of the computation tree (usually about 25). - For other values of COMPQ, Q is not referenced. - - IQ (output) INTEGER array, dimension (LDIQ) - If COMPQ = 'P', then: - On exit, if INFO = 0, Q and IQ contain the left - and right singular vectors in a compact form, - requiring O(N log N) space instead of 2*N**2. - In particular, IQ contains all INTEGER data in - LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1)))) - words of memory, where SMLSIZ is returned by ILAENV and - is equal to the maximum size of the subproblems at the - bottom of the computation tree (usually about 25). - For other values of COMPQ, IQ is not referenced. - - WORK (workspace) DOUBLE PRECISION array, dimension (LWORK) - If COMPQ = 'N' then LWORK >= (4 * N). - If COMPQ = 'P' then LWORK >= (6 * N). - If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N). - - IWORK (workspace) INTEGER array, dimension (8*N) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an singular value. - The update process of divide and conquer failed. - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - --q; - --iq; - --work; - --iwork; - - /* Function Body */ - *info = 0; - - iuplo = 0; - if (lsame_(uplo, "U")) { - iuplo = 1; - } - if (lsame_(uplo, "L")) { - iuplo = 2; - } - if (lsame_(compq, "N")) { - icompq = 0; - } else if (lsame_(compq, "P")) { - icompq = 1; - } else if (lsame_(compq, "I")) { - icompq = 2; - } else { - icompq = -1; - } - if (iuplo == 0) { - *info = -1; - } else if (icompq < 0) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*ldu < 1 || icompq == 2 && *ldu < *n) { - *info = -7; - } else if (*ldvt < 1 || icompq == 2 && *ldvt < *n) { - *info = -9; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DBDSDC", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - smlsiz = ilaenv_(&c__9, "DBDSDC", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); - if (*n == 1) { - if (icompq == 1) { - q[1] = d_sign(&c_b2453, &d__[1]); - q[smlsiz * *n + 1] = 1.; - } else if (icompq == 2) { - u[u_dim1 + 1] = d_sign(&c_b2453, &d__[1]); - vt[vt_dim1 + 1] = 1.; - } - d__[1] = abs(d__[1]); - return 0; - } - nm1 = *n - 1; - -/* - If matrix lower bidiagonal, rotate to be upper bidiagonal - by applying Givens rotations on the left -*/ - - wstart = 1; - qstart = 3; - if (icompq == 1) { - dcopy_(n, &d__[1], &c__1, &q[1], &c__1); - i__1 = *n - 1; - dcopy_(&i__1, &e[1], &c__1, &q[*n + 1], &c__1); - } - if (iuplo == 2) { - qstart = 5; - wstart = (*n << 1) - 1; - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); - d__[i__] = r__; - e[i__] = sn * d__[i__ + 1]; - d__[i__ + 1] = cs * d__[i__ + 1]; - if (icompq == 1) { - q[i__ + (*n << 1)] = cs; - q[i__ + *n * 3] = sn; - } else if (icompq == 2) { - work[i__] = cs; - work[nm1 + i__] = -sn; - } -/* L10: */ - } - } - -/* If ICOMPQ = 0, use DLASDQ to compute the singular values. */ - - if (icompq == 0) { - dlasdq_("U", &c__0, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ - vt_offset], ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ - wstart], info); - goto L40; - } - -/* - If N is smaller than the minimum divide size SMLSIZ, then solve - the problem with another solver. -*/ - - if (*n <= smlsiz) { - if (icompq == 2) { - dlaset_("A", n, n, &c_b2467, &c_b2453, &u[u_offset], ldu); - dlaset_("A", n, n, &c_b2467, &c_b2453, &vt[vt_offset], ldvt); - dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &vt[vt_offset] - , ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ - wstart], info); - } else if (icompq == 1) { - iu = 1; - ivt = iu + *n; - dlaset_("A", n, n, &c_b2467, &c_b2453, &q[iu + (qstart - 1) * *n], - n); - dlaset_("A", n, n, &c_b2467, &c_b2453, &q[ivt + (qstart - 1) * *n] - , n); - dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &q[ivt + ( - qstart - 1) * *n], n, &q[iu + (qstart - 1) * *n], n, &q[ - iu + (qstart - 1) * *n], n, &work[wstart], info); - } - goto L40; - } - - if (icompq == 2) { - dlaset_("A", n, n, &c_b2467, &c_b2453, &u[u_offset], ldu); - dlaset_("A", n, n, &c_b2467, &c_b2453, &vt[vt_offset], ldvt); - } - -/* Scale. */ - - orgnrm = dlanst_("M", n, &d__[1], &e[1]); - if (orgnrm == 0.) { - return 0; - } - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2453, n, &c__1, &d__[1], n, &ierr); - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2453, &nm1, &c__1, &e[1], &nm1, & - ierr); - - eps = EPSILON; - - mlvl = (integer) (log((doublereal) (*n) / (doublereal) (smlsiz + 1)) / - log(2.)) + 1; - smlszp = smlsiz + 1; - - if (icompq == 1) { - iu = 1; - ivt = smlsiz + 1; - difl = ivt + smlszp; - difr = difl + mlvl; - z__ = difr + (mlvl << 1); - ic = z__ + mlvl; - is = ic + 1; - poles = is + 1; - givnum = poles + (mlvl << 1); - - k = 1; - givptr = 2; - perm = 3; - givcol = perm + mlvl; - } - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if ((d__1 = d__[i__], abs(d__1)) < eps) { - d__[i__] = d_sign(&eps, &d__[i__]); - } -/* L20: */ - } - - start = 1; - sqre = 0; - - i__1 = nm1; - for (i__ = 1; i__ <= i__1; ++i__) { - if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) { - -/* - Subproblem found. First determine its size and then - apply divide and conquer on it. -*/ - - if (i__ < nm1) { - -/* A subproblem with E(I) small for I < NM1. */ - - nsize = i__ - start + 1; - } else if ((d__1 = e[i__], abs(d__1)) >= eps) { - -/* A subproblem with E(NM1) not too small but I = NM1. */ - - nsize = *n - start + 1; - } else { - -/* - A subproblem with E(NM1) small. This implies an - 1-by-1 subproblem at D(N). Solve this 1-by-1 problem - first. -*/ - - nsize = i__ - start + 1; - if (icompq == 2) { - u[*n + *n * u_dim1] = d_sign(&c_b2453, &d__[*n]); - vt[*n + *n * vt_dim1] = 1.; - } else if (icompq == 1) { - q[*n + (qstart - 1) * *n] = d_sign(&c_b2453, &d__[*n]); - q[*n + (smlsiz + qstart - 1) * *n] = 1.; - } - d__[*n] = (d__1 = d__[*n], abs(d__1)); - } - if (icompq == 2) { - dlasd0_(&nsize, &sqre, &d__[start], &e[start], &u[start + - start * u_dim1], ldu, &vt[start + start * vt_dim1], - ldvt, &smlsiz, &iwork[1], &work[wstart], info); - } else { - dlasda_(&icompq, &smlsiz, &nsize, &sqre, &d__[start], &e[ - start], &q[start + (iu + qstart - 2) * *n], n, &q[ - start + (ivt + qstart - 2) * *n], &iq[start + k * *n], - &q[start + (difl + qstart - 2) * *n], &q[start + ( - difr + qstart - 2) * *n], &q[start + (z__ + qstart - - 2) * *n], &q[start + (poles + qstart - 2) * *n], &iq[ - start + givptr * *n], &iq[start + givcol * *n], n, & - iq[start + perm * *n], &q[start + (givnum + qstart - - 2) * *n], &q[start + (ic + qstart - 2) * *n], &q[ - start + (is + qstart - 2) * *n], &work[wstart], & - iwork[1], info); - if (*info != 0) { - return 0; - } - } - start = i__ + 1; - } -/* L30: */ - } - -/* Unscale */ - - dlascl_("G", &c__0, &c__0, &c_b2453, &orgnrm, n, &c__1, &d__[1], n, &ierr); -L40: - -/* Use Selection Sort to minimize swaps of singular vectors */ - - i__1 = *n; - for (ii = 2; ii <= i__1; ++ii) { - i__ = ii - 1; - kk = i__; - p = d__[i__]; - i__2 = *n; - for (j = ii; j <= i__2; ++j) { - if (d__[j] > p) { - kk = j; - p = d__[j]; - } -/* L50: */ - } - if (kk != i__) { - d__[kk] = d__[i__]; - d__[i__] = p; - if (icompq == 1) { - iq[i__] = kk; - } else if (icompq == 2) { - dswap_(n, &u[i__ * u_dim1 + 1], &c__1, &u[kk * u_dim1 + 1], & - c__1); - dswap_(n, &vt[i__ + vt_dim1], ldvt, &vt[kk + vt_dim1], ldvt); - } - } else if (icompq == 1) { - iq[i__] = i__; - } -/* L60: */ - } - -/* If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO */ - - if (icompq == 1) { - if (iuplo == 1) { - iq[*n] = 1; - } else { - iq[*n] = 0; - } - } - -/* - If B is lower bidiagonal, update U by those Givens rotations - which rotated B to be upper bidiagonal -*/ - - if (iuplo == 2 && icompq == 2) { - dlasr_("L", "V", "B", n, n, &work[1], &work[*n], &u[u_offset], ldu); - } - - return 0; - -/* End of DBDSDC */ - -} /* dbdsdc_ */ - -/* Subroutine */ int dbdsqr_(char *uplo, integer *n, integer *ncvt, integer * - nru, integer *ncc, doublereal *d__, doublereal *e, doublereal *vt, - integer *ldvt, doublereal *u, integer *ldu, doublereal *c__, integer * - ldc, doublereal *work, integer *info) -{ - /* System generated locals */ - integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, - i__2; - doublereal d__1, d__2, d__3, d__4; - - /* Builtin functions */ - double pow_dd(doublereal *, doublereal *), sqrt(doublereal), d_sign( - doublereal *, doublereal *); - - /* Local variables */ - static doublereal f, g, h__; - static integer i__, j, m; - static doublereal r__, cs; - static integer ll; - static doublereal sn, mu; - static integer nm1, nm12, nm13, lll; - static doublereal eps, sll, tol, abse; - static integer idir; - static doublereal abss; - static integer oldm; - static doublereal cosl; - static integer isub, iter; - static doublereal unfl, sinl, cosr, smin, smax, sinr; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *), dlas2_( - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *), dscal_(integer *, doublereal *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - static doublereal oldcs; - extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, - integer *, doublereal *, doublereal *, doublereal *, integer *); - static integer oldll; - static doublereal shift, sigmn, oldsn; - extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, - doublereal *, integer *); - static integer maxit; - static doublereal sminl, sigmx; - static logical lower; - extern /* Subroutine */ int dlasq1_(integer *, doublereal *, doublereal *, - doublereal *, integer *), dlasv2_(doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *); - - extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *), xerbla_(char *, - integer *); - static doublereal sminoa, thresh; - static logical rotate; - static doublereal sminlo, tolmul; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - DBDSQR computes the singular value decomposition (SVD) of a real - N-by-N (upper or lower) bidiagonal matrix B: B = Q * S * P' (P' - denotes the transpose of P), where S is a diagonal matrix with - non-negative diagonal elements (the singular values of B), and Q - and P are orthogonal matrices. - - The routine computes S, and optionally computes U * Q, P' * VT, - or Q' * C, for given real input matrices U, VT, and C. - - See "Computing Small Singular Values of Bidiagonal Matrices With - Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, - LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, - no. 5, pp. 873-912, Sept 1990) and - "Accurate singular values and differential qd algorithms," by - B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics - Department, University of California at Berkeley, July 1992 - for a detailed description of the algorithm. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': B is upper bidiagonal; - = 'L': B is lower bidiagonal. - - N (input) INTEGER - The order of the matrix B. N >= 0. - - NCVT (input) INTEGER - The number of columns of the matrix VT. NCVT >= 0. - - NRU (input) INTEGER - The number of rows of the matrix U. NRU >= 0. - - NCC (input) INTEGER - The number of columns of the matrix C. NCC >= 0. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the n diagonal elements of the bidiagonal matrix B. - On exit, if INFO=0, the singular values of B in decreasing - order. - - E (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the elements of E contain the - offdiagonal elements of the bidiagonal matrix whose SVD - is desired. On normal exit (INFO = 0), E is destroyed. - If the algorithm does not converge (INFO > 0), D and E - will contain the diagonal and superdiagonal elements of a - bidiagonal matrix orthogonally equivalent to the one given - as input. E(N) is used for workspace. - - VT (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT) - On entry, an N-by-NCVT matrix VT. - On exit, VT is overwritten by P' * VT. - VT is not referenced if NCVT = 0. - - LDVT (input) INTEGER - The leading dimension of the array VT. - LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. - - U (input/output) DOUBLE PRECISION array, dimension (LDU, N) - On entry, an NRU-by-N matrix U. - On exit, U is overwritten by U * Q. - U is not referenced if NRU = 0. - - LDU (input) INTEGER - The leading dimension of the array U. LDU >= max(1,NRU). - - C (input/output) DOUBLE PRECISION array, dimension (LDC, NCC) - On entry, an N-by-NCC matrix C. - On exit, C is overwritten by Q' * C. - C is not referenced if NCC = 0. - - LDC (input) INTEGER - The leading dimension of the array C. - LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. - - WORK (workspace) DOUBLE PRECISION array, dimension (4*N) - - INFO (output) INTEGER - = 0: successful exit - < 0: If INFO = -i, the i-th argument had an illegal value - > 0: the algorithm did not converge; D and E contain the - elements of a bidiagonal matrix which is orthogonally - similar to the input matrix B; if INFO = i, i - elements of E have not converged to zero. - - Internal Parameters - =================== - - TOLMUL DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8))) - TOLMUL controls the convergence criterion of the QR loop. - If it is positive, TOLMUL*EPS is the desired relative - precision in the computed singular values. - If it is negative, abs(TOLMUL*EPS*sigma_max) is the - desired absolute accuracy in the computed singular - values (corresponds to relative accuracy - abs(TOLMUL*EPS) in the largest singular value. - abs(TOLMUL) should be between 1 and 1/EPS, and preferably - between 10 (for fast convergence) and .1/EPS - (for there to be some accuracy in the results). - Default is to lose at either one eighth or 2 of the - available decimal digits in each computed singular value - (whichever is smaller). - - MAXITR INTEGER, default = 6 - MAXITR controls the maximum number of passes of the - algorithm through its inner loop. The algorithms stops - (and so fails to converge) if the number of passes - through the inner loop exceeds MAXITR*N**2. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - lower = lsame_(uplo, "L"); - if (! lsame_(uplo, "U") && ! lower) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*ncvt < 0) { - *info = -3; - } else if (*nru < 0) { - *info = -4; - } else if (*ncc < 0) { - *info = -5; - } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { - *info = -9; - } else if (*ldu < max(1,*nru)) { - *info = -11; - } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { - *info = -13; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DBDSQR", &i__1); - return 0; - } - if (*n == 0) { - return 0; - } - if (*n == 1) { - goto L160; - } - -/* ROTATE is true if any singular vectors desired, false otherwise */ - - rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; - -/* If no singular vectors desired, use qd algorithm */ - - if (! rotate) { - dlasq1_(n, &d__[1], &e[1], &work[1], info); - return 0; - } - - nm1 = *n - 1; - nm12 = nm1 + nm1; - nm13 = nm12 + nm1; - idir = 0; - -/* Get machine constants */ - - eps = EPSILON; - unfl = SAFEMINIMUM; - -/* - If matrix lower bidiagonal, rotate to be upper bidiagonal - by applying Givens rotations on the left -*/ - - if (lower) { - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); - d__[i__] = r__; - e[i__] = sn * d__[i__ + 1]; - d__[i__ + 1] = cs * d__[i__ + 1]; - work[i__] = cs; - work[nm1 + i__] = sn; -/* L10: */ - } - -/* Update singular vectors if desired */ - - if (*nru > 0) { - dlasr_("R", "V", "F", nru, n, &work[1], &work[*n], &u[u_offset], - ldu); - } - if (*ncc > 0) { - dlasr_("L", "V", "F", n, ncc, &work[1], &work[*n], &c__[c_offset], - ldc); - } - } - -/* - Compute singular values to relative accuracy TOL - (By setting TOL to be negative, algorithm will compute - singular values to absolute accuracy ABS(TOL)*norm(input matrix)) - - Computing MAX - Computing MIN -*/ - d__3 = 100., d__4 = pow_dd(&eps, &c_b2532); - d__1 = 10., d__2 = min(d__3,d__4); - tolmul = max(d__1,d__2); - tol = tolmul * eps; - -/* Compute approximate maximum, minimum singular values */ - - smax = 0.; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__2 = smax, d__3 = (d__1 = d__[i__], abs(d__1)); - smax = max(d__2,d__3); -/* L20: */ - } - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__2 = smax, d__3 = (d__1 = e[i__], abs(d__1)); - smax = max(d__2,d__3); -/* L30: */ - } - sminl = 0.; - if (tol >= 0.) { - -/* Relative accuracy desired */ - - sminoa = abs(d__[1]); - if (sminoa == 0.) { - goto L50; - } - mu = sminoa; - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - mu = (d__2 = d__[i__], abs(d__2)) * (mu / (mu + (d__1 = e[i__ - 1] - , abs(d__1)))); - sminoa = min(sminoa,mu); - if (sminoa == 0.) { - goto L50; - } -/* L40: */ - } -L50: - sminoa /= sqrt((doublereal) (*n)); -/* Computing MAX */ - d__1 = tol * sminoa, d__2 = *n * 6 * *n * unfl; - thresh = max(d__1,d__2); - } else { - -/* - Absolute accuracy desired - - Computing MAX -*/ - d__1 = abs(tol) * smax, d__2 = *n * 6 * *n * unfl; - thresh = max(d__1,d__2); - } - -/* - Prepare for main iteration loop for the singular values - (MAXIT is the maximum number of passes through the inner - loop permitted before nonconvergence signalled.) -*/ - - maxit = *n * 6 * *n; - iter = 0; - oldll = -1; - oldm = -1; - -/* M points to last element of unconverged part of matrix */ - - m = *n; - -/* Begin main iteration loop */ - -L60: - -/* Check for convergence or exceeding iteration count */ - - if (m <= 1) { - goto L160; - } - if (iter > maxit) { - goto L200; - } - -/* Find diagonal block of matrix to work on */ - - if (tol < 0. && (d__1 = d__[m], abs(d__1)) <= thresh) { - d__[m] = 0.; - } - smax = (d__1 = d__[m], abs(d__1)); - smin = smax; - i__1 = m - 1; - for (lll = 1; lll <= i__1; ++lll) { - ll = m - lll; - abss = (d__1 = d__[ll], abs(d__1)); - abse = (d__1 = e[ll], abs(d__1)); - if (tol < 0. && abss <= thresh) { - d__[ll] = 0.; - } - if (abse <= thresh) { - goto L80; - } - smin = min(smin,abss); -/* Computing MAX */ - d__1 = max(smax,abss); - smax = max(d__1,abse); -/* L70: */ - } - ll = 0; - goto L90; -L80: - e[ll] = 0.; - -/* Matrix splits since E(LL) = 0 */ - - if (ll == m - 1) { - -/* Convergence of bottom singular value, return to top of loop */ - - --m; - goto L60; - } -L90: - ++ll; - -/* E(LL) through E(M-1) are nonzero, E(LL-1) is zero */ - - if (ll == m - 1) { - -/* 2 by 2 block, handle separately */ - - dlasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr, - &sinl, &cosl); - d__[m - 1] = sigmx; - e[m - 1] = 0.; - d__[m] = sigmn; - -/* Compute singular vectors, if desired */ - - if (*ncvt > 0) { - drot_(ncvt, &vt[m - 1 + vt_dim1], ldvt, &vt[m + vt_dim1], ldvt, & - cosr, &sinr); - } - if (*nru > 0) { - drot_(nru, &u[(m - 1) * u_dim1 + 1], &c__1, &u[m * u_dim1 + 1], & - c__1, &cosl, &sinl); - } - if (*ncc > 0) { - drot_(ncc, &c__[m - 1 + c_dim1], ldc, &c__[m + c_dim1], ldc, & - cosl, &sinl); - } - m += -2; - goto L60; - } - -/* - If working on new submatrix, choose shift direction - (from larger end diagonal element towards smaller) -*/ - - if (ll > oldm || m < oldll) { - if ((d__1 = d__[ll], abs(d__1)) >= (d__2 = d__[m], abs(d__2))) { - -/* Chase bulge from top (big end) to bottom (small end) */ - - idir = 1; - } else { - -/* Chase bulge from bottom (big end) to top (small end) */ - - idir = 2; - } - } - -/* Apply convergence tests */ - - if (idir == 1) { - -/* - Run convergence test in forward direction - First apply standard test to bottom of matrix -*/ - - if ((d__2 = e[m - 1], abs(d__2)) <= abs(tol) * (d__1 = d__[m], abs( - d__1)) || tol < 0. && (d__3 = e[m - 1], abs(d__3)) <= thresh) - { - e[m - 1] = 0.; - goto L60; - } - - if (tol >= 0.) { - -/* - If relative accuracy desired, - apply convergence criterion forward -*/ - - mu = (d__1 = d__[ll], abs(d__1)); - sminl = mu; - i__1 = m - 1; - for (lll = ll; lll <= i__1; ++lll) { - if ((d__1 = e[lll], abs(d__1)) <= tol * mu) { - e[lll] = 0.; - goto L60; - } - sminlo = sminl; - mu = (d__2 = d__[lll + 1], abs(d__2)) * (mu / (mu + (d__1 = e[ - lll], abs(d__1)))); - sminl = min(sminl,mu); -/* L100: */ - } - } - - } else { - -/* - Run convergence test in backward direction - First apply standard test to top of matrix -*/ - - if ((d__2 = e[ll], abs(d__2)) <= abs(tol) * (d__1 = d__[ll], abs(d__1) - ) || tol < 0. && (d__3 = e[ll], abs(d__3)) <= thresh) { - e[ll] = 0.; - goto L60; - } - - if (tol >= 0.) { - -/* - If relative accuracy desired, - apply convergence criterion backward -*/ - - mu = (d__1 = d__[m], abs(d__1)); - sminl = mu; - i__1 = ll; - for (lll = m - 1; lll >= i__1; --lll) { - if ((d__1 = e[lll], abs(d__1)) <= tol * mu) { - e[lll] = 0.; - goto L60; - } - sminlo = sminl; - mu = (d__2 = d__[lll], abs(d__2)) * (mu / (mu + (d__1 = e[lll] - , abs(d__1)))); - sminl = min(sminl,mu); -/* L110: */ - } - } - } - oldll = ll; - oldm = m; - -/* - Compute shift. First, test if shifting would ruin relative - accuracy, and if so set the shift to zero. - - Computing MAX -*/ - d__1 = eps, d__2 = tol * .01; - if (tol >= 0. && *n * tol * (sminl / smax) <= max(d__1,d__2)) { - -/* Use a zero shift to avoid loss of relative accuracy */ - - shift = 0.; - } else { - -/* Compute the shift from 2-by-2 block at end of matrix */ - - if (idir == 1) { - sll = (d__1 = d__[ll], abs(d__1)); - dlas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__); - } else { - sll = (d__1 = d__[m], abs(d__1)); - dlas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__); - } - -/* Test if shift negligible, and if so set to zero */ - - if (sll > 0.) { -/* Computing 2nd power */ - d__1 = shift / sll; - if (d__1 * d__1 < eps) { - shift = 0.; - } - } - } - -/* Increment iteration count */ - - iter = iter + m - ll; - -/* If SHIFT = 0, do simplified QR iteration */ - - if (shift == 0.) { - if (idir == 1) { - -/* - Chase bulge from top to bottom - Save cosines and sines for later singular vector updates -*/ - - cs = 1.; - oldcs = 1.; - i__1 = m - 1; - for (i__ = ll; i__ <= i__1; ++i__) { - d__1 = d__[i__] * cs; - dlartg_(&d__1, &e[i__], &cs, &sn, &r__); - if (i__ > ll) { - e[i__ - 1] = oldsn * r__; - } - d__1 = oldcs * r__; - d__2 = d__[i__ + 1] * sn; - dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]); - work[i__ - ll + 1] = cs; - work[i__ - ll + 1 + nm1] = sn; - work[i__ - ll + 1 + nm12] = oldcs; - work[i__ - ll + 1 + nm13] = oldsn; -/* L120: */ - } - h__ = d__[m] * cs; - d__[m] = h__ * oldcs; - e[m - 1] = h__ * oldsn; - -/* Update singular vectors */ - - if (*ncvt > 0) { - i__1 = m - ll + 1; - dlasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ - ll + vt_dim1], ldvt); - } - if (*nru > 0) { - i__1 = m - ll + 1; - dlasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 - + 1], &u[ll * u_dim1 + 1], ldu); - } - if (*ncc > 0) { - i__1 = m - ll + 1; - dlasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 - + 1], &c__[ll + c_dim1], ldc); - } - -/* Test convergence */ - - if ((d__1 = e[m - 1], abs(d__1)) <= thresh) { - e[m - 1] = 0.; - } - - } else { - -/* - Chase bulge from bottom to top - Save cosines and sines for later singular vector updates -*/ - - cs = 1.; - oldcs = 1.; - i__1 = ll + 1; - for (i__ = m; i__ >= i__1; --i__) { - d__1 = d__[i__] * cs; - dlartg_(&d__1, &e[i__ - 1], &cs, &sn, &r__); - if (i__ < m) { - e[i__] = oldsn * r__; - } - d__1 = oldcs * r__; - d__2 = d__[i__ - 1] * sn; - dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]); - work[i__ - ll] = cs; - work[i__ - ll + nm1] = -sn; - work[i__ - ll + nm12] = oldcs; - work[i__ - ll + nm13] = -oldsn; -/* L130: */ - } - h__ = d__[ll] * cs; - d__[ll] = h__ * oldcs; - e[ll] = h__ * oldsn; - -/* Update singular vectors */ - - if (*ncvt > 0) { - i__1 = m - ll + 1; - dlasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ - nm13 + 1], &vt[ll + vt_dim1], ldvt); - } - if (*nru > 0) { - i__1 = m - ll + 1; - dlasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * - u_dim1 + 1], ldu); - } - if (*ncc > 0) { - i__1 = m - ll + 1; - dlasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ - ll + c_dim1], ldc); - } - -/* Test convergence */ - - if ((d__1 = e[ll], abs(d__1)) <= thresh) { - e[ll] = 0.; - } - } - } else { - -/* Use nonzero shift */ - - if (idir == 1) { - -/* - Chase bulge from top to bottom - Save cosines and sines for later singular vector updates -*/ - - f = ((d__1 = d__[ll], abs(d__1)) - shift) * (d_sign(&c_b2453, & - d__[ll]) + shift / d__[ll]); - g = e[ll]; - i__1 = m - 1; - for (i__ = ll; i__ <= i__1; ++i__) { - dlartg_(&f, &g, &cosr, &sinr, &r__); - if (i__ > ll) { - e[i__ - 1] = r__; - } - f = cosr * d__[i__] + sinr * e[i__]; - e[i__] = cosr * e[i__] - sinr * d__[i__]; - g = sinr * d__[i__ + 1]; - d__[i__ + 1] = cosr * d__[i__ + 1]; - dlartg_(&f, &g, &cosl, &sinl, &r__); - d__[i__] = r__; - f = cosl * e[i__] + sinl * d__[i__ + 1]; - d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__]; - if (i__ < m - 1) { - g = sinl * e[i__ + 1]; - e[i__ + 1] = cosl * e[i__ + 1]; - } - work[i__ - ll + 1] = cosr; - work[i__ - ll + 1 + nm1] = sinr; - work[i__ - ll + 1 + nm12] = cosl; - work[i__ - ll + 1 + nm13] = sinl; -/* L140: */ - } - e[m - 1] = f; - -/* Update singular vectors */ - - if (*ncvt > 0) { - i__1 = m - ll + 1; - dlasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ - ll + vt_dim1], ldvt); - } - if (*nru > 0) { - i__1 = m - ll + 1; - dlasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 - + 1], &u[ll * u_dim1 + 1], ldu); - } - if (*ncc > 0) { - i__1 = m - ll + 1; - dlasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 - + 1], &c__[ll + c_dim1], ldc); - } - -/* Test convergence */ - - if ((d__1 = e[m - 1], abs(d__1)) <= thresh) { - e[m - 1] = 0.; - } - - } else { - -/* - Chase bulge from bottom to top - Save cosines and sines for later singular vector updates -*/ - - f = ((d__1 = d__[m], abs(d__1)) - shift) * (d_sign(&c_b2453, &d__[ - m]) + shift / d__[m]); - g = e[m - 1]; - i__1 = ll + 1; - for (i__ = m; i__ >= i__1; --i__) { - dlartg_(&f, &g, &cosr, &sinr, &r__); - if (i__ < m) { - e[i__] = r__; - } - f = cosr * d__[i__] + sinr * e[i__ - 1]; - e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__]; - g = sinr * d__[i__ - 1]; - d__[i__ - 1] = cosr * d__[i__ - 1]; - dlartg_(&f, &g, &cosl, &sinl, &r__); - d__[i__] = r__; - f = cosl * e[i__ - 1] + sinl * d__[i__ - 1]; - d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1]; - if (i__ > ll + 1) { - g = sinl * e[i__ - 2]; - e[i__ - 2] = cosl * e[i__ - 2]; - } - work[i__ - ll] = cosr; - work[i__ - ll + nm1] = -sinr; - work[i__ - ll + nm12] = cosl; - work[i__ - ll + nm13] = -sinl; -/* L150: */ - } - e[ll] = f; - -/* Test convergence */ - - if ((d__1 = e[ll], abs(d__1)) <= thresh) { - e[ll] = 0.; - } - -/* Update singular vectors if desired */ - - if (*ncvt > 0) { - i__1 = m - ll + 1; - dlasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ - nm13 + 1], &vt[ll + vt_dim1], ldvt); - } - if (*nru > 0) { - i__1 = m - ll + 1; - dlasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * - u_dim1 + 1], ldu); - } - if (*ncc > 0) { - i__1 = m - ll + 1; - dlasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ - ll + c_dim1], ldc); - } - } - } - -/* QR iteration finished, go back and check convergence */ - - goto L60; - -/* All singular values converged, so make them positive */ - -L160: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if (d__[i__] < 0.) { - d__[i__] = -d__[i__]; - -/* Change sign of singular vectors, if desired */ - - if (*ncvt > 0) { - dscal_(ncvt, &c_b2589, &vt[i__ + vt_dim1], ldvt); - } - } -/* L170: */ - } - -/* - Sort the singular values into decreasing order (insertion sort on - singular values, but only one transposition per singular vector) -*/ - - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Scan for smallest D(I) */ - - isub = 1; - smin = d__[1]; - i__2 = *n + 1 - i__; - for (j = 2; j <= i__2; ++j) { - if (d__[j] <= smin) { - isub = j; - smin = d__[j]; - } -/* L180: */ - } - if (isub != *n + 1 - i__) { - -/* Swap singular values and vectors */ - - d__[isub] = d__[*n + 1 - i__]; - d__[*n + 1 - i__] = smin; - if (*ncvt > 0) { - dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[*n + 1 - i__ + - vt_dim1], ldvt); - } - if (*nru > 0) { - dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[(*n + 1 - i__) * - u_dim1 + 1], &c__1); - } - if (*ncc > 0) { - dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[*n + 1 - i__ + - c_dim1], ldc); - } - } -/* L190: */ - } - goto L220; - -/* Maximum number of iterations exceeded, failure to converge */ - -L200: - *info = 0; - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - if (e[i__] != 0.) { - ++(*info); - } -/* L210: */ - } -L220: - return 0; - -/* End of DBDSQR */ - -} /* dbdsqr_ */ - -/* Subroutine */ int dgebak_(char *job, char *side, integer *n, integer *ilo, - integer *ihi, doublereal *scale, integer *m, doublereal *v, integer * - ldv, integer *info) -{ - /* System generated locals */ - integer v_dim1, v_offset, i__1; - - /* Local variables */ - static integer i__, k; - static doublereal s; - static integer ii; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, - doublereal *, integer *); - static logical leftv; - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical rightv; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - DGEBAK forms the right or left eigenvectors of a real general matrix - by backward transformation on the computed eigenvectors of the - balanced matrix output by DGEBAL. - - Arguments - ========= - - JOB (input) CHARACTER*1 - Specifies the type of backward transformation required: - = 'N', do nothing, return immediately; - = 'P', do backward transformation for permutation only; - = 'S', do backward transformation for scaling only; - = 'B', do backward transformations for both permutation and - scaling. - JOB must be the same as the argument JOB supplied to DGEBAL. - - SIDE (input) CHARACTER*1 - = 'R': V contains right eigenvectors; - = 'L': V contains left eigenvectors. - - N (input) INTEGER - The number of rows of the matrix V. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - The integers ILO and IHI determined by DGEBAL. - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - - SCALE (input) DOUBLE PRECISION array, dimension (N) - Details of the permutation and scaling factors, as returned - by DGEBAL. - - M (input) INTEGER - The number of columns of the matrix V. M >= 0. - - V (input/output) DOUBLE PRECISION array, dimension (LDV,M) - On entry, the matrix of right or left eigenvectors to be - transformed, as returned by DHSEIN or DTREVC. - On exit, V is overwritten by the transformed eigenvectors. - - LDV (input) INTEGER - The leading dimension of the array V. LDV >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - - ===================================================================== - - - Decode and Test the input parameters -*/ - - /* Parameter adjustments */ - --scale; - v_dim1 = *ldv; - v_offset = 1 + v_dim1; - v -= v_offset; - - /* Function Body */ - rightv = lsame_(side, "R"); - leftv = lsame_(side, "L"); - - *info = 0; - if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") - && ! lsame_(job, "B")) { - *info = -1; - } else if (! rightv && ! leftv) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*ilo < 1 || *ilo > max(1,*n)) { - *info = -4; - } else if (*ihi < min(*ilo,*n) || *ihi > *n) { - *info = -5; - } else if (*m < 0) { - *info = -7; - } else if (*ldv < max(1,*n)) { - *info = -9; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGEBAK", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - if (*m == 0) { - return 0; - } - if (lsame_(job, "N")) { - return 0; - } - - if (*ilo == *ihi) { - goto L30; - } - -/* Backward balance */ - - if (lsame_(job, "S") || lsame_(job, "B")) { - - if (rightv) { - i__1 = *ihi; - for (i__ = *ilo; i__ <= i__1; ++i__) { - s = scale[i__]; - dscal_(m, &s, &v[i__ + v_dim1], ldv); -/* L10: */ - } - } - - if (leftv) { - i__1 = *ihi; - for (i__ = *ilo; i__ <= i__1; ++i__) { - s = 1. / scale[i__]; - dscal_(m, &s, &v[i__ + v_dim1], ldv); -/* L20: */ - } - } - - } - -/* - Backward permutation - - For I = ILO-1 step -1 until 1, - IHI+1 step 1 until N do -- -*/ - -L30: - if (lsame_(job, "P") || lsame_(job, "B")) { - if (rightv) { - i__1 = *n; - for (ii = 1; ii <= i__1; ++ii) { - i__ = ii; - if (i__ >= *ilo && i__ <= *ihi) { - goto L40; - } - if (i__ < *ilo) { - i__ = *ilo - ii; - } - k = (integer) scale[i__]; - if (k == i__) { - goto L40; - } - dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); -L40: - ; - } - } - - if (leftv) { - i__1 = *n; - for (ii = 1; ii <= i__1; ++ii) { - i__ = ii; - if (i__ >= *ilo && i__ <= *ihi) { - goto L50; - } - if (i__ < *ilo) { - i__ = *ilo - ii; - } - k = (integer) scale[i__]; - if (k == i__) { - goto L50; - } - dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); -L50: - ; - } - } - } - - return 0; - -/* End of DGEBAK */ - -} /* dgebak_ */ - -/* Subroutine */ int dgebal_(char *job, integer *n, doublereal *a, integer * - lda, integer *ilo, integer *ihi, doublereal *scale, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - doublereal d__1, d__2; - - /* Local variables */ - static doublereal c__, f, g; - static integer i__, j, k, l, m; - static doublereal r__, s, ca, ra; - static integer ica, ira, iexc; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, - doublereal *, integer *); - static doublereal sfmin1, sfmin2, sfmax1, sfmax2; - - extern integer idamax_(integer *, doublereal *, integer *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical noconv; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DGEBAL balances a general real matrix A. This involves, first, - permuting A by a similarity transformation to isolate eigenvalues - in the first 1 to ILO-1 and last IHI+1 to N elements on the - diagonal; and second, applying a diagonal similarity transformation - to rows and columns ILO to IHI to make the rows and columns as - close in norm as possible. Both steps are optional. - - Balancing may reduce the 1-norm of the matrix, and improve the - accuracy of the computed eigenvalues and/or eigenvectors. - - Arguments - ========= - - JOB (input) CHARACTER*1 - Specifies the operations to be performed on A: - = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 - for i = 1,...,N; - = 'P': permute only; - = 'S': scale only; - = 'B': both permute and scale. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the input matrix A. - On exit, A is overwritten by the balanced matrix. - If JOB = 'N', A is not referenced. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - ILO (output) INTEGER - IHI (output) INTEGER - ILO and IHI are set to integers such that on exit - A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. - If JOB = 'N' or 'S', ILO = 1 and IHI = N. - - SCALE (output) DOUBLE PRECISION array, dimension (N) - Details of the permutations and scaling factors applied to - A. If P(j) is the index of the row and column interchanged - with row and column j and D(j) is the scaling factor - applied to row and column j, then - SCALE(j) = P(j) for j = 1,...,ILO-1 - = D(j) for j = ILO,...,IHI - = P(j) for j = IHI+1,...,N. - The order in which the interchanges are made is N to IHI+1, - then 1 to ILO-1. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - The permutations consist of row and column interchanges which put - the matrix in the form - - ( T1 X Y ) - P A P = ( 0 B Z ) - ( 0 0 T2 ) - - where T1 and T2 are upper triangular matrices whose eigenvalues lie - along the diagonal. The column indices ILO and IHI mark the starting - and ending columns of the submatrix B. Balancing consists of applying - a diagonal similarity transformation inv(D) * B * D to make the - 1-norms of each row of B and its corresponding column nearly equal. - The output matrix is - - ( T1 X*D Y ) - ( 0 inv(D)*B*D inv(D)*Z ). - ( 0 0 T2 ) - - Information about the permutations P and the diagonal matrix D is - returned in the vector SCALE. - - This subroutine is based on the EISPACK routine BALANC. - - Modified by Tzu-Yi Chen, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --scale; - - /* Function Body */ - *info = 0; - if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") - && ! lsame_(job, "B")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGEBAL", &i__1); - return 0; - } - - k = 1; - l = *n; - - if (*n == 0) { - goto L210; - } - - if (lsame_(job, "N")) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - scale[i__] = 1.; -/* L10: */ - } - goto L210; - } - - if (lsame_(job, "S")) { - goto L120; - } - -/* Permutation to isolate eigenvalues if possible */ - - goto L50; - -/* Row and column exchange. */ - -L20: - scale[m] = (doublereal) j; - if (j == m) { - goto L30; - } - - dswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); - i__1 = *n - k + 1; - dswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); - -L30: - switch (iexc) { - case 1: goto L40; - case 2: goto L80; - } - -/* Search for rows isolating an eigenvalue and push them down. */ - -L40: - if (l == 1) { - goto L210; - } - --l; - -L50: - for (j = l; j >= 1; --j) { - - i__1 = l; - for (i__ = 1; i__ <= i__1; ++i__) { - if (i__ == j) { - goto L60; - } - if (a[j + i__ * a_dim1] != 0.) { - goto L70; - } -L60: - ; - } - - m = l; - iexc = 1; - goto L20; -L70: - ; - } - - goto L90; - -/* Search for columns isolating an eigenvalue and push them left. */ - -L80: - ++k; - -L90: - i__1 = l; - for (j = k; j <= i__1; ++j) { - - i__2 = l; - for (i__ = k; i__ <= i__2; ++i__) { - if (i__ == j) { - goto L100; - } - if (a[i__ + j * a_dim1] != 0.) { - goto L110; - } -L100: - ; - } - - m = k; - iexc = 2; - goto L20; -L110: - ; - } - -L120: - i__1 = l; - for (i__ = k; i__ <= i__1; ++i__) { - scale[i__] = 1.; -/* L130: */ - } - - if (lsame_(job, "P")) { - goto L210; - } - -/* - Balance the submatrix in rows K to L. - - Iterative loop for norm reduction -*/ - - sfmin1 = SAFEMINIMUM / PRECISION; - sfmax1 = 1. / sfmin1; - sfmin2 = sfmin1 * 8.; - sfmax2 = 1. / sfmin2; -L140: - noconv = FALSE_; - - i__1 = l; - for (i__ = k; i__ <= i__1; ++i__) { - c__ = 0.; - r__ = 0.; - - i__2 = l; - for (j = k; j <= i__2; ++j) { - if (j == i__) { - goto L150; - } - c__ += (d__1 = a[j + i__ * a_dim1], abs(d__1)); - r__ += (d__1 = a[i__ + j * a_dim1], abs(d__1)); -L150: - ; - } - ica = idamax_(&l, &a[i__ * a_dim1 + 1], &c__1); - ca = (d__1 = a[ica + i__ * a_dim1], abs(d__1)); - i__2 = *n - k + 1; - ira = idamax_(&i__2, &a[i__ + k * a_dim1], lda); - ra = (d__1 = a[i__ + (ira + k - 1) * a_dim1], abs(d__1)); - -/* Guard against zero C or R due to underflow. */ - - if (c__ == 0. || r__ == 0.) { - goto L200; - } - g = r__ / 8.; - f = 1.; - s = c__ + r__; -L160: -/* Computing MAX */ - d__1 = max(f,c__); -/* Computing MIN */ - d__2 = min(r__,g); - if (c__ >= g || max(d__1,ca) >= sfmax2 || min(d__2,ra) <= sfmin2) { - goto L170; - } - f *= 8.; - c__ *= 8.; - ca *= 8.; - r__ /= 8.; - g /= 8.; - ra /= 8.; - goto L160; - -L170: - g = c__ / 8.; -L180: -/* Computing MIN */ - d__1 = min(f,c__), d__1 = min(d__1,g); - if (g < r__ || max(r__,ra) >= sfmax2 || min(d__1,ca) <= sfmin2) { - goto L190; - } - f /= 8.; - c__ /= 8.; - g /= 8.; - ca /= 8.; - r__ *= 8.; - ra *= 8.; - goto L180; - -/* Now balance. */ - -L190: - if (c__ + r__ >= s * .95) { - goto L200; - } - if (f < 1. && scale[i__] < 1.) { - if (f * scale[i__] <= sfmin1) { - goto L200; - } - } - if (f > 1. && scale[i__] > 1.) { - if (scale[i__] >= sfmax1 / f) { - goto L200; - } - } - g = 1. / f; - scale[i__] *= f; - noconv = TRUE_; - - i__2 = *n - k + 1; - dscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); - dscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); - -L200: - ; - } - - if (noconv) { - goto L140; - } - -L210: - *ilo = k; - *ihi = l; - - return 0; - -/* End of DGEBAL */ - -} /* dgebal_ */ - -/* Subroutine */ int dgebd2_(integer *m, integer *n, doublereal *a, integer * - lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * - taup, doublereal *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__; - extern /* Subroutine */ int dlarf_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *), dlarfg_(integer *, doublereal *, - doublereal *, integer *, doublereal *), xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DGEBD2 reduces a real general m by n matrix A to upper or lower - bidiagonal form B by an orthogonal transformation: Q' * A * P = B. - - If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. - - Arguments - ========= - - M (input) INTEGER - The number of rows in the matrix A. M >= 0. - - N (input) INTEGER - The number of columns in the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the m by n general matrix to be reduced. - On exit, - if m >= n, the diagonal and the first superdiagonal are - overwritten with the upper bidiagonal matrix B; the - elements below the diagonal, with the array TAUQ, represent - the orthogonal matrix Q as a product of elementary - reflectors, and the elements above the first superdiagonal, - with the array TAUP, represent the orthogonal matrix P as - a product of elementary reflectors; - if m < n, the diagonal and the first subdiagonal are - overwritten with the lower bidiagonal matrix B; the - elements below the first subdiagonal, with the array TAUQ, - represent the orthogonal matrix Q as a product of - elementary reflectors, and the elements above the diagonal, - with the array TAUP, represent the orthogonal matrix P as - a product of elementary reflectors. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - D (output) DOUBLE PRECISION array, dimension (min(M,N)) - The diagonal elements of the bidiagonal matrix B: - D(i) = A(i,i). - - E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) - The off-diagonal elements of the bidiagonal matrix B: - if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; - if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. - - TAUQ (output) DOUBLE PRECISION array dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the orthogonal matrix Q. See Further Details. - - TAUP (output) DOUBLE PRECISION array, dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the orthogonal matrix P. See Further Details. - - WORK (workspace) DOUBLE PRECISION array, dimension (max(M,N)) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - The matrices Q and P are represented as products of elementary - reflectors: - - If m >= n, - - Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are real scalars, and v and u are real vectors; - v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); - u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); - tauq is stored in TAUQ(i) and taup in TAUP(i). - - If m < n, - - Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are real scalars, and v and u are real vectors; - v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); - u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); - tauq is stored in TAUQ(i) and taup in TAUP(i). - - The contents of A on exit are illustrated by the following examples: - - m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): - - ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) - ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) - ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) - ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) - ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) - ( v1 v2 v3 v4 v5 ) - - where d and e denote diagonal and off-diagonal elements of B, vi - denotes an element of the vector defining H(i), and ui an element of - the vector defining G(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tauq; - --taup; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info < 0) { - i__1 = -(*info); - xerbla_("DGEBD2", &i__1); - return 0; - } - - if (*m >= *n) { - -/* Reduce to upper bidiagonal form */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ - - i__2 = *m - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * - a_dim1], &c__1, &tauq[i__]); - d__[i__] = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.; - -/* Apply H(i) to A(i:m,i+1:n) from the left */ - - i__2 = *m - i__ + 1; - i__3 = *n - i__; - dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tauq[ - i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); - a[i__ + i__ * a_dim1] = d__[i__]; - - if (i__ < *n) { - -/* - Generate elementary reflector G(i) to annihilate - A(i,i+2:n) -*/ - - i__2 = *n - i__; -/* Computing MIN */ - i__3 = i__ + 2; - dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( - i__3,*n) * a_dim1], lda, &taup[i__]); - e[i__] = a[i__ + (i__ + 1) * a_dim1]; - a[i__ + (i__ + 1) * a_dim1] = 1.; - -/* Apply G(i) to A(i+1:m,i+1:n) from the right */ - - i__2 = *m - i__; - i__3 = *n - i__; - dlarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], - lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], - lda, &work[1]); - a[i__ + (i__ + 1) * a_dim1] = e[i__]; - } else { - taup[i__] = 0.; - } -/* L10: */ - } - } else { - -/* Reduce to lower bidiagonal form */ - - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Generate elementary reflector G(i) to annihilate A(i,i+1:n) */ - - i__2 = *n - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * - a_dim1], lda, &taup[i__]); - d__[i__] = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.; - -/* Apply G(i) to A(i+1:m,i:n) from the right */ - - i__2 = *m - i__; - i__3 = *n - i__ + 1; -/* Computing MIN */ - i__4 = i__ + 1; - dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[ - i__], &a[min(i__4,*m) + i__ * a_dim1], lda, &work[1]); - a[i__ + i__ * a_dim1] = d__[i__]; - - if (i__ < *m) { - -/* - Generate elementary reflector H(i) to annihilate - A(i+2:m,i) -*/ - - i__2 = *m - i__; -/* Computing MIN */ - i__3 = i__ + 2; - dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) + - i__ * a_dim1], &c__1, &tauq[i__]); - e[i__] = a[i__ + 1 + i__ * a_dim1]; - a[i__ + 1 + i__ * a_dim1] = 1.; - -/* Apply H(i) to A(i+1:m,i+1:n) from the left */ - - i__2 = *m - i__; - i__3 = *n - i__; - dlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], & - c__1, &tauq[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], - lda, &work[1]); - a[i__ + 1 + i__ * a_dim1] = e[i__]; - } else { - tauq[i__] = 0.; - } -/* L20: */ - } - } - return 0; - -/* End of DGEBD2 */ - -} /* dgebd2_ */ - -/* Subroutine */ int dgebrd_(integer *m, integer *n, doublereal *a, integer * - lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * - taup, doublereal *work, integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, j, nb, nx; - static doublereal ws; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - static integer nbmin, iinfo, minmn; - extern /* Subroutine */ int dgebd2_(integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *), dlabrd_(integer *, integer *, integer * - , doublereal *, integer *, doublereal *, doublereal *, doublereal - *, doublereal *, doublereal *, integer *, doublereal *, integer *) - , xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwrkx, ldwrky, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DGEBRD reduces a general real M-by-N matrix A to upper or lower - bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. - - If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. - - Arguments - ========= - - M (input) INTEGER - The number of rows in the matrix A. M >= 0. - - N (input) INTEGER - The number of columns in the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the M-by-N general matrix to be reduced. - On exit, - if m >= n, the diagonal and the first superdiagonal are - overwritten with the upper bidiagonal matrix B; the - elements below the diagonal, with the array TAUQ, represent - the orthogonal matrix Q as a product of elementary - reflectors, and the elements above the first superdiagonal, - with the array TAUP, represent the orthogonal matrix P as - a product of elementary reflectors; - if m < n, the diagonal and the first subdiagonal are - overwritten with the lower bidiagonal matrix B; the - elements below the first subdiagonal, with the array TAUQ, - represent the orthogonal matrix Q as a product of - elementary reflectors, and the elements above the diagonal, - with the array TAUP, represent the orthogonal matrix P as - a product of elementary reflectors. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - D (output) DOUBLE PRECISION array, dimension (min(M,N)) - The diagonal elements of the bidiagonal matrix B: - D(i) = A(i,i). - - E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) - The off-diagonal elements of the bidiagonal matrix B: - if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; - if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. - - TAUQ (output) DOUBLE PRECISION array dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the orthogonal matrix Q. See Further Details. - - TAUP (output) DOUBLE PRECISION array, dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the orthogonal matrix P. See Further Details. - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The length of the array WORK. LWORK >= max(1,M,N). - For optimum performance LWORK >= (M+N)*NB, where NB - is the optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - The matrices Q and P are represented as products of elementary - reflectors: - - If m >= n, - - Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are real scalars, and v and u are real vectors; - v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); - u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); - tauq is stored in TAUQ(i) and taup in TAUP(i). - - If m < n, - - Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are real scalars, and v and u are real vectors; - v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); - u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); - tauq is stored in TAUQ(i) and taup in TAUP(i). - - The contents of A on exit are illustrated by the following examples: - - m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): - - ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) - ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) - ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) - ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) - ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) - ( v1 v2 v3 v4 v5 ) - - where d and e denote diagonal and off-diagonal elements of B, vi - denotes an element of the vector defining H(i), and ui an element of - the vector defining G(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tauq; - --taup; - --work; - - /* Function Body */ - *info = 0; -/* Computing MAX */ - i__1 = 1, i__2 = ilaenv_(&c__1, "DGEBRD", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - nb = max(i__1,i__2); - lwkopt = (*m + *n) * nb; - work[1] = (doublereal) lwkopt; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } else /* if(complicated condition) */ { -/* Computing MAX */ - i__1 = max(1,*m); - if (*lwork < max(i__1,*n) && ! lquery) { - *info = -10; - } - } - if (*info < 0) { - i__1 = -(*info); - xerbla_("DGEBRD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - minmn = min(*m,*n); - if (minmn == 0) { - work[1] = 1.; - return 0; - } - - ws = (doublereal) max(*m,*n); - ldwrkx = *m; - ldwrky = *n; - - if (nb > 1 && nb < minmn) { - -/* - Set the crossover point NX. - - Computing MAX -*/ - i__1 = nb, i__2 = ilaenv_(&c__3, "DGEBRD", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - -/* Determine when to switch from blocked to unblocked code. */ - - if (nx < minmn) { - ws = (doublereal) ((*m + *n) * nb); - if ((doublereal) (*lwork) < ws) { - -/* - Not enough work space for the optimal NB, consider using - a smaller block size. -*/ - - nbmin = ilaenv_(&c__2, "DGEBRD", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - if (*lwork >= (*m + *n) * nbmin) { - nb = *lwork / (*m + *n); - } else { - nb = 1; - nx = minmn; - } - } - } - } else { - nx = minmn; - } - - i__1 = minmn - nx; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - -/* - Reduce rows and columns i:i+nb-1 to bidiagonal form and return - the matrices X and Y which are needed to update the unreduced - part of the matrix -*/ - - i__3 = *m - i__ + 1; - i__4 = *n - i__ + 1; - dlabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[ - i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx - * nb + 1], &ldwrky); - -/* - Update the trailing submatrix A(i+nb:m,i+nb:n), using an update - of the form A := A - V*Y' - X*U' -*/ - - i__3 = *m - i__ - nb + 1; - i__4 = *n - i__ - nb + 1; - dgemm_("No transpose", "Transpose", &i__3, &i__4, &nb, &c_b2589, &a[ - i__ + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + nb + 1], & - ldwrky, &c_b2453, &a[i__ + nb + (i__ + nb) * a_dim1], lda); - i__3 = *m - i__ - nb + 1; - i__4 = *n - i__ - nb + 1; - dgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &c_b2589, & - work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, & - c_b2453, &a[i__ + nb + (i__ + nb) * a_dim1], lda); - -/* Copy diagonal and off-diagonal elements of B back into A */ - - if (*m >= *n) { - i__3 = i__ + nb - 1; - for (j = i__; j <= i__3; ++j) { - a[j + j * a_dim1] = d__[j]; - a[j + (j + 1) * a_dim1] = e[j]; -/* L10: */ - } - } else { - i__3 = i__ + nb - 1; - for (j = i__; j <= i__3; ++j) { - a[j + j * a_dim1] = d__[j]; - a[j + 1 + j * a_dim1] = e[j]; -/* L20: */ - } - } -/* L30: */ - } - -/* Use unblocked code to reduce the remainder of the matrix */ - - i__2 = *m - i__ + 1; - i__1 = *n - i__ + 1; - dgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], & - tauq[i__], &taup[i__], &work[1], &iinfo); - work[1] = ws; - return 0; - -/* End of DGEBRD */ - -} /* dgebrd_ */ - -/* Subroutine */ int dgeev_(char *jobvl, char *jobvr, integer *n, doublereal * - a, integer *lda, doublereal *wr, doublereal *wi, doublereal *vl, - integer *ldvl, doublereal *vr, integer *ldvr, doublereal *work, - integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, - i__2, i__3, i__4; - doublereal d__1, d__2; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__, k; - static doublereal r__, cs, sn; - static integer ihi; - static doublereal scl; - static integer ilo; - static doublereal dum[1], eps; - static integer ibal; - static char side[1]; - static integer maxb; - static doublereal anrm; - static integer ierr, itau; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *); - static integer iwrk, nout; - extern doublereal dnrm2_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - extern doublereal dlapy2_(doublereal *, doublereal *); - extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebak_( - char *, char *, integer *, integer *, integer *, doublereal *, - integer *, doublereal *, integer *, integer *), - dgebal_(char *, integer *, doublereal *, integer *, integer *, - integer *, doublereal *, integer *); - static logical scalea; - - static doublereal cscale; - extern doublereal dlange_(char *, integer *, integer *, doublereal *, - integer *, doublereal *); - extern /* Subroutine */ int dgehrd_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - integer *), dlascl_(char *, integer *, integer *, doublereal *, - doublereal *, integer *, integer *, doublereal *, integer *, - integer *); - extern integer idamax_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *), - dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *), xerbla_(char *, integer *); - static logical select[1]; - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static doublereal bignum; - extern /* Subroutine */ int dorghr_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - integer *), dhseqr_(char *, char *, integer *, integer *, integer - *, doublereal *, integer *, doublereal *, doublereal *, - doublereal *, integer *, doublereal *, integer *, integer *), dtrevc_(char *, char *, logical *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *, integer *, integer *, doublereal *, integer *); - static integer minwrk, maxwrk; - static logical wantvl; - static doublereal smlnum; - static integer hswork; - static logical lquery, wantvr; - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - December 8, 1999 - - - Purpose - ======= - - DGEEV computes for an N-by-N real nonsymmetric matrix A, the - eigenvalues and, optionally, the left and/or right eigenvectors. - - The right eigenvector v(j) of A satisfies - A * v(j) = lambda(j) * v(j) - where lambda(j) is its eigenvalue. - The left eigenvector u(j) of A satisfies - u(j)**H * A = lambda(j) * u(j)**H - where u(j)**H denotes the conjugate transpose of u(j). - - The computed eigenvectors are normalized to have Euclidean norm - equal to 1 and largest component real. - - Arguments - ========= - - JOBVL (input) CHARACTER*1 - = 'N': left eigenvectors of A are not computed; - = 'V': left eigenvectors of A are computed. - - JOBVR (input) CHARACTER*1 - = 'N': right eigenvectors of A are not computed; - = 'V': right eigenvectors of A are computed. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the N-by-N matrix A. - On exit, A has been overwritten. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - WR (output) DOUBLE PRECISION array, dimension (N) - WI (output) DOUBLE PRECISION array, dimension (N) - WR and WI contain the real and imaginary parts, - respectively, of the computed eigenvalues. Complex - conjugate pairs of eigenvalues appear consecutively - with the eigenvalue having the positive imaginary part - first. - - VL (output) DOUBLE PRECISION array, dimension (LDVL,N) - If JOBVL = 'V', the left eigenvectors u(j) are stored one - after another in the columns of VL, in the same order - as their eigenvalues. - If JOBVL = 'N', VL is not referenced. - If the j-th eigenvalue is real, then u(j) = VL(:,j), - the j-th column of VL. - If the j-th and (j+1)-st eigenvalues form a complex - conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and - u(j+1) = VL(:,j) - i*VL(:,j+1). - - LDVL (input) INTEGER - The leading dimension of the array VL. LDVL >= 1; if - JOBVL = 'V', LDVL >= N. - - VR (output) DOUBLE PRECISION array, dimension (LDVR,N) - If JOBVR = 'V', the right eigenvectors v(j) are stored one - after another in the columns of VR, in the same order - as their eigenvalues. - If JOBVR = 'N', VR is not referenced. - If the j-th eigenvalue is real, then v(j) = VR(:,j), - the j-th column of VR. - If the j-th and (j+1)-st eigenvalues form a complex - conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and - v(j+1) = VR(:,j) - i*VR(:,j+1). - - LDVR (input) INTEGER - The leading dimension of the array VR. LDVR >= 1; if - JOBVR = 'V', LDVR >= N. - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,3*N), and - if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N. For good - performance, LWORK must generally be larger. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = i, the QR algorithm failed to compute all the - eigenvalues, and no eigenvectors have been computed; - elements i+1:N of WR and WI contain eigenvalues which - have converged. - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --wr; - --wi; - vl_dim1 = *ldvl; - vl_offset = 1 + vl_dim1; - vl -= vl_offset; - vr_dim1 = *ldvr; - vr_offset = 1 + vr_dim1; - vr -= vr_offset; - --work; - - /* Function Body */ - *info = 0; - lquery = *lwork == -1; - wantvl = lsame_(jobvl, "V"); - wantvr = lsame_(jobvr, "V"); - if (! wantvl && ! lsame_(jobvl, "N")) { - *info = -1; - } else if (! wantvr && ! lsame_(jobvr, "N")) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*ldvl < 1 || wantvl && *ldvl < *n) { - *info = -9; - } else if (*ldvr < 1 || wantvr && *ldvr < *n) { - *info = -11; - } - -/* - Compute workspace - (Note: Comments in the code beginning "Workspace:" describe the - minimal amount of workspace needed at that point in the code, - as well as the preferred amount for good performance. - NB refers to the optimal block size for the immediately - following subroutine, as returned by ILAENV. - HSWORK refers to the workspace preferred by DHSEQR, as - calculated below. HSWORK is computed assuming ILO=1 and IHI=N, - the worst case.) -*/ - - minwrk = 1; - if (*info == 0 && (*lwork >= 1 || lquery)) { - maxwrk = (*n << 1) + *n * ilaenv_(&c__1, "DGEHRD", " ", n, &c__1, n, & - c__0, (ftnlen)6, (ftnlen)1); - if (! wantvl && ! wantvr) { -/* Computing MAX */ - i__1 = 1, i__2 = *n * 3; - minwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = ilaenv_(&c__8, "DHSEQR", "EN", n, &c__1, n, &c_n1, (ftnlen) - 6, (ftnlen)2); - maxb = max(i__1,2); -/* - Computing MIN - Computing MAX -*/ - i__3 = 2, i__4 = ilaenv_(&c__4, "DHSEQR", "EN", n, &c__1, n, & - c_n1, (ftnlen)6, (ftnlen)2); - i__1 = min(maxb,*n), i__2 = max(i__3,i__4); - k = min(i__1,i__2); -/* Computing MAX */ - i__1 = k * (k + 2), i__2 = *n << 1; - hswork = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n + - hswork; - maxwrk = max(i__1,i__2); - } else { -/* Computing MAX */ - i__1 = 1, i__2 = *n << 2; - minwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, "DOR" - "GHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = ilaenv_(&c__8, "DHSEQR", "SV", n, &c__1, n, &c_n1, (ftnlen) - 6, (ftnlen)2); - maxb = max(i__1,2); -/* - Computing MIN - Computing MAX -*/ - i__3 = 2, i__4 = ilaenv_(&c__4, "DHSEQR", "SV", n, &c__1, n, & - c_n1, (ftnlen)6, (ftnlen)2); - i__1 = min(maxb,*n), i__2 = max(i__3,i__4); - k = min(i__1,i__2); -/* Computing MAX */ - i__1 = k * (k + 2), i__2 = *n << 1; - hswork = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n + - hswork; - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n << 2; - maxwrk = max(i__1,i__2); - } - work[1] = (doublereal) maxwrk; - } - if (*lwork < minwrk && ! lquery) { - *info = -13; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGEEV ", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Get machine constants */ - - eps = PRECISION; - smlnum = SAFEMINIMUM; - bignum = 1. / smlnum; - dlabad_(&smlnum, &bignum); - smlnum = sqrt(smlnum) / eps; - bignum = 1. / smlnum; - -/* Scale A if max element outside range [SMLNUM,BIGNUM] */ - - anrm = dlange_("M", n, n, &a[a_offset], lda, dum); - scalea = FALSE_; - if (anrm > 0. && anrm < smlnum) { - scalea = TRUE_; - cscale = smlnum; - } else if (anrm > bignum) { - scalea = TRUE_; - cscale = bignum; - } - if (scalea) { - dlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & - ierr); - } - -/* - Balance the matrix - (Workspace: need N) -*/ - - ibal = 1; - dgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr); - -/* - Reduce to upper Hessenberg form - (Workspace: need 3*N, prefer 2*N+N*NB) -*/ - - itau = ibal + *n; - iwrk = itau + *n; - i__1 = *lwork - iwrk + 1; - dgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, - &ierr); - - if (wantvl) { - -/* - Want left eigenvectors - Copy Householder vectors to VL -*/ - - *(unsigned char *)side = 'L'; - dlacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) - ; - -/* - Generate orthogonal matrix in VL - (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) -*/ - - i__1 = *lwork - iwrk + 1; - dorghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], - &i__1, &ierr); - -/* - Perform QR iteration, accumulating Schur vectors in VL - (Workspace: need N+1, prefer N+HSWORK (see comments) ) -*/ - - iwrk = itau; - i__1 = *lwork - iwrk + 1; - dhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & - vl[vl_offset], ldvl, &work[iwrk], &i__1, info); - - if (wantvr) { - -/* - Want left and right eigenvectors - Copy Schur vectors to VR -*/ - - *(unsigned char *)side = 'B'; - dlacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); - } - - } else if (wantvr) { - -/* - Want right eigenvectors - Copy Householder vectors to VR -*/ - - *(unsigned char *)side = 'R'; - dlacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) - ; - -/* - Generate orthogonal matrix in VR - (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) -*/ - - i__1 = *lwork - iwrk + 1; - dorghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], - &i__1, &ierr); - -/* - Perform QR iteration, accumulating Schur vectors in VR - (Workspace: need N+1, prefer N+HSWORK (see comments) ) -*/ - - iwrk = itau; - i__1 = *lwork - iwrk + 1; - dhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & - vr[vr_offset], ldvr, &work[iwrk], &i__1, info); - - } else { - -/* - Compute eigenvalues only - (Workspace: need N+1, prefer N+HSWORK (see comments) ) -*/ - - iwrk = itau; - i__1 = *lwork - iwrk + 1; - dhseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & - vr[vr_offset], ldvr, &work[iwrk], &i__1, info); - } - -/* If INFO > 0 from DHSEQR, then quit */ - - if (*info > 0) { - goto L50; - } - - if (wantvl || wantvr) { - -/* - Compute left and/or right eigenvectors - (Workspace: need 4*N) -*/ - - dtrevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, - &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &ierr); - } - - if (wantvl) { - -/* - Undo balancing of left eigenvectors - (Workspace: need N) -*/ - - dgebak_("B", "L", n, &ilo, &ihi, &work[ibal], n, &vl[vl_offset], ldvl, - &ierr); - -/* Normalize left eigenvectors and make largest component real */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if (wi[i__] == 0.) { - scl = 1. / dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); - dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); - } else if (wi[i__] > 0.) { - d__1 = dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); - d__2 = dnrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); - scl = 1. / dlapy2_(&d__1, &d__2); - dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); - dscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); - i__2 = *n; - for (k = 1; k <= i__2; ++k) { -/* Computing 2nd power */ - d__1 = vl[k + i__ * vl_dim1]; -/* Computing 2nd power */ - d__2 = vl[k + (i__ + 1) * vl_dim1]; - work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; -/* L10: */ - } - k = idamax_(n, &work[iwrk], &c__1); - dlartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], - &cs, &sn, &r__); - drot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * - vl_dim1 + 1], &c__1, &cs, &sn); - vl[k + (i__ + 1) * vl_dim1] = 0.; - } -/* L20: */ - } - } - - if (wantvr) { - -/* - Undo balancing of right eigenvectors - (Workspace: need N) -*/ - - dgebak_("B", "R", n, &ilo, &ihi, &work[ibal], n, &vr[vr_offset], ldvr, - &ierr); - -/* Normalize right eigenvectors and make largest component real */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if (wi[i__] == 0.) { - scl = 1. / dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); - dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); - } else if (wi[i__] > 0.) { - d__1 = dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); - d__2 = dnrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); - scl = 1. / dlapy2_(&d__1, &d__2); - dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); - dscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); - i__2 = *n; - for (k = 1; k <= i__2; ++k) { -/* Computing 2nd power */ - d__1 = vr[k + i__ * vr_dim1]; -/* Computing 2nd power */ - d__2 = vr[k + (i__ + 1) * vr_dim1]; - work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; -/* L30: */ - } - k = idamax_(n, &work[iwrk], &c__1); - dlartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], - &cs, &sn, &r__); - drot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * - vr_dim1 + 1], &c__1, &cs, &sn); - vr[k + (i__ + 1) * vr_dim1] = 0.; - } -/* L40: */ - } - } - -/* Undo scaling if necessary */ - -L50: - if (scalea) { - i__1 = *n - *info; -/* Computing MAX */ - i__3 = *n - *info; - i__2 = max(i__3,1); - dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + - 1], &i__2, &ierr); - i__1 = *n - *info; -/* Computing MAX */ - i__3 = *n - *info; - i__2 = max(i__3,1); - dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + - 1], &i__2, &ierr); - if (*info > 0) { - i__1 = ilo - 1; - dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], - n, &ierr); - i__1 = ilo - 1; - dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], - n, &ierr); - } - } - - work[1] = (doublereal) maxwrk; - return 0; - -/* End of DGEEV */ - -} /* dgeev_ */ - -/* Subroutine */ int dgehd2_(integer *n, integer *ilo, integer *ihi, - doublereal *a, integer *lda, doublereal *tau, doublereal *work, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__; - static doublereal aii; - extern /* Subroutine */ int dlarf_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *), dlarfg_(integer *, doublereal *, - doublereal *, integer *, doublereal *), xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DGEHD2 reduces a real general matrix A to upper Hessenberg form H by - an orthogonal similarity transformation: Q' * A * Q = H . - - Arguments - ========= - - N (input) INTEGER - The order of the matrix A. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that A is already upper triangular in rows - and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally - set by a previous call to DGEBAL; otherwise they should be - set to 1 and N respectively. See Further Details. - 1 <= ILO <= IHI <= max(1,N). - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the n by n general matrix to be reduced. - On exit, the upper triangle and the first subdiagonal of A - are overwritten with the upper Hessenberg matrix H, and the - elements below the first subdiagonal, with the array TAU, - represent the orthogonal matrix Q as a product of elementary - reflectors. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - TAU (output) DOUBLE PRECISION array, dimension (N-1) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace) DOUBLE PRECISION array, dimension (N) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - The matrix Q is represented as a product of (ihi-ilo) elementary - reflectors - - Q = H(ilo) H(ilo+1) . . . H(ihi-1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on - exit in A(i+2:ihi,i), and tau in TAU(i). - - The contents of A are illustrated by the following example, with - n = 7, ilo = 2 and ihi = 6: - - on entry, on exit, - - ( a a a a a a a ) ( a a h h h h a ) - ( a a a a a a ) ( a h h h h a ) - ( a a a a a a ) ( h h h h h h ) - ( a a a a a a ) ( v2 h h h h h ) - ( a a a a a a ) ( v2 v3 h h h h ) - ( a a a a a a ) ( v2 v3 v4 h h h ) - ( a ) ( a ) - - where a denotes an element of the original matrix A, h denotes a - modified element of the upper Hessenberg matrix H, and vi denotes an - element of the vector defining H(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*n < 0) { - *info = -1; - } else if (*ilo < 1 || *ilo > max(1,*n)) { - *info = -2; - } else if (*ihi < min(*ilo,*n) || *ihi > *n) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGEHD2", &i__1); - return 0; - } - - i__1 = *ihi - 1; - for (i__ = *ilo; i__ <= i__1; ++i__) { - -/* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */ - - i__2 = *ihi - i__; -/* Computing MIN */ - i__3 = i__ + 2; - dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ * - a_dim1], &c__1, &tau[i__]); - aii = a[i__ + 1 + i__ * a_dim1]; - a[i__ + 1 + i__ * a_dim1] = 1.; - -/* Apply H(i) to A(1:ihi,i+1:ihi) from the right */ - - i__2 = *ihi - i__; - dlarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ - i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]); - -/* Apply H(i) to A(i+1:ihi,i+1:n) from the left */ - - i__2 = *ihi - i__; - i__3 = *n - i__; - dlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ - i__], &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]); - - a[i__ + 1 + i__ * a_dim1] = aii; -/* L10: */ - } - - return 0; - -/* End of DGEHD2 */ - -} /* dgehd2_ */ - -/* Subroutine */ int dgehrd_(integer *n, integer *ilo, integer *ihi, - doublereal *a, integer *lda, doublereal *tau, doublereal *work, - integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__; - static doublereal t[4160] /* was [65][64] */; - static integer ib; - static doublereal ei; - static integer nb, nh, nx, iws; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - static integer nbmin, iinfo; - extern /* Subroutine */ int dgehd2_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *), - dlarfb_(char *, char *, char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, integer *), dlahrd_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DGEHRD reduces a real general matrix A to upper Hessenberg form H by - an orthogonal similarity transformation: Q' * A * Q = H . - - Arguments - ========= - - N (input) INTEGER - The order of the matrix A. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that A is already upper triangular in rows - and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally - set by a previous call to DGEBAL; otherwise they should be - set to 1 and N respectively. See Further Details. - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the N-by-N general matrix to be reduced. - On exit, the upper triangle and the first subdiagonal of A - are overwritten with the upper Hessenberg matrix H, and the - elements below the first subdiagonal, with the array TAU, - represent the orthogonal matrix Q as a product of elementary - reflectors. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - TAU (output) DOUBLE PRECISION array, dimension (N-1) - The scalar factors of the elementary reflectors (see Further - Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to - zero. - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The length of the array WORK. LWORK >= max(1,N). - For optimum performance LWORK >= N*NB, where NB is the - optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - The matrix Q is represented as a product of (ihi-ilo) elementary - reflectors - - Q = H(ilo) H(ilo+1) . . . H(ihi-1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on - exit in A(i+2:ihi,i), and tau in TAU(i). - - The contents of A are illustrated by the following example, with - n = 7, ilo = 2 and ihi = 6: - - on entry, on exit, - - ( a a a a a a a ) ( a a h h h h a ) - ( a a a a a a ) ( a h h h h a ) - ( a a a a a a ) ( h h h h h h ) - ( a a a a a a ) ( v2 h h h h h ) - ( a a a a a a ) ( v2 v3 h h h h ) - ( a a a a a a ) ( v2 v3 v4 h h h ) - ( a ) ( a ) - - where a denotes an element of the original matrix A, h denotes a - modified element of the upper Hessenberg matrix H, and vi denotes an - element of the vector defining H(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; -/* Computing MIN */ - i__1 = 64, i__2 = ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1, ( - ftnlen)6, (ftnlen)1); - nb = min(i__1,i__2); - lwkopt = *n * nb; - work[1] = (doublereal) lwkopt; - lquery = *lwork == -1; - if (*n < 0) { - *info = -1; - } else if (*ilo < 1 || *ilo > max(1,*n)) { - *info = -2; - } else if (*ihi < min(*ilo,*n) || *ihi > *n) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*lwork < max(1,*n) && ! lquery) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGEHRD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */ - - i__1 = *ilo - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - tau[i__] = 0.; -/* L10: */ - } - i__1 = *n - 1; - for (i__ = max(1,*ihi); i__ <= i__1; ++i__) { - tau[i__] = 0.; -/* L20: */ - } - -/* Quick return if possible */ - - nh = *ihi - *ilo + 1; - if (nh <= 1) { - work[1] = 1.; - return 0; - } - -/* - Determine the block size. - - Computing MIN -*/ - i__1 = 64, i__2 = ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1, ( - ftnlen)6, (ftnlen)1); - nb = min(i__1,i__2); - nbmin = 2; - iws = 1; - if (nb > 1 && nb < nh) { - -/* - Determine when to cross over from blocked to unblocked code - (last block is always handled by unblocked code). - - Computing MAX -*/ - i__1 = nb, i__2 = ilaenv_(&c__3, "DGEHRD", " ", n, ilo, ihi, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < nh) { - -/* Determine if workspace is large enough for blocked code. */ - - iws = *n * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: determine the - minimum value of NB, and reduce NB or force use of - unblocked code. - - Computing MAX -*/ - i__1 = 2, i__2 = ilaenv_(&c__2, "DGEHRD", " ", n, ilo, ihi, & - c_n1, (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - if (*lwork >= *n * nbmin) { - nb = *lwork / *n; - } else { - nb = 1; - } - } - } - } - ldwork = *n; - - if (nb < nbmin || nb >= nh) { - -/* Use unblocked code below */ - - i__ = *ilo; - - } else { - -/* Use blocked code */ - - i__1 = *ihi - 1 - nx; - i__2 = nb; - for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = nb, i__4 = *ihi - i__; - ib = min(i__3,i__4); - -/* - Reduce columns i:i+ib-1 to Hessenberg form, returning the - matrices V and T of the block reflector H = I - V*T*V' - which performs the reduction, and also the matrix Y = A*V*T -*/ - - dlahrd_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & - c__65, &work[1], &ldwork); - -/* - Apply the block reflector H to A(1:ihi,i+ib:ihi) from the - right, computing A := A - Y * V'. V(i+ib,ib-1) must be set - to 1. -*/ - - ei = a[i__ + ib + (i__ + ib - 1) * a_dim1]; - a[i__ + ib + (i__ + ib - 1) * a_dim1] = 1.; - i__3 = *ihi - i__ - ib + 1; - dgemm_("No transpose", "Transpose", ihi, &i__3, &ib, &c_b2589, & - work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, & - c_b2453, &a[(i__ + ib) * a_dim1 + 1], lda); - a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei; - -/* - Apply the block reflector H to A(i+1:ihi,i+ib:n) from the - left -*/ - - i__3 = *ihi - i__; - i__4 = *n - i__ - ib + 1; - dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & - i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, &c__65, &a[ - i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &ldwork); -/* L30: */ - } - } - -/* Use unblocked code to reduce the rest of the matrix */ - - dgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); - work[1] = (doublereal) iws; - - return 0; - -/* End of DGEHRD */ - -} /* dgehrd_ */ - -/* Subroutine */ int dgelq2_(integer *m, integer *n, doublereal *a, integer * - lda, doublereal *tau, doublereal *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, k; - static doublereal aii; - extern /* Subroutine */ int dlarf_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *), dlarfg_(integer *, doublereal *, - doublereal *, integer *, doublereal *), xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DGELQ2 computes an LQ factorization of a real m by n matrix A: - A = L * Q. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the m by n matrix A. - On exit, the elements on and below the diagonal of the array - contain the m by min(m,n) lower trapezoidal matrix L (L is - lower triangular if m <= n); the elements above the diagonal, - with the array TAU, represent the orthogonal matrix Q as a - product of elementary reflectors (see Further Details). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace) DOUBLE PRECISION array, dimension (M) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The matrix Q is represented as a product of elementary reflectors - - Q = H(k) . . . H(2) H(1), where k = min(m,n). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), - and tau in TAU(i). - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGELQ2", &i__1); - return 0; - } - - k = min(*m,*n); - - i__1 = k; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Generate elementary reflector H(i) to annihilate A(i,i+1:n) */ - - i__2 = *n - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * a_dim1] - , lda, &tau[i__]); - if (i__ < *m) { - -/* Apply H(i) to A(i+1:m,i:n) from the right */ - - aii = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.; - i__2 = *m - i__; - i__3 = *n - i__ + 1; - dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[ - i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); - a[i__ + i__ * a_dim1] = aii; - } -/* L10: */ - } - return 0; - -/* End of DGELQ2 */ - -} /* dgelq2_ */ - -/* Subroutine */ int dgelqf_(integer *m, integer *n, doublereal *a, integer * - lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int dgelq2_(integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *), dlarfb_(char *, - char *, char *, char *, integer *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal - *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DGELQF computes an LQ factorization of a real M-by-N matrix A: - A = L * Q. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the M-by-N matrix A. - On exit, the elements on and below the diagonal of the array - contain the m-by-min(m,n) lower trapezoidal matrix L (L is - lower triangular if m <= n); the elements above the diagonal, - with the array TAU, represent the orthogonal matrix Q as a - product of elementary reflectors (see Further Details). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,M). - For optimum performance LWORK >= M*NB, where NB is the - optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The matrix Q is represented as a product of elementary reflectors - - Q = H(k) . . . H(2) H(1), where k = min(m,n). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), - and tau in TAU(i). - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nb = ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) - 1); - lwkopt = *m * nb; - work[1] = (doublereal) lwkopt; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } else if (*lwork < max(1,*m) && ! lquery) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGELQF", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - k = min(*m,*n); - if (k == 0) { - work[1] = 1.; - return 0; - } - - nbmin = 2; - nx = 0; - iws = *m; - if (nb > 1 && nb < k) { - -/* - Determine when to cross over from blocked to unblocked code. - - Computing MAX -*/ - i__1 = 0, i__2 = ilaenv_(&c__3, "DGELQF", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < k) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *m; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: reduce NB and - determine the minimum value of NB. -*/ - - nb = *lwork / ldwork; -/* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "DGELQF", " ", m, n, &c_n1, & - c_n1, (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - } - } - } - - if (nb >= nbmin && nb < k && nx < k) { - -/* Use blocked code initially */ - - i__1 = k - nx; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = k - i__ + 1; - ib = min(i__3,nb); - -/* - Compute the LQ factorization of the current block - A(i:i+ib-1,i:n) -*/ - - i__3 = *n - i__ + 1; - dgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ - 1], &iinfo); - if (i__ + ib <= *m) { - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__3 = *n - i__ + 1; - dlarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], &work[1], &ldwork); - -/* Apply H to A(i+ib:m,i:n) from the right */ - - i__3 = *m - i__ - ib + 1; - i__4 = *n - i__ + 1; - dlarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, - &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & - ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + - 1], &ldwork); - } -/* L10: */ - } - } else { - i__ = 1; - } - -/* Use unblocked code to factor the last or only block. */ - - if (i__ <= k) { - i__2 = *m - i__ + 1; - i__1 = *n - i__ + 1; - dgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] - , &iinfo); - } - - work[1] = (doublereal) iws; - return 0; - -/* End of DGELQF */ - -} /* dgelqf_ */ - -/* Subroutine */ int dgelsd_(integer *m, integer *n, integer *nrhs, - doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * - s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork, - integer *iwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; - - /* Builtin functions */ - double log(doublereal); - - /* Local variables */ - static integer ie, il, mm; - static doublereal eps, anrm, bnrm; - static integer itau, nlvl, iascl, ibscl; - static doublereal sfmin; - static integer minmn, maxmn, itaup, itauq, mnthr, nwork; - extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebrd_( - integer *, integer *, doublereal *, integer *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, integer *, - integer *); - extern doublereal dlamch_(char *), dlange_(char *, integer *, - integer *, doublereal *, integer *, doublereal *); - extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *, integer *), - dlalsd_(char *, integer *, integer *, integer *, doublereal *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *, integer *), dlascl_(char *, - integer *, integer *, doublereal *, doublereal *, integer *, - integer *, doublereal *, integer *, integer *), dgeqrf_( - integer *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *, integer *), dlacpy_(char *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *), dlaset_(char *, integer *, integer *, doublereal *, - doublereal *, doublereal *, integer *), xerbla_(char *, - integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static doublereal bignum; - extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *, - integer *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, integer *); - static integer wlalsd; - extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *, integer *); - static integer ldwork; - extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *, integer *); - static integer minwrk, maxwrk; - static doublereal smlnum; - static logical lquery; - static integer smlsiz; - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - DGELSD computes the minimum-norm solution to a real linear least - squares problem: - minimize 2-norm(| b - A*x |) - using the singular value decomposition (SVD) of A. A is an M-by-N - matrix which may be rank-deficient. - - Several right hand side vectors b and solution vectors x can be - handled in a single call; they are stored as the columns of the - M-by-NRHS right hand side matrix B and the N-by-NRHS solution - matrix X. - - The problem is solved in three steps: - (1) Reduce the coefficient matrix A to bidiagonal form with - Householder transformations, reducing the original problem - into a "bidiagonal least squares problem" (BLS) - (2) Solve the BLS using a divide and conquer approach. - (3) Apply back all the Householder tranformations to solve - the original least squares problem. - - The effective rank of A is determined by treating as zero those - singular values which are less than RCOND times the largest singular - value. - - The divide and conquer algorithm makes very mild assumptions about - floating point arithmetic. It will work on machines with a guard - digit in add/subtract, or on those binary machines without guard - digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or - Cray-2. It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. - - Arguments - ========= - - M (input) INTEGER - The number of rows of A. M >= 0. - - N (input) INTEGER - The number of columns of A. N >= 0. - - NRHS (input) INTEGER - The number of right hand sides, i.e., the number of columns - of the matrices B and X. NRHS >= 0. - - A (input) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the M-by-N matrix A. - On exit, A has been destroyed. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) - On entry, the M-by-NRHS right hand side matrix B. - On exit, B is overwritten by the N-by-NRHS solution - matrix X. If m >= n and RANK = n, the residual - sum-of-squares for the solution in the i-th column is given - by the sum of squares of elements n+1:m in that column. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,max(M,N)). - - S (output) DOUBLE PRECISION array, dimension (min(M,N)) - The singular values of A in decreasing order. - The condition number of A in the 2-norm = S(1)/S(min(m,n)). - - RCOND (input) DOUBLE PRECISION - RCOND is used to determine the effective rank of A. - Singular values S(i) <= RCOND*S(1) are treated as zero. - If RCOND < 0, machine precision is used instead. - - RANK (output) INTEGER - The effective rank of A, i.e., the number of singular values - which are greater than RCOND*S(1). - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK must be at least 1. - The exact minimum amount of workspace needed depends on M, - N and NRHS. As long as LWORK is at least - 12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2, - if M is greater than or equal to N or - 12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2, - if M is less than N, the code will execute correctly. - SMLSIZ is returned by ILAENV and is equal to the maximum - size of the subproblems at the bottom of the computation - tree (usually about 25), and - NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) - For good performance, LWORK should generally be larger. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - IWORK (workspace) INTEGER array, dimension (LIWORK) - LIWORK >= 3 * MINMN * NLVL + 11 * MINMN, - where MINMN = MIN( M,N ). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: the algorithm for computing the SVD failed to converge; - if INFO = i, i off-diagonal elements of an intermediate - bidiagonal form did not converge to zero. - - Further Details - =============== - - Based on contributions by - Ming Gu and Ren-Cang Li, Computer Science Division, University of - California at Berkeley, USA - Osni Marques, LBNL/NERSC, USA - - ===================================================================== - - - Test the input arguments. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - --s; - --work; - --iwork; - - /* Function Body */ - *info = 0; - minmn = min(*m,*n); - maxmn = max(*m,*n); - mnthr = ilaenv_(&c__6, "DGELSD", " ", m, n, nrhs, &c_n1, (ftnlen)6, ( - ftnlen)1); - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*nrhs < 0) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } else if (*ldb < max(1,maxmn)) { - *info = -7; - } - - smlsiz = ilaenv_(&c__9, "DGELSD", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); - -/* - Compute workspace. - (Note: Comments in the code beginning "Workspace:" describe the - minimal amount of workspace needed at that point in the code, - as well as the preferred amount for good performance. - NB refers to the optimal block size for the immediately - following subroutine, as returned by ILAENV.) -*/ - - minwrk = 1; - minmn = max(1,minmn); -/* Computing MAX */ - i__1 = (integer) (log((doublereal) minmn / (doublereal) (smlsiz + 1)) / - log(2.)) + 1; - nlvl = max(i__1,0); - - if (*info == 0) { - maxwrk = 0; - mm = *m; - if (*m >= *n && *m >= mnthr) { - -/* Path 1a - overdetermined, with many more rows than columns. */ - - mm = *n; -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, - n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n + *nrhs * ilaenv_(&c__1, "DORMQR", "LT", - m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2); - maxwrk = max(i__1,i__2); - } - if (*m >= *n) { - -/* - Path 1 - overdetermined or exactly determined. - - Computing MAX -*/ - i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * ilaenv_(&c__1, "DGEBRD" - , " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n * 3 + *nrhs * ilaenv_(&c__1, "DORMBR", - "QLT", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * ilaenv_(&c__1, "DORMBR", - "PLN", n, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing 2nd power */ - i__1 = smlsiz + 1; - wlalsd = *n * 9 + (*n << 1) * smlsiz + (*n << 3) * nlvl + *n * * - nrhs + i__1 * i__1; -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n * 3 + wlalsd; - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1,i__2), - i__2 = *n * 3 + wlalsd; - minwrk = max(i__1,i__2); - } - if (*n > *m) { -/* Computing 2nd power */ - i__1 = smlsiz + 1; - wlalsd = *m * 9 + (*m << 1) * smlsiz + (*m << 3) * nlvl + *m * * - nrhs + i__1 * i__1; - if (*n >= mnthr) { - -/* - Path 2a - underdetermined, with many more columns - than rows. -*/ - - maxwrk = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, - &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * - ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * ilaenv_(& - c__1, "DORMBR", "QLT", m, nrhs, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * - ilaenv_(&c__1, "DORMBR", "PLN", m, nrhs, m, &c_n1, ( - ftnlen)6, (ftnlen)3); - maxwrk = max(i__1,i__2); - if (*nrhs > 1) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs; - maxwrk = max(i__1,i__2); - } else { -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + (*m << 1); - maxwrk = max(i__1,i__2); - } -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m + *nrhs * ilaenv_(&c__1, "DORMLQ", - "LT", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)2); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + wlalsd; - maxwrk = max(i__1,i__2); - } else { - -/* Path 2 - remaining underdetermined cases. */ - - maxwrk = *m * 3 + (*n + *m) * ilaenv_(&c__1, "DGEBRD", " ", m, - n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * 3 + *nrhs * ilaenv_(&c__1, "DORMBR" - , "QLT", m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR", - "PLN", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * 3 + wlalsd; - maxwrk = max(i__1,i__2); - } -/* Computing MAX */ - i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *m, i__1 = max(i__1,i__2), - i__2 = *m * 3 + wlalsd; - minwrk = max(i__1,i__2); - } - minwrk = min(minwrk,maxwrk); - work[1] = (doublereal) maxwrk; - if (*lwork < minwrk && ! lquery) { - *info = -12; - } - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGELSD", &i__1); - return 0; - } else if (lquery) { - goto L10; - } - -/* Quick return if possible. */ - - if (*m == 0 || *n == 0) { - *rank = 0; - return 0; - } - -/* Get machine parameters. */ - - eps = PRECISION; - sfmin = SAFEMINIMUM; - smlnum = sfmin / eps; - bignum = 1. / smlnum; - dlabad_(&smlnum, &bignum); - -/* Scale A if max entry outside range [SMLNUM,BIGNUM]. */ - - anrm = dlange_("M", m, n, &a[a_offset], lda, &work[1]); - iascl = 0; - if (anrm > 0. && anrm < smlnum) { - -/* Scale matrix norm up to SMLNUM. */ - - dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, - info); - iascl = 1; - } else if (anrm > bignum) { - -/* Scale matrix norm down to BIGNUM. */ - - dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, - info); - iascl = 2; - } else if (anrm == 0.) { - -/* Matrix all zero. Return zero solution. */ - - i__1 = max(*m,*n); - dlaset_("F", &i__1, nrhs, &c_b2467, &c_b2467, &b[b_offset], ldb); - dlaset_("F", &minmn, &c__1, &c_b2467, &c_b2467, &s[1], &c__1); - *rank = 0; - goto L10; - } - -/* Scale B if max entry outside range [SMLNUM,BIGNUM]. */ - - bnrm = dlange_("M", m, nrhs, &b[b_offset], ldb, &work[1]); - ibscl = 0; - if (bnrm > 0. && bnrm < smlnum) { - -/* Scale matrix norm up to SMLNUM. */ - - dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, - info); - ibscl = 1; - } else if (bnrm > bignum) { - -/* Scale matrix norm down to BIGNUM. */ - - dlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, - info); - ibscl = 2; - } - -/* If M < N make sure certain entries of B are zero. */ - - if (*m < *n) { - i__1 = *n - *m; - dlaset_("F", &i__1, nrhs, &c_b2467, &c_b2467, &b[*m + 1 + b_dim1], - ldb); - } - -/* Overdetermined case. */ - - if (*m >= *n) { - -/* Path 1 - overdetermined or exactly determined. */ - - mm = *m; - if (*m >= mnthr) { - -/* Path 1a - overdetermined, with many more rows than columns. */ - - mm = *n; - itau = 1; - nwork = itau + *n; - -/* - Compute A=Q*R. - (Workspace: need 2*N, prefer N+N*NB) -*/ - - i__1 = *lwork - nwork + 1; - dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, - info); - -/* - Multiply B by transpose(Q). - (Workspace: need N+NRHS, prefer N+NRHS*NB) -*/ - - i__1 = *lwork - nwork + 1; - dormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[ - b_offset], ldb, &work[nwork], &i__1, info); - -/* Zero out below R. */ - - if (*n > 1) { - i__1 = *n - 1; - i__2 = *n - 1; - dlaset_("L", &i__1, &i__2, &c_b2467, &c_b2467, &a[a_dim1 + 2], - lda); - } - } - - ie = 1; - itauq = ie + *n; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize R in A. - (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) -*/ - - i__1 = *lwork - nwork + 1; - dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & - work[itaup], &work[nwork], &i__1, info); - -/* - Multiply B by transpose of left bidiagonalizing vectors of R. - (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) -*/ - - i__1 = *lwork - nwork + 1; - dormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], - &b[b_offset], ldb, &work[nwork], &i__1, info); - -/* Solve the bidiagonal least squares problem. */ - - dlalsd_("U", &smlsiz, n, nrhs, &s[1], &work[ie], &b[b_offset], ldb, - rcond, rank, &work[nwork], &iwork[1], info); - if (*info != 0) { - goto L10; - } - -/* Multiply B by right bidiagonalizing vectors of R. */ - - i__1 = *lwork - nwork + 1; - dormbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], & - b[b_offset], ldb, &work[nwork], &i__1, info); - - } else /* if(complicated condition) */ { -/* Computing MAX */ - i__1 = *m, i__2 = (*m << 1) - 4, i__1 = max(i__1,i__2), i__1 = max( - i__1,*nrhs), i__2 = *n - *m * 3; - if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__1,i__2)) { - -/* - Path 2a - underdetermined, with many more columns than rows - and sufficient workspace for an efficient algorithm. -*/ - - ldwork = *m; -/* - Computing MAX - Computing MAX -*/ - i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = - max(i__3,*nrhs), i__4 = *n - *m * 3; - i__1 = (*m << 2) + *m * *lda + max(i__3,i__4), i__2 = *m * *lda + - *m + *m * *nrhs; - if (*lwork >= max(i__1,i__2)) { - ldwork = *lda; - } - itau = 1; - nwork = *m + 1; - -/* - Compute A=L*Q. - (Workspace: need 2*M, prefer M+M*NB) -*/ - - i__1 = *lwork - nwork + 1; - dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, - info); - il = nwork; - -/* Copy L to WORK(IL), zeroing out above its diagonal. */ - - dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork); - i__1 = *m - 1; - i__2 = *m - 1; - dlaset_("U", &i__1, &i__2, &c_b2467, &c_b2467, &work[il + ldwork], - &ldwork); - ie = il + ldwork * *m; - itauq = ie + *m; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize L in WORK(IL). - (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) -*/ - - i__1 = *lwork - nwork + 1; - dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], - &work[itaup], &work[nwork], &i__1, info); - -/* - Multiply B by transpose of left bidiagonalizing vectors of L. - (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) -*/ - - i__1 = *lwork - nwork + 1; - dormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[ - itauq], &b[b_offset], ldb, &work[nwork], &i__1, info); - -/* Solve the bidiagonal least squares problem. */ - - dlalsd_("U", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], - ldb, rcond, rank, &work[nwork], &iwork[1], info); - if (*info != 0) { - goto L10; - } - -/* Multiply B by right bidiagonalizing vectors of L. */ - - i__1 = *lwork - nwork + 1; - dormbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[ - itaup], &b[b_offset], ldb, &work[nwork], &i__1, info); - -/* Zero out below first M rows of B. */ - - i__1 = *n - *m; - dlaset_("F", &i__1, nrhs, &c_b2467, &c_b2467, &b[*m + 1 + b_dim1], - ldb); - nwork = itau + *m; - -/* - Multiply transpose(Q) by B. - (Workspace: need M+NRHS, prefer M+NRHS*NB) -*/ - - i__1 = *lwork - nwork + 1; - dormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[ - b_offset], ldb, &work[nwork], &i__1, info); - - } else { - -/* Path 2 - remaining underdetermined cases. */ - - ie = 1; - itauq = ie + *m; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize A. - (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) -*/ - - i__1 = *lwork - nwork + 1; - dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & - work[itaup], &work[nwork], &i__1, info); - -/* - Multiply B by transpose of left bidiagonalizing vectors. - (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) -*/ - - i__1 = *lwork - nwork + 1; - dormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq] - , &b[b_offset], ldb, &work[nwork], &i__1, info); - -/* Solve the bidiagonal least squares problem. */ - - dlalsd_("L", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], - ldb, rcond, rank, &work[nwork], &iwork[1], info); - if (*info != 0) { - goto L10; - } - -/* Multiply B by right bidiagonalizing vectors of A. */ - - i__1 = *lwork - nwork + 1; - dormbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup] - , &b[b_offset], ldb, &work[nwork], &i__1, info); - - } - } - -/* Undo scaling. */ - - if (iascl == 1) { - dlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, - info); - dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & - minmn, info); - } else if (iascl == 2) { - dlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, - info); - dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & - minmn, info); - } - if (ibscl == 1) { - dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, - info); - } else if (ibscl == 2) { - dlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, - info); - } - -L10: - work[1] = (doublereal) maxwrk; - return 0; - -/* End of DGELSD */ - -} /* dgelsd_ */ - -/* Subroutine */ int dgeqr2_(integer *m, integer *n, doublereal *a, integer * - lda, doublereal *tau, doublereal *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, k; - static doublereal aii; - extern /* Subroutine */ int dlarf_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *), dlarfg_(integer *, doublereal *, - doublereal *, integer *, doublereal *), xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DGEQR2 computes a QR factorization of a real m by n matrix A: - A = Q * R. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the m by n matrix A. - On exit, the elements on and above the diagonal of the array - contain the min(m,n) by n upper trapezoidal matrix R (R is - upper triangular if m >= n); the elements below the diagonal, - with the array TAU, represent the orthogonal matrix Q as a - product of elementary reflectors (see Further Details). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace) DOUBLE PRECISION array, dimension (N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The matrix Q is represented as a product of elementary reflectors - - Q = H(1) H(2) . . . H(k), where k = min(m,n). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), - and tau in TAU(i). - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGEQR2", &i__1); - return 0; - } - - k = min(*m,*n); - - i__1 = k; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ - - i__2 = *m - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1] - , &c__1, &tau[i__]); - if (i__ < *n) { - -/* Apply H(i) to A(i:m,i+1:n) from the left */ - - aii = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.; - i__2 = *m - i__ + 1; - i__3 = *n - i__; - dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[ - i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); - a[i__ + i__ * a_dim1] = aii; - } -/* L10: */ - } - return 0; - -/* End of DGEQR2 */ - -} /* dgeqr2_ */ - -/* Subroutine */ int dgeqrf_(integer *m, integer *n, doublereal *a, integer * - lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int dgeqr2_(integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *), dlarfb_(char *, - char *, char *, char *, integer *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal - *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DGEQRF computes a QR factorization of a real M-by-N matrix A: - A = Q * R. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the M-by-N matrix A. - On exit, the elements on and above the diagonal of the array - contain the min(M,N)-by-N upper trapezoidal matrix R (R is - upper triangular if m >= n); the elements below the diagonal, - with the array TAU, represent the orthogonal matrix Q as a - product of min(m,n) elementary reflectors (see Further - Details). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,N). - For optimum performance LWORK >= N*NB, where NB is - the optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The matrix Q is represented as a product of elementary reflectors - - Q = H(1) H(2) . . . H(k), where k = min(m,n). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), - and tau in TAU(i). - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nb = ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) - 1); - lwkopt = *n * nb; - work[1] = (doublereal) lwkopt; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } else if (*lwork < max(1,*n) && ! lquery) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGEQRF", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - k = min(*m,*n); - if (k == 0) { - work[1] = 1.; - return 0; - } - - nbmin = 2; - nx = 0; - iws = *n; - if (nb > 1 && nb < k) { - -/* - Determine when to cross over from blocked to unblocked code. - - Computing MAX -*/ - i__1 = 0, i__2 = ilaenv_(&c__3, "DGEQRF", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < k) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *n; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: reduce NB and - determine the minimum value of NB. -*/ - - nb = *lwork / ldwork; -/* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "DGEQRF", " ", m, n, &c_n1, & - c_n1, (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - } - } - } - - if (nb >= nbmin && nb < k && nx < k) { - -/* Use blocked code initially */ - - i__1 = k - nx; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = k - i__ + 1; - ib = min(i__3,nb); - -/* - Compute the QR factorization of the current block - A(i:m,i:i+ib-1) -*/ - - i__3 = *m - i__ + 1; - dgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ - 1], &iinfo); - if (i__ + ib <= *n) { - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__3 = *m - i__ + 1; - dlarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], &work[1], &ldwork); - -/* Apply H' to A(i:m,i+ib:n) from the left */ - - i__3 = *m - i__ + 1; - i__4 = *n - i__ - ib + 1; - dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & - i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & - ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib - + 1], &ldwork); - } -/* L10: */ - } - } else { - i__ = 1; - } - -/* Use unblocked code to factor the last or only block. */ - - if (i__ <= k) { - i__2 = *m - i__ + 1; - i__1 = *n - i__ + 1; - dgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] - , &iinfo); - } - - work[1] = (doublereal) iws; - return 0; - -/* End of DGEQRF */ - -} /* dgeqrf_ */ - -/* Subroutine */ int dgesdd_(char *jobz, integer *m, integer *n, doublereal * - a, integer *lda, doublereal *s, doublereal *u, integer *ldu, - doublereal *vt, integer *ldvt, doublereal *work, integer *lwork, - integer *iwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, - i__2, i__3; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__, ie, il, ir, iu, blk; - static doublereal dum[1], eps; - static integer ivt, iscl; - static doublereal anrm; - static integer idum[1], ierr, itau; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - extern logical lsame_(char *, char *); - static integer chunk, minmn, wrkbl, itaup, itauq, mnthr; - static logical wntqa; - static integer nwork; - static logical wntqn, wntqo, wntqs; - extern /* Subroutine */ int dbdsdc_(char *, char *, integer *, doublereal - *, doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, integer *, integer *), dgebrd_(integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *, integer *); - extern doublereal dlamch_(char *), dlange_(char *, integer *, - integer *, doublereal *, integer *, doublereal *); - static integer bdspac; - extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *, integer *), - dlascl_(char *, integer *, integer *, doublereal *, doublereal *, - integer *, integer *, doublereal *, integer *, integer *), - dgeqrf_(integer *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *, integer *), dlacpy_(char *, - integer *, integer *, doublereal *, integer *, doublereal *, - integer *), dlaset_(char *, integer *, integer *, - doublereal *, doublereal *, doublereal *, integer *), - xerbla_(char *, integer *), dorgbr_(char *, integer *, - integer *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static doublereal bignum; - extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *, - integer *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, integer *), dorglq_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - integer *), dorgqr_(integer *, integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *, integer *); - static integer ldwrkl, ldwrkr, minwrk, ldwrku, maxwrk, ldwkvt; - static doublereal smlnum; - static logical wntqas, lquery; - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - DGESDD computes the singular value decomposition (SVD) of a real - M-by-N matrix A, optionally computing the left and right singular - vectors. If singular vectors are desired, it uses a - divide-and-conquer algorithm. - - The SVD is written - - A = U * SIGMA * transpose(V) - - where SIGMA is an M-by-N matrix which is zero except for its - min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and - V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA - are the singular values of A; they are real and non-negative, and - are returned in descending order. The first min(m,n) columns of - U and V are the left and right singular vectors of A. - - Note that the routine returns VT = V**T, not V. - - The divide and conquer algorithm makes very mild assumptions about - floating point arithmetic. It will work on machines with a guard - digit in add/subtract, or on those binary machines without guard - digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or - Cray-2. It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. - - Arguments - ========= - - JOBZ (input) CHARACTER*1 - Specifies options for computing all or part of the matrix U: - = 'A': all M columns of U and all N rows of V**T are - returned in the arrays U and VT; - = 'S': the first min(M,N) columns of U and the first - min(M,N) rows of V**T are returned in the arrays U - and VT; - = 'O': If M >= N, the first N columns of U are overwritten - on the array A and all rows of V**T are returned in - the array VT; - otherwise, all columns of U are returned in the - array U and the first M rows of V**T are overwritten - in the array VT; - = 'N': no columns of U or rows of V**T are computed. - - M (input) INTEGER - The number of rows of the input matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the input matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the M-by-N matrix A. - On exit, - if JOBZ = 'O', A is overwritten with the first N columns - of U (the left singular vectors, stored - columnwise) if M >= N; - A is overwritten with the first M rows - of V**T (the right singular vectors, stored - rowwise) otherwise. - if JOBZ .ne. 'O', the contents of A are destroyed. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - S (output) DOUBLE PRECISION array, dimension (min(M,N)) - The singular values of A, sorted so that S(i) >= S(i+1). - - U (output) DOUBLE PRECISION array, dimension (LDU,UCOL) - UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; - UCOL = min(M,N) if JOBZ = 'S'. - If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M - orthogonal matrix U; - if JOBZ = 'S', U contains the first min(M,N) columns of U - (the left singular vectors, stored columnwise); - if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. - - LDU (input) INTEGER - The leading dimension of the array U. LDU >= 1; if - JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. - - VT (output) DOUBLE PRECISION array, dimension (LDVT,N) - If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the - N-by-N orthogonal matrix V**T; - if JOBZ = 'S', VT contains the first min(M,N) rows of - V**T (the right singular vectors, stored rowwise); - if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. - - LDVT (input) INTEGER - The leading dimension of the array VT. LDVT >= 1; if - JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; - if JOBZ = 'S', LDVT >= min(M,N). - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK; - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= 1. - If JOBZ = 'N', - LWORK >= 3*min(M,N) + max(max(M,N),6*min(M,N)). - If JOBZ = 'O', - LWORK >= 3*min(M,N)*min(M,N) + - max(max(M,N),5*min(M,N)*min(M,N)+4*min(M,N)). - If JOBZ = 'S' or 'A' - LWORK >= 3*min(M,N)*min(M,N) + - max(max(M,N),4*min(M,N)*min(M,N)+4*min(M,N)). - For good performance, LWORK should generally be larger. - If LWORK < 0 but other input arguments are legal, WORK(1) - returns the optimal LWORK. - - IWORK (workspace) INTEGER array, dimension (8*min(M,N)) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: DBDSDC did not converge, updating process failed. - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --s; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - --work; - --iwork; - - /* Function Body */ - *info = 0; - minmn = min(*m,*n); - mnthr = (integer) (minmn * 11. / 6.); - wntqa = lsame_(jobz, "A"); - wntqs = lsame_(jobz, "S"); - wntqas = wntqa || wntqs; - wntqo = lsame_(jobz, "O"); - wntqn = lsame_(jobz, "N"); - minwrk = 1; - maxwrk = 1; - lquery = *lwork == -1; - - if (! (wntqa || wntqs || wntqo || wntqn)) { - *info = -1; - } else if (*m < 0) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < * - m) { - *info = -8; - } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || - wntqo && *m >= *n && *ldvt < *n) { - *info = -10; - } - -/* - Compute workspace - (Note: Comments in the code beginning "Workspace:" describe the - minimal amount of workspace needed at that point in the code, - as well as the preferred amount for good performance. - NB refers to the optimal block size for the immediately - following subroutine, as returned by ILAENV.) -*/ - - if (*info == 0 && *m > 0 && *n > 0) { - if (*m >= *n) { - -/* Compute space needed for DBDSDC */ - - if (wntqn) { - bdspac = *n * 7; - } else { - bdspac = *n * 3 * *n + (*n << 2); - } - if (*m >= mnthr) { - if (wntqn) { - -/* Path 1 (M much larger than N, JOBZ='N') */ - - wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, - "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n; - maxwrk = max(i__1,i__2); - minwrk = bdspac + *n; - } else if (wntqo) { - -/* Path 2 (M much larger than N, JOBZ='O') */ - - wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "DORGQR", - " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, - "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + (*n << 1) * *n; - minwrk = bdspac + (*n << 1) * *n + *n * 3; - } else if (wntqs) { - -/* Path 3 (M much larger than N, JOBZ='S') */ - - wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "DORGQR", - " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, - "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + *n * *n; - minwrk = bdspac + *n * *n + *n * 3; - } else if (wntqa) { - -/* Path 4 (M much larger than N, JOBZ='A') */ - - wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n + *m * ilaenv_(&c__1, "DORGQR", - " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, - "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + *n * *n; - minwrk = bdspac + *n * *n + *n * 3; - } - } else { - -/* Path 5 (M at least N, but not much larger) */ - - wrkbl = *n * 3 + (*m + *n) * ilaenv_(&c__1, "DGEBRD", " ", m, - n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - if (wntqn) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n * 3; - maxwrk = max(i__1,i__2); - minwrk = *n * 3 + max(*m,bdspac); - } else if (wntqo) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "QLN", m, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + *m * *n; -/* Computing MAX */ - i__1 = *m, i__2 = *n * *n + bdspac; - minwrk = *n * 3 + max(i__1,i__2); - } else if (wntqs) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "QLN", m, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n * 3; - maxwrk = max(i__1,i__2); - minwrk = *n * 3 + max(*m,bdspac); - } else if (wntqa) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = bdspac + *n * 3; - maxwrk = max(i__1,i__2); - minwrk = *n * 3 + max(*m,bdspac); - } - } - } else { - -/* Compute space needed for DBDSDC */ - - if (wntqn) { - bdspac = *m * 7; - } else { - bdspac = *m * 3 * *m + (*m << 2); - } - if (*n >= mnthr) { - if (wntqn) { - -/* Path 1t (N much larger than M, JOBZ='N') */ - - wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, - "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m; - maxwrk = max(i__1,i__2); - minwrk = bdspac + *m; - } else if (wntqo) { - -/* Path 2t (N much larger than M, JOBZ='O') */ - - wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "DORGLQ", - " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, - "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + (*m << 1) * *m; - minwrk = bdspac + (*m << 1) * *m + *m * 3; - } else if (wntqs) { - -/* Path 3t (N much larger than M, JOBZ='S') */ - - wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "DORGLQ", - " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, - "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + *m * *m; - minwrk = bdspac + *m * *m + *m * 3; - } else if (wntqa) { - -/* Path 4t (N much larger than M, JOBZ='A') */ - - wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m + *n * ilaenv_(&c__1, "DORGLQ", - " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, - "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + *m * *m; - minwrk = bdspac + *m * *m + *m * 3; - } - } else { - -/* Path 5t (N greater than M, but not much larger) */ - - wrkbl = *m * 3 + (*m + *n) * ilaenv_(&c__1, "DGEBRD", " ", m, - n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - if (wntqn) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - maxwrk = max(i__1,i__2); - minwrk = *m * 3 + max(*n,bdspac); - } else if (wntqo) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "PRT", m, n, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + *m * *n; -/* Computing MAX */ - i__1 = *n, i__2 = *m * *m + bdspac; - minwrk = *m * 3 + max(i__1,i__2); - } else if (wntqs) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "PRT", m, n, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - maxwrk = max(i__1,i__2); - minwrk = *m * 3 + max(*n,bdspac); - } else if (wntqa) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "PRT", n, n, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - maxwrk = max(i__1,i__2); - minwrk = *m * 3 + max(*n,bdspac); - } - } - } - work[1] = (doublereal) maxwrk; - } - - if (*lwork < minwrk && ! lquery) { - *info = -12; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGESDD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0) { - if (*lwork >= 1) { - work[1] = 1.; - } - return 0; - } - -/* Get machine constants */ - - eps = PRECISION; - smlnum = sqrt(SAFEMINIMUM) / eps; - bignum = 1. / smlnum; - -/* Scale A if max element outside range [SMLNUM,BIGNUM] */ - - anrm = dlange_("M", m, n, &a[a_offset], lda, dum); - iscl = 0; - if (anrm > 0. && anrm < smlnum) { - iscl = 1; - dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & - ierr); - } else if (anrm > bignum) { - iscl = 1; - dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & - ierr); - } - - if (*m >= *n) { - -/* - A has at least as many rows as columns. If A has sufficiently - more rows than columns, first reduce using the QR - decomposition (if sufficient workspace available) -*/ - - if (*m >= mnthr) { - - if (wntqn) { - -/* - Path 1 (M much larger than N, JOBZ='N') - No singular vectors to be computed -*/ - - itau = 1; - nwork = itau + *n; - -/* - Compute A=Q*R - (Workspace: need 2*N, prefer N+N*NB) -*/ - - i__1 = *lwork - nwork + 1; - dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); - -/* Zero out below R */ - - i__1 = *n - 1; - i__2 = *n - 1; - dlaset_("L", &i__1, &i__2, &c_b2467, &c_b2467, &a[a_dim1 + 2], - lda); - ie = 1; - itauq = ie + *n; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize R in A - (Workspace: need 4*N, prefer 3*N+2*N*NB) -*/ - - i__1 = *lwork - nwork + 1; - dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); - nwork = ie + *n; - -/* - Perform bidiagonal SVD, computing singular values only - (Workspace: need N+BDSPAC) -*/ - - dbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, - dum, idum, &work[nwork], &iwork[1], info); - - } else if (wntqo) { - -/* - Path 2 (M much larger than N, JOBZ = 'O') - N left singular vectors to be overwritten on A and - N right singular vectors to be computed in VT -*/ - - ir = 1; - -/* WORK(IR) is LDWRKR by N */ - - if (*lwork >= *lda * *n + *n * *n + *n * 3 + bdspac) { - ldwrkr = *lda; - } else { - ldwrkr = (*lwork - *n * *n - *n * 3 - bdspac) / *n; - } - itau = ir + ldwrkr * *n; - nwork = itau + *n; - -/* - Compute A=Q*R - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - - i__1 = *lwork - nwork + 1; - dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); - -/* Copy R to WORK(IR), zeroing out below it */ - - dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); - i__1 = *n - 1; - i__2 = *n - 1; - dlaset_("L", &i__1, &i__2, &c_b2467, &c_b2467, &work[ir + 1], - &ldwrkr); - -/* - Generate Q in A - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - - i__1 = *lwork - nwork + 1; - dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], - &i__1, &ierr); - ie = itau; - itauq = ie + *n; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize R in VT, copying result to WORK(IR) - (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) -*/ - - i__1 = *lwork - nwork + 1; - dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); - -/* WORK(IU) is N by N */ - - iu = nwork; - nwork = iu + *n * *n; - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in WORK(IU) and computing right - singular vectors of bidiagonal matrix in VT - (Workspace: need N+N*N+BDSPAC) -*/ - - dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); - -/* - Overwrite WORK(IU) by left singular vectors of R - and VT by right singular vectors of R - (Workspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) -*/ - - i__1 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ - itauq], &work[iu], n, &work[nwork], &i__1, &ierr); - i__1 = *lwork - nwork + 1; - dormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - -/* - Multiply Q in A by left singular vectors of R in - WORK(IU), storing result in WORK(IR) and copying to A - (Workspace: need 2*N*N, prefer N*N+M*N) -*/ - - i__1 = *m; - i__2 = ldwrkr; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += - i__2) { -/* Computing MIN */ - i__3 = *m - i__ + 1; - chunk = min(i__3,ldwrkr); - dgemm_("N", "N", &chunk, n, n, &c_b2453, &a[i__ + a_dim1], - lda, &work[iu], n, &c_b2467, &work[ir], &ldwrkr); - dlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + - a_dim1], lda); -/* L10: */ - } - - } else if (wntqs) { - -/* - Path 3 (M much larger than N, JOBZ='S') - N left singular vectors to be computed in U and - N right singular vectors to be computed in VT -*/ - - ir = 1; - -/* WORK(IR) is N by N */ - - ldwrkr = *n; - itau = ir + ldwrkr * *n; - nwork = itau + *n; - -/* - Compute A=Q*R - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - - i__2 = *lwork - nwork + 1; - dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); - -/* Copy R to WORK(IR), zeroing out below it */ - - dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); - i__2 = *n - 1; - i__1 = *n - 1; - dlaset_("L", &i__2, &i__1, &c_b2467, &c_b2467, &work[ir + 1], - &ldwrkr); - -/* - Generate Q in A - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - - i__2 = *lwork - nwork + 1; - dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], - &i__2, &ierr); - ie = itau; - itauq = ie + *n; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize R in WORK(IR) - (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) -*/ - - i__2 = *lwork - nwork + 1; - dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagoal matrix in U and computing right singular - vectors of bidiagonal matrix in VT - (Workspace: need N+BDSPAC) -*/ - - dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); - -/* - Overwrite U by left singular vectors of R and VT - by right singular vectors of R - (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) -*/ - - i__2 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - - i__2 = *lwork - nwork + 1; - dormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); - -/* - Multiply Q in A by left singular vectors of R in - WORK(IR), storing result in U - (Workspace: need N*N) -*/ - - dlacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr); - dgemm_("N", "N", m, n, n, &c_b2453, &a[a_offset], lda, &work[ - ir], &ldwrkr, &c_b2467, &u[u_offset], ldu); - - } else if (wntqa) { - -/* - Path 4 (M much larger than N, JOBZ='A') - M left singular vectors to be computed in U and - N right singular vectors to be computed in VT -*/ - - iu = 1; - -/* WORK(IU) is N by N */ - - ldwrku = *n; - itau = iu + ldwrku * *n; - nwork = itau + *n; - -/* - Compute A=Q*R, copying result to U - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - - i__2 = *lwork - nwork + 1; - dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); - dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); - -/* - Generate Q in U - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - i__2 = *lwork - nwork + 1; - dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork], - &i__2, &ierr); - -/* Produce R in A, zeroing out other entries */ - - i__2 = *n - 1; - i__1 = *n - 1; - dlaset_("L", &i__2, &i__1, &c_b2467, &c_b2467, &a[a_dim1 + 2], - lda); - ie = itau; - itauq = ie + *n; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize R in A - (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) -*/ - - i__2 = *lwork - nwork + 1; - dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in WORK(IU) and computing right - singular vectors of bidiagonal matrix in VT - (Workspace: need N+N*N+BDSPAC) -*/ - - dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); - -/* - Overwrite WORK(IU) by left singular vectors of R and VT - by right singular vectors of R - (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) -*/ - - i__2 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[ - itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & - ierr); - i__2 = *lwork - nwork + 1; - dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); - -/* - Multiply Q in U by left singular vectors of R in - WORK(IU), storing result in A - (Workspace: need N*N) -*/ - - dgemm_("N", "N", m, n, n, &c_b2453, &u[u_offset], ldu, &work[ - iu], &ldwrku, &c_b2467, &a[a_offset], lda); - -/* Copy left singular vectors of A from A to U */ - - dlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); - - } - - } else { - -/* - M .LT. MNTHR - - Path 5 (M at least N, but not much larger) - Reduce to bidiagonal form without QR decomposition -*/ - - ie = 1; - itauq = ie + *n; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize A - (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) -*/ - - i__2 = *lwork - nwork + 1; - dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & - work[itaup], &work[nwork], &i__2, &ierr); - if (wntqn) { - -/* - Perform bidiagonal SVD, only computing singular values - (Workspace: need N+BDSPAC) -*/ - - dbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, - dum, idum, &work[nwork], &iwork[1], info); - } else if (wntqo) { - iu = nwork; - if (*lwork >= *m * *n + *n * 3 + bdspac) { - -/* WORK( IU ) is M by N */ - - ldwrku = *m; - nwork = iu + ldwrku * *n; - dlaset_("F", m, n, &c_b2467, &c_b2467, &work[iu], &ldwrku); - } else { - -/* WORK( IU ) is N by N */ - - ldwrku = *n; - nwork = iu + ldwrku * *n; - -/* WORK(IR) is LDWRKR by N */ - - ir = nwork; - ldwrkr = (*lwork - *n * *n - *n * 3) / *n; - } - nwork = iu + ldwrku * *n; - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in WORK(IU) and computing right - singular vectors of bidiagonal matrix in VT - (Workspace: need N+N*N+BDSPAC) -*/ - - dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], &ldwrku, & - vt[vt_offset], ldvt, dum, idum, &work[nwork], &iwork[ - 1], info); - -/* - Overwrite VT by right singular vectors of A - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - - i__2 = *lwork - nwork + 1; - dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); - - if (*lwork >= *m * *n + *n * 3 + bdspac) { - -/* - Overwrite WORK(IU) by left singular vectors of A - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - - i__2 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ - itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & - ierr); - -/* Copy left singular vectors of A from WORK(IU) to A */ - - dlacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda); - } else { - -/* - Generate Q in A - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - - i__2 = *lwork - nwork + 1; - dorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & - work[nwork], &i__2, &ierr); - -/* - Multiply Q in A by left singular vectors of - bidiagonal matrix in WORK(IU), storing result in - WORK(IR) and copying to A - (Workspace: need 2*N*N, prefer N*N+M*N) -*/ - - i__2 = *m; - i__1 = ldwrkr; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += - i__1) { -/* Computing MIN */ - i__3 = *m - i__ + 1; - chunk = min(i__3,ldwrkr); - dgemm_("N", "N", &chunk, n, n, &c_b2453, &a[i__ + - a_dim1], lda, &work[iu], &ldwrku, &c_b2467, & - work[ir], &ldwrkr); - dlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + - a_dim1], lda); -/* L20: */ - } - } - - } else if (wntqs) { - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal matrix in VT - (Workspace: need N+BDSPAC) -*/ - - dlaset_("F", m, n, &c_b2467, &c_b2467, &u[u_offset], ldu); - dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); - -/* - Overwrite U by left singular vectors of A and VT - by right singular vectors of A - (Workspace: need 3*N, prefer 2*N+N*NB) -*/ - - i__1 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - i__1 = *lwork - nwork + 1; - dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - } else if (wntqa) { - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal matrix in VT - (Workspace: need N+BDSPAC) -*/ - - dlaset_("F", m, m, &c_b2467, &c_b2467, &u[u_offset], ldu); - dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); - -/* Set the right corner of U to identity matrix */ - - i__1 = *m - *n; - i__2 = *m - *n; - dlaset_("F", &i__1, &i__2, &c_b2467, &c_b2453, &u[*n + 1 + (* - n + 1) * u_dim1], ldu); - -/* - Overwrite U by left singular vectors of A and VT - by right singular vectors of A - (Workspace: need N*N+2*N+M, prefer N*N+2*N+M*NB) -*/ - - i__1 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - i__1 = *lwork - nwork + 1; - dormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - } - - } - - } else { - -/* - A has more columns than rows. If A has sufficiently more - columns than rows, first reduce using the LQ decomposition (if - sufficient workspace available) -*/ - - if (*n >= mnthr) { - - if (wntqn) { - -/* - Path 1t (N much larger than M, JOBZ='N') - No singular vectors to be computed -*/ - - itau = 1; - nwork = itau + *m; - -/* - Compute A=L*Q - (Workspace: need 2*M, prefer M+M*NB) -*/ - - i__1 = *lwork - nwork + 1; - dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); - -/* Zero out above L */ - - i__1 = *m - 1; - i__2 = *m - 1; - dlaset_("U", &i__1, &i__2, &c_b2467, &c_b2467, &a[(a_dim1 << - 1) + 1], lda); - ie = 1; - itauq = ie + *m; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize L in A - (Workspace: need 4*M, prefer 3*M+2*M*NB) -*/ - - i__1 = *lwork - nwork + 1; - dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); - nwork = ie + *m; - -/* - Perform bidiagonal SVD, computing singular values only - (Workspace: need M+BDSPAC) -*/ - - dbdsdc_("U", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, - dum, idum, &work[nwork], &iwork[1], info); - - } else if (wntqo) { - -/* - Path 2t (N much larger than M, JOBZ='O') - M right singular vectors to be overwritten on A and - M left singular vectors to be computed in U -*/ - - ivt = 1; - -/* IVT is M by M */ - - il = ivt + *m * *m; - if (*lwork >= *m * *n + *m * *m + *m * 3 + bdspac) { - -/* WORK(IL) is M by N */ - - ldwrkl = *m; - chunk = *n; - } else { - ldwrkl = *m; - chunk = (*lwork - *m * *m) / *m; - } - itau = il + ldwrkl * *m; - nwork = itau + *m; - -/* - Compute A=L*Q - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ - - i__1 = *lwork - nwork + 1; - dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); - -/* Copy L to WORK(IL), zeroing about above it */ - - dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); - i__1 = *m - 1; - i__2 = *m - 1; - dlaset_("U", &i__1, &i__2, &c_b2467, &c_b2467, &work[il + - ldwrkl], &ldwrkl); - -/* - Generate Q in A - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ - - i__1 = *lwork - nwork + 1; - dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], - &i__1, &ierr); - ie = itau; - itauq = ie + *m; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize L in WORK(IL) - (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) -*/ - - i__1 = *lwork - nwork + 1; - dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U, and computing right singular - vectors of bidiagonal matrix in WORK(IVT) - (Workspace: need M+M*M+BDSPAC) -*/ - - dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & - work[ivt], m, dum, idum, &work[nwork], &iwork[1], - info); - -/* - Overwrite U by left singular vectors of L and WORK(IVT) - by right singular vectors of L - (Workspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) -*/ - - i__1 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - i__1 = *lwork - nwork + 1; - dormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ - itaup], &work[ivt], m, &work[nwork], &i__1, &ierr); - -/* - Multiply right singular vectors of L in WORK(IVT) by Q - in A, storing result in WORK(IL) and copying to A - (Workspace: need 2*M*M, prefer M*M+M*N) -*/ - - i__1 = *n; - i__2 = chunk; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += - i__2) { -/* Computing MIN */ - i__3 = *n - i__ + 1; - blk = min(i__3,chunk); - dgemm_("N", "N", m, &blk, m, &c_b2453, &work[ivt], m, &a[ - i__ * a_dim1 + 1], lda, &c_b2467, &work[il], & - ldwrkl); - dlacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 - + 1], lda); -/* L30: */ - } - - } else if (wntqs) { - -/* - Path 3t (N much larger than M, JOBZ='S') - M right singular vectors to be computed in VT and - M left singular vectors to be computed in U -*/ - - il = 1; - -/* WORK(IL) is M by M */ - - ldwrkl = *m; - itau = il + ldwrkl * *m; - nwork = itau + *m; - -/* - Compute A=L*Q - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ - - i__2 = *lwork - nwork + 1; - dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); - -/* Copy L to WORK(IL), zeroing out above it */ - - dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); - i__2 = *m - 1; - i__1 = *m - 1; - dlaset_("U", &i__2, &i__1, &c_b2467, &c_b2467, &work[il + - ldwrkl], &ldwrkl); - -/* - Generate Q in A - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ - - i__2 = *lwork - nwork + 1; - dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], - &i__2, &ierr); - ie = itau; - itauq = ie + *m; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize L in WORK(IU), copying result to U - (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) -*/ - - i__2 = *lwork - nwork + 1; - dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal matrix in VT - (Workspace: need M+BDSPAC) -*/ - - dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); - -/* - Overwrite U by left singular vectors of L and VT - by right singular vectors of L - (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) -*/ - - i__2 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - i__2 = *lwork - nwork + 1; - dormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); - -/* - Multiply right singular vectors of L in WORK(IL) by - Q in A, storing result in VT - (Workspace: need M*M) -*/ - - dlacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl); - dgemm_("N", "N", m, n, m, &c_b2453, &work[il], &ldwrkl, &a[ - a_offset], lda, &c_b2467, &vt[vt_offset], ldvt); - - } else if (wntqa) { - -/* - Path 4t (N much larger than M, JOBZ='A') - N right singular vectors to be computed in VT and - M left singular vectors to be computed in U -*/ - - ivt = 1; - -/* WORK(IVT) is M by M */ - - ldwkvt = *m; - itau = ivt + ldwkvt * *m; - nwork = itau + *m; - -/* - Compute A=L*Q, copying result to VT - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ - - i__2 = *lwork - nwork + 1; - dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); - dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - -/* - Generate Q in VT - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ - - i__2 = *lwork - nwork + 1; - dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[ - nwork], &i__2, &ierr); - -/* Produce L in A, zeroing out other entries */ - - i__2 = *m - 1; - i__1 = *m - 1; - dlaset_("U", &i__2, &i__1, &c_b2467, &c_b2467, &a[(a_dim1 << - 1) + 1], lda); - ie = itau; - itauq = ie + *m; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize L in A - (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) -*/ - - i__2 = *lwork - nwork + 1; - dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal matrix in WORK(IVT) - (Workspace: need M+M*M+BDSPAC) -*/ - - dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & - work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] - , info); - -/* - Overwrite U by left singular vectors of L and WORK(IVT) - by right singular vectors of L - (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) -*/ - - i__2 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - i__2 = *lwork - nwork + 1; - dormbr_("P", "R", "T", m, m, m, &a[a_offset], lda, &work[ - itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, & - ierr); - -/* - Multiply right singular vectors of L in WORK(IVT) by - Q in VT, storing result in A - (Workspace: need M*M) -*/ - - dgemm_("N", "N", m, n, m, &c_b2453, &work[ivt], &ldwkvt, &vt[ - vt_offset], ldvt, &c_b2467, &a[a_offset], lda); - -/* Copy right singular vectors of A from A to VT */ - - dlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - - } - - } else { - -/* - N .LT. MNTHR - - Path 5t (N greater than M, but not much larger) - Reduce to bidiagonal form without LQ decomposition -*/ - - ie = 1; - itauq = ie + *m; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize A - (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) -*/ - - i__2 = *lwork - nwork + 1; - dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & - work[itaup], &work[nwork], &i__2, &ierr); - if (wntqn) { - -/* - Perform bidiagonal SVD, only computing singular values - (Workspace: need M+BDSPAC) -*/ - - dbdsdc_("L", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, - dum, idum, &work[nwork], &iwork[1], info); - } else if (wntqo) { - ldwkvt = *m; - ivt = nwork; - if (*lwork >= *m * *n + *m * 3 + bdspac) { - -/* WORK( IVT ) is M by N */ - - dlaset_("F", m, n, &c_b2467, &c_b2467, &work[ivt], & - ldwkvt); - nwork = ivt + ldwkvt * *n; - } else { - -/* WORK( IVT ) is M by M */ - - nwork = ivt + ldwkvt * *m; - il = nwork; - -/* WORK(IL) is M by CHUNK */ - - chunk = (*lwork - *m * *m - *m * 3) / *m; - } - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal matrix in WORK(IVT) - (Workspace: need M*M+BDSPAC) -*/ - - dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & - work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] - , info); - -/* - Overwrite U by left singular vectors of A - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ - - i__2 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - - if (*lwork >= *m * *n + *m * 3 + bdspac) { - -/* - Overwrite WORK(IVT) by left singular vectors of A - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ - - i__2 = *lwork - nwork + 1; - dormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ - itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, - &ierr); - -/* Copy right singular vectors of A from WORK(IVT) to A */ - - dlacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda); - } else { - -/* - Generate P**T in A - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ - - i__2 = *lwork - nwork + 1; - dorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & - work[nwork], &i__2, &ierr); - -/* - Multiply Q in A by right singular vectors of - bidiagonal matrix in WORK(IVT), storing result in - WORK(IL) and copying to A - (Workspace: need 2*M*M, prefer M*M+M*N) -*/ - - i__2 = *n; - i__1 = chunk; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += - i__1) { -/* Computing MIN */ - i__3 = *n - i__ + 1; - blk = min(i__3,chunk); - dgemm_("N", "N", m, &blk, m, &c_b2453, &work[ivt], & - ldwkvt, &a[i__ * a_dim1 + 1], lda, &c_b2467, & - work[il], m); - dlacpy_("F", m, &blk, &work[il], m, &a[i__ * a_dim1 + - 1], lda); -/* L40: */ - } - } - } else if (wntqs) { - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal matrix in VT - (Workspace: need M+BDSPAC) -*/ - - dlaset_("F", m, n, &c_b2467, &c_b2467, &vt[vt_offset], ldvt); - dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); - -/* - Overwrite U by left singular vectors of A and VT - by right singular vectors of A - (Workspace: need 3*M, prefer 2*M+M*NB) -*/ - - i__1 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - i__1 = *lwork - nwork + 1; - dormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - } else if (wntqa) { - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal matrix in VT - (Workspace: need M+BDSPAC) -*/ - - dlaset_("F", n, n, &c_b2467, &c_b2467, &vt[vt_offset], ldvt); - dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); - -/* Set the right corner of VT to identity matrix */ - - i__1 = *n - *m; - i__2 = *n - *m; - dlaset_("F", &i__1, &i__2, &c_b2467, &c_b2453, &vt[*m + 1 + (* - m + 1) * vt_dim1], ldvt); - -/* - Overwrite U by left singular vectors of A and VT - by right singular vectors of A - (Workspace: need 2*M+N, prefer 2*M+N*NB) -*/ - - i__1 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - i__1 = *lwork - nwork + 1; - dormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - } - - } - - } - -/* Undo scaling if necessary */ - - if (iscl == 1) { - if (anrm > bignum) { - dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & - minmn, &ierr); - } - if (anrm < smlnum) { - dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & - minmn, &ierr); - } - } - -/* Return optimal workspace in WORK(1) */ - - work[1] = (doublereal) maxwrk; - - return 0; - -/* End of DGESDD */ - -} /* dgesdd_ */ - -/* Subroutine */ int dgesv_(integer *n, integer *nrhs, doublereal *a, integer - *lda, integer *ipiv, doublereal *b, integer *ldb, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1; - - /* Local variables */ - extern /* Subroutine */ int dgetrf_(integer *, integer *, doublereal *, - integer *, integer *, integer *), xerbla_(char *, integer *), dgetrs_(char *, integer *, integer *, doublereal *, - integer *, integer *, doublereal *, integer *, integer *); - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - DGESV computes the solution to a real system of linear equations - A * X = B, - where A is an N-by-N matrix and X and B are N-by-NRHS matrices. - - The LU decomposition with partial pivoting and row interchanges is - used to factor A as - A = P * L * U, - where P is a permutation matrix, L is unit lower triangular, and U is - upper triangular. The factored form of A is then used to solve the - system of equations A * X = B. - - Arguments - ========= - - N (input) INTEGER - The number of linear equations, i.e., the order of the - matrix A. N >= 0. - - NRHS (input) INTEGER - The number of right hand sides, i.e., the number of columns - of the matrix B. NRHS >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the N-by-N coefficient matrix A. - On exit, the factors L and U from the factorization - A = P*L*U; the unit diagonal elements of L are not stored. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - IPIV (output) INTEGER array, dimension (N) - The pivot indices that define the permutation matrix P; - row i of the matrix was interchanged with row IPIV(i). - - B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) - On entry, the N-by-NRHS matrix of right hand side matrix B. - On exit, if INFO = 0, the N-by-NRHS solution matrix X. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, U(i,i) is exactly zero. The factorization - has been completed, but the factor U is exactly - singular, so the solution could not be computed. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - *info = 0; - if (*n < 0) { - *info = -1; - } else if (*nrhs < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } else if (*ldb < max(1,*n)) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGESV ", &i__1); - return 0; - } - -/* Compute the LU factorization of A. */ - - dgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); - if (*info == 0) { - -/* Solve the system A*X = B, overwriting B with X. */ - - dgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[ - b_offset], ldb, info); - } - return 0; - -/* End of DGESV */ - -} /* dgesv_ */ - -/* Subroutine */ int dgetf2_(integer *m, integer *n, doublereal *a, integer * - lda, integer *ipiv, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - doublereal d__1; - - /* Local variables */ - static integer j, jp; - extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *), dscal_(integer *, doublereal *, doublereal *, integer - *), dswap_(integer *, doublereal *, integer *, doublereal *, - integer *); - extern integer idamax_(integer *, doublereal *, integer *); - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1992 - - - Purpose - ======= - - DGETF2 computes an LU factorization of a general m-by-n matrix A - using partial pivoting with row interchanges. - - The factorization has the form - A = P * L * U - where P is a permutation matrix, L is lower triangular with unit - diagonal elements (lower trapezoidal if m > n), and U is upper - triangular (upper trapezoidal if m < n). - - This is the right-looking Level 2 BLAS version of the algorithm. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the m by n matrix to be factored. - On exit, the factors L and U from the factorization - A = P*L*U; the unit diagonal elements of L are not stored. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - IPIV (output) INTEGER array, dimension (min(M,N)) - The pivot indices; for 1 <= i <= min(M,N), row i of the - matrix was interchanged with row IPIV(i). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - > 0: if INFO = k, U(k,k) is exactly zero. The factorization - has been completed, but the factor U is exactly - singular, and division by zero will occur if it is used - to solve a system of equations. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGETF2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0) { - return 0; - } - - i__1 = min(*m,*n); - for (j = 1; j <= i__1; ++j) { - -/* Find pivot and test for singularity. */ - - i__2 = *m - j + 1; - jp = j - 1 + idamax_(&i__2, &a[j + j * a_dim1], &c__1); - ipiv[j] = jp; - if (a[jp + j * a_dim1] != 0.) { - -/* Apply the interchange to columns 1:N. */ - - if (jp != j) { - dswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda); - } - -/* Compute elements J+1:M of J-th column. */ - - if (j < *m) { - i__2 = *m - j; - d__1 = 1. / a[j + j * a_dim1]; - dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); - } - - } else if (*info == 0) { - - *info = j; - } - - if (j < min(*m,*n)) { - -/* Update trailing submatrix. */ - - i__2 = *m - j; - i__3 = *n - j; - dger_(&i__2, &i__3, &c_b2589, &a[j + 1 + j * a_dim1], &c__1, &a[j - + (j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], - lda); - } -/* L10: */ - } - return 0; - -/* End of DGETF2 */ - -} /* dgetf2_ */ - -/* Subroutine */ int dgetrf_(integer *m, integer *n, doublereal *a, integer * - lda, integer *ipiv, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - - /* Local variables */ - static integer i__, j, jb, nb; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - static integer iinfo; - extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, - integer *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *), dgetf2_( - integer *, integer *, doublereal *, integer *, integer *, integer - *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *, - integer *, integer *, integer *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - DGETRF computes an LU factorization of a general M-by-N matrix A - using partial pivoting with row interchanges. - - The factorization has the form - A = P * L * U - where P is a permutation matrix, L is lower triangular with unit - diagonal elements (lower trapezoidal if m > n), and U is upper - triangular (upper trapezoidal if m < n). - - This is the right-looking Level 3 BLAS version of the algorithm. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the M-by-N matrix to be factored. - On exit, the factors L and U from the factorization - A = P*L*U; the unit diagonal elements of L are not stored. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - IPIV (output) INTEGER array, dimension (min(M,N)) - The pivot indices; for 1 <= i <= min(M,N), row i of the - matrix was interchanged with row IPIV(i). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, U(i,i) is exactly zero. The factorization - has been completed, but the factor U is exactly - singular, and division by zero will occur if it is used - to solve a system of equations. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGETRF", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0) { - return 0; - } - -/* Determine the block size for this environment. */ - - nb = ilaenv_(&c__1, "DGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) - 1); - if (nb <= 1 || nb >= min(*m,*n)) { - -/* Use unblocked code. */ - - dgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); - } else { - -/* Use blocked code. */ - - i__1 = min(*m,*n); - i__2 = nb; - for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { -/* Computing MIN */ - i__3 = min(*m,*n) - j + 1; - jb = min(i__3,nb); - -/* - Factor diagonal and subdiagonal blocks and test for exact - singularity. -*/ - - i__3 = *m - j + 1; - dgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); - -/* Adjust INFO and the pivot indices. */ - - if (*info == 0 && iinfo > 0) { - *info = iinfo + j - 1; - } -/* Computing MIN */ - i__4 = *m, i__5 = j + jb - 1; - i__3 = min(i__4,i__5); - for (i__ = j; i__ <= i__3; ++i__) { - ipiv[i__] = j - 1 + ipiv[i__]; -/* L10: */ - } - -/* Apply interchanges to columns 1:J-1. */ - - i__3 = j - 1; - i__4 = j + jb - 1; - dlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); - - if (j + jb <= *n) { - -/* Apply interchanges to columns J+JB:N. */ - - i__3 = *n - j - jb + 1; - i__4 = j + jb - 1; - dlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & - ipiv[1], &c__1); - -/* Compute block row of U. */ - - i__3 = *n - j - jb + 1; - dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & - c_b2453, &a[j + j * a_dim1], lda, &a[j + (j + jb) * - a_dim1], lda); - if (j + jb <= *m) { - -/* Update trailing submatrix. */ - - i__3 = *m - j - jb + 1; - i__4 = *n - j - jb + 1; - dgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, - &c_b2589, &a[j + jb + j * a_dim1], lda, &a[j + (j - + jb) * a_dim1], lda, &c_b2453, &a[j + jb + (j + - jb) * a_dim1], lda); - } - } -/* L20: */ - } - } - return 0; - -/* End of DGETRF */ - -} /* dgetrf_ */ - -/* Subroutine */ int dgetrs_(char *trans, integer *n, integer *nrhs, - doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer * - ldb, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1; - - /* Local variables */ - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, - integer *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *), xerbla_( - char *, integer *), dlaswp_(integer *, doublereal *, - integer *, integer *, integer *, integer *, integer *); - static logical notran; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - DGETRS solves a system of linear equations - A * X = B or A' * X = B - with a general N-by-N matrix A using the LU factorization computed - by DGETRF. - - Arguments - ========= - - TRANS (input) CHARACTER*1 - Specifies the form of the system of equations: - = 'N': A * X = B (No transpose) - = 'T': A'* X = B (Transpose) - = 'C': A'* X = B (Conjugate transpose = Transpose) - - N (input) INTEGER - The order of the matrix A. N >= 0. - - NRHS (input) INTEGER - The number of right hand sides, i.e., the number of columns - of the matrix B. NRHS >= 0. - - A (input) DOUBLE PRECISION array, dimension (LDA,N) - The factors L and U from the factorization A = P*L*U - as computed by DGETRF. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - IPIV (input) INTEGER array, dimension (N) - The pivot indices from DGETRF; for 1<=i<=N, row i of the - matrix was interchanged with row IPIV(i). - - B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) - On entry, the right hand side matrix B. - On exit, the solution matrix X. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - *info = 0; - notran = lsame_(trans, "N"); - if (! notran && ! lsame_(trans, "T") && ! lsame_( - trans, "C")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*nrhs < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*ldb < max(1,*n)) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGETRS", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0 || *nrhs == 0) { - return 0; - } - - if (notran) { - -/* - Solve A * X = B. - - Apply row interchanges to the right hand sides. -*/ - - dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); - -/* Solve L*X = B, overwriting B with X. */ - - dtrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b2453, &a[ - a_offset], lda, &b[b_offset], ldb); - -/* Solve U*X = B, overwriting B with X. */ - - dtrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b2453, - &a[a_offset], lda, &b[b_offset], ldb); - } else { - -/* - Solve A' * X = B. - - Solve U'*X = B, overwriting B with X. -*/ - - dtrsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b2453, & - a[a_offset], lda, &b[b_offset], ldb); - -/* Solve L'*X = B, overwriting B with X. */ - - dtrsm_("Left", "Lower", "Transpose", "Unit", n, nrhs, &c_b2453, &a[ - a_offset], lda, &b[b_offset], ldb); - -/* Apply row interchanges to the solution vectors. */ - - dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); - } - - return 0; - -/* End of DGETRS */ - -} /* dgetrs_ */ - -/* Subroutine */ int dhseqr_(char *job, char *compz, integer *n, integer *ilo, - integer *ihi, doublereal *h__, integer *ldh, doublereal *wr, - doublereal *wi, doublereal *z__, integer *ldz, doublereal *work, - integer *lwork, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3[2], i__4, - i__5; - doublereal d__1, d__2; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i__, j, k, l; - static doublereal s[225] /* was [15][15] */, v[16]; - static integer i1, i2, ii, nh, nr, ns, nv; - static doublereal vv[16]; - static integer itn; - static doublereal tau; - static integer its; - static doublereal ulp, tst1; - static integer maxb; - static doublereal absw; - static integer ierr; - static doublereal unfl, temp, ovfl; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *); - static integer itemp; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static logical initz, wantt, wantz; - extern doublereal dlapy2_(doublereal *, doublereal *); - extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); - - extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *, - integer *, doublereal *); - extern integer idamax_(integer *, doublereal *, integer *); - extern doublereal dlanhs_(char *, integer *, doublereal *, integer *, - doublereal *); - extern /* Subroutine */ int dlahqr_(logical *, logical *, integer *, - integer *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *, integer *, doublereal *, integer *, - integer *), dlacpy_(char *, integer *, integer *, doublereal *, - integer *, doublereal *, integer *), dlaset_(char *, - integer *, integer *, doublereal *, doublereal *, doublereal *, - integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int xerbla_(char *, integer *), dlarfx_( - char *, integer *, integer *, doublereal *, doublereal *, - doublereal *, integer *, doublereal *); - static doublereal smlnum; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DHSEQR computes the eigenvalues of a real upper Hessenberg matrix H - and, optionally, the matrices T and Z from the Schur decomposition - H = Z T Z**T, where T is an upper quasi-triangular matrix (the Schur - form), and Z is the orthogonal matrix of Schur vectors. - - Optionally Z may be postmultiplied into an input orthogonal matrix Q, - so that this routine can give the Schur factorization of a matrix A - which has been reduced to the Hessenberg form H by the orthogonal - matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. - - Arguments - ========= - - JOB (input) CHARACTER*1 - = 'E': compute eigenvalues only; - = 'S': compute eigenvalues and the Schur form T. - - COMPZ (input) CHARACTER*1 - = 'N': no Schur vectors are computed; - = 'I': Z is initialized to the unit matrix and the matrix Z - of Schur vectors of H is returned; - = 'V': Z must contain an orthogonal matrix Q on entry, and - the product Q*Z is returned. - - N (input) INTEGER - The order of the matrix H. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that H is already upper triangular in rows - and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally - set by a previous call to DGEBAL, and then passed to SGEHRD - when the matrix output by DGEBAL is reduced to Hessenberg - form. Otherwise ILO and IHI should be set to 1 and N - respectively. - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - - H (input/output) DOUBLE PRECISION array, dimension (LDH,N) - On entry, the upper Hessenberg matrix H. - On exit, if JOB = 'S', H contains the upper quasi-triangular - matrix T from the Schur decomposition (the Schur form); - 2-by-2 diagonal blocks (corresponding to complex conjugate - pairs of eigenvalues) are returned in standard form, with - H(i,i) = H(i+1,i+1) and H(i+1,i)*H(i,i+1) < 0. If JOB = 'E', - the contents of H are unspecified on exit. - - LDH (input) INTEGER - The leading dimension of the array H. LDH >= max(1,N). - - WR (output) DOUBLE PRECISION array, dimension (N) - WI (output) DOUBLE PRECISION array, dimension (N) - The real and imaginary parts, respectively, of the computed - eigenvalues. If two eigenvalues are computed as a complex - conjugate pair, they are stored in consecutive elements of - WR and WI, say the i-th and (i+1)th, with WI(i) > 0 and - WI(i+1) < 0. If JOB = 'S', the eigenvalues are stored in the - same order as on the diagonal of the Schur form returned in - H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 - diagonal block, WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and - WI(i+1) = -WI(i). - - Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) - If COMPZ = 'N': Z is not referenced. - If COMPZ = 'I': on entry, Z need not be set, and on exit, Z - contains the orthogonal matrix Z of the Schur vectors of H. - If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q, - which is assumed to be equal to the unit matrix except for - the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z. - Normally Q is the orthogonal matrix generated by DORGHR after - the call to DGEHRD which formed the Hessenberg matrix H. - - LDZ (input) INTEGER - The leading dimension of the array Z. - LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise. - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,N). - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, DHSEQR failed to compute all of the - eigenvalues in a total of 30*(IHI-ILO+1) iterations; - elements 1:ilo-1 and i+1:n of WR and WI contain those - eigenvalues which have been successfully computed. - - ===================================================================== - - - Decode and test the input parameters -*/ - - /* Parameter adjustments */ - h_dim1 = *ldh; - h_offset = 1 + h_dim1; - h__ -= h_offset; - --wr; - --wi; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - --work; - - /* Function Body */ - wantt = lsame_(job, "S"); - initz = lsame_(compz, "I"); - wantz = initz || lsame_(compz, "V"); - - *info = 0; - work[1] = (doublereal) max(1,*n); - lquery = *lwork == -1; - if (! lsame_(job, "E") && ! wantt) { - *info = -1; - } else if (! lsame_(compz, "N") && ! wantz) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*ilo < 1 || *ilo > max(1,*n)) { - *info = -4; - } else if (*ihi < min(*ilo,*n) || *ihi > *n) { - *info = -5; - } else if (*ldh < max(1,*n)) { - *info = -7; - } else if (*ldz < 1 || wantz && *ldz < max(1,*n)) { - *info = -11; - } else if (*lwork < max(1,*n) && ! lquery) { - *info = -13; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DHSEQR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Initialize Z, if necessary */ - - if (initz) { - dlaset_("Full", n, n, &c_b2467, &c_b2453, &z__[z_offset], ldz); - } - -/* Store the eigenvalues isolated by DGEBAL. */ - - i__1 = *ilo - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - wr[i__] = h__[i__ + i__ * h_dim1]; - wi[i__] = 0.; -/* L10: */ - } - i__1 = *n; - for (i__ = *ihi + 1; i__ <= i__1; ++i__) { - wr[i__] = h__[i__ + i__ * h_dim1]; - wi[i__] = 0.; -/* L20: */ - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - if (*ilo == *ihi) { - wr[*ilo] = h__[*ilo + *ilo * h_dim1]; - wi[*ilo] = 0.; - return 0; - } - -/* - Set rows and columns ILO to IHI to zero below the first - subdiagonal. -*/ - - i__1 = *ihi - 2; - for (j = *ilo; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j + 2; i__ <= i__2; ++i__) { - h__[i__ + j * h_dim1] = 0.; -/* L30: */ - } -/* L40: */ - } - nh = *ihi - *ilo + 1; - -/* - Determine the order of the multi-shift QR algorithm to be used. - - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = job; - i__3[1] = 1, a__1[1] = compz; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - ns = ilaenv_(&c__4, "DHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( - ftnlen)2); -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = job; - i__3[1] = 1, a__1[1] = compz; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - maxb = ilaenv_(&c__8, "DHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( - ftnlen)2); - if (ns <= 2 || ns > nh || maxb >= nh) { - -/* Use the standard double-shift algorithm */ - - dlahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[ - 1], ilo, ihi, &z__[z_offset], ldz, info); - return 0; - } - maxb = max(3,maxb); -/* Computing MIN */ - i__1 = min(ns,maxb); - ns = min(i__1,15); - -/* - Now 2 < NS <= MAXB < NH. - - Set machine-dependent constants for the stopping criterion. - If norm(H) <= sqrt(OVFL), overflow should not occur. -*/ - - unfl = SAFEMINIMUM; - ovfl = 1. / unfl; - dlabad_(&unfl, &ovfl); - ulp = PRECISION; - smlnum = unfl * (nh / ulp); - -/* - I1 and I2 are the indices of the first row and last column of H - to which transformations must be applied. If eigenvalues only are - being computed, I1 and I2 are set inside the main loop. -*/ - - if (wantt) { - i1 = 1; - i2 = *n; - } - -/* ITN is the total number of multiple-shift QR iterations allowed. */ - - itn = nh * 30; - -/* - The main loop begins here. I is the loop index and decreases from - IHI to ILO in steps of at most MAXB. Each iteration of the loop - works with the active submatrix in rows and columns L to I. - Eigenvalues I+1 to IHI have already converged. Either L = ILO or - H(L,L-1) is negligible so that the matrix splits. -*/ - - i__ = *ihi; -L50: - l = *ilo; - if (i__ < *ilo) { - goto L170; - } - -/* - Perform multiple-shift QR iterations on rows and columns ILO to I - until a submatrix of order at most MAXB splits off at the bottom - because a subdiagonal element has become negligible. -*/ - - i__1 = itn; - for (its = 0; its <= i__1; ++its) { - -/* Look for a single small subdiagonal element. */ - - i__2 = l + 1; - for (k = i__; k >= i__2; --k) { - tst1 = (d__1 = h__[k - 1 + (k - 1) * h_dim1], abs(d__1)) + (d__2 = - h__[k + k * h_dim1], abs(d__2)); - if (tst1 == 0.) { - i__4 = i__ - l + 1; - tst1 = dlanhs_("1", &i__4, &h__[l + l * h_dim1], ldh, &work[1] - ); - } -/* Computing MAX */ - d__2 = ulp * tst1; - if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= max(d__2, - smlnum)) { - goto L70; - } -/* L60: */ - } -L70: - l = k; - if (l > *ilo) { - -/* H(L,L-1) is negligible. */ - - h__[l + (l - 1) * h_dim1] = 0.; - } - -/* Exit from loop if a submatrix of order <= MAXB has split off. */ - - if (l >= i__ - maxb + 1) { - goto L160; - } - -/* - Now the active submatrix is in rows and columns L to I. If - eigenvalues only are being computed, only the active submatrix - need be transformed. -*/ - - if (! wantt) { - i1 = l; - i2 = i__; - } - - if (its == 20 || its == 30) { - -/* Exceptional shifts. */ - - i__2 = i__; - for (ii = i__ - ns + 1; ii <= i__2; ++ii) { - wr[ii] = ((d__1 = h__[ii + (ii - 1) * h_dim1], abs(d__1)) + ( - d__2 = h__[ii + ii * h_dim1], abs(d__2))) * 1.5; - wi[ii] = 0.; -/* L80: */ - } - } else { - -/* Use eigenvalues of trailing submatrix of order NS as shifts. */ - - dlacpy_("Full", &ns, &ns, &h__[i__ - ns + 1 + (i__ - ns + 1) * - h_dim1], ldh, s, &c__15); - dlahqr_(&c_false, &c_false, &ns, &c__1, &ns, s, &c__15, &wr[i__ - - ns + 1], &wi[i__ - ns + 1], &c__1, &ns, &z__[z_offset], - ldz, &ierr); - if (ierr > 0) { - -/* - If DLAHQR failed to compute all NS eigenvalues, use the - unconverged diagonal elements as the remaining shifts. -*/ - - i__2 = ierr; - for (ii = 1; ii <= i__2; ++ii) { - wr[i__ - ns + ii] = s[ii + ii * 15 - 16]; - wi[i__ - ns + ii] = 0.; -/* L90: */ - } - } - } - -/* - Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns)) - where G is the Hessenberg submatrix H(L:I,L:I) and w is - the vector of shifts (stored in WR and WI). The result is - stored in the local array V. -*/ - - v[0] = 1.; - i__2 = ns + 1; - for (ii = 2; ii <= i__2; ++ii) { - v[ii - 1] = 0.; -/* L100: */ - } - nv = 1; - i__2 = i__; - for (j = i__ - ns + 1; j <= i__2; ++j) { - if (wi[j] >= 0.) { - if (wi[j] == 0.) { - -/* real shift */ - - i__4 = nv + 1; - dcopy_(&i__4, v, &c__1, vv, &c__1); - i__4 = nv + 1; - d__1 = -wr[j]; - dgemv_("No transpose", &i__4, &nv, &c_b2453, &h__[l + l * - h_dim1], ldh, vv, &c__1, &d__1, v, &c__1); - ++nv; - } else if (wi[j] > 0.) { - -/* complex conjugate pair of shifts */ - - i__4 = nv + 1; - dcopy_(&i__4, v, &c__1, vv, &c__1); - i__4 = nv + 1; - d__1 = wr[j] * -2.; - dgemv_("No transpose", &i__4, &nv, &c_b2453, &h__[l + l * - h_dim1], ldh, v, &c__1, &d__1, vv, &c__1); - i__4 = nv + 1; - itemp = idamax_(&i__4, vv, &c__1); -/* Computing MAX */ - d__2 = (d__1 = vv[itemp - 1], abs(d__1)); - temp = 1. / max(d__2,smlnum); - i__4 = nv + 1; - dscal_(&i__4, &temp, vv, &c__1); - absw = dlapy2_(&wr[j], &wi[j]); - temp = temp * absw * absw; - i__4 = nv + 2; - i__5 = nv + 1; - dgemv_("No transpose", &i__4, &i__5, &c_b2453, &h__[l + l - * h_dim1], ldh, vv, &c__1, &temp, v, &c__1); - nv += 2; - } - -/* - Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero, - reset it to the unit vector. -*/ - - itemp = idamax_(&nv, v, &c__1); - temp = (d__1 = v[itemp - 1], abs(d__1)); - if (temp == 0.) { - v[0] = 1.; - i__4 = nv; - for (ii = 2; ii <= i__4; ++ii) { - v[ii - 1] = 0.; -/* L110: */ - } - } else { - temp = max(temp,smlnum); - d__1 = 1. / temp; - dscal_(&nv, &d__1, v, &c__1); - } - } -/* L120: */ - } - -/* Multiple-shift QR step */ - - i__2 = i__ - 1; - for (k = l; k <= i__2; ++k) { - -/* - The first iteration of this loop determines a reflection G - from the vector V and applies it from left and right to H, - thus creating a nonzero bulge below the subdiagonal. - - Each subsequent iteration determines a reflection G to - restore the Hessenberg form in the (K-1)th column, and thus - chases the bulge one step toward the bottom of the active - submatrix. NR is the order of G. - - Computing MIN -*/ - i__4 = ns + 1, i__5 = i__ - k + 1; - nr = min(i__4,i__5); - if (k > l) { - dcopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); - } - dlarfg_(&nr, v, &v[1], &c__1, &tau); - if (k > l) { - h__[k + (k - 1) * h_dim1] = v[0]; - i__4 = i__; - for (ii = k + 1; ii <= i__4; ++ii) { - h__[ii + (k - 1) * h_dim1] = 0.; -/* L130: */ - } - } - v[0] = 1.; - -/* - Apply G from the left to transform the rows of the matrix in - columns K to I2. -*/ - - i__4 = i2 - k + 1; - dlarfx_("Left", &nr, &i__4, v, &tau, &h__[k + k * h_dim1], ldh, & - work[1]); - -/* - Apply G from the right to transform the columns of the - matrix in rows I1 to min(K+NR,I). - - Computing MIN -*/ - i__5 = k + nr; - i__4 = min(i__5,i__) - i1 + 1; - dlarfx_("Right", &i__4, &nr, v, &tau, &h__[i1 + k * h_dim1], ldh, - &work[1]); - - if (wantz) { - -/* Accumulate transformations in the matrix Z */ - - dlarfx_("Right", &nh, &nr, v, &tau, &z__[*ilo + k * z_dim1], - ldz, &work[1]); - } -/* L140: */ - } - -/* L150: */ - } - -/* Failure to converge in remaining number of iterations */ - - *info = i__; - return 0; - -L160: - -/* - A submatrix of order <= MAXB in rows and columns L to I has split - off. Use the double-shift QR algorithm to handle it. -*/ - - dlahqr_(&wantt, &wantz, n, &l, &i__, &h__[h_offset], ldh, &wr[1], &wi[1], - ilo, ihi, &z__[z_offset], ldz, info); - if (*info > 0) { - return 0; - } - -/* - Decrement number of remaining iterations, and return to start of - the main loop with a new value of I. -*/ - - itn -= its; - i__ = l - 1; - goto L50; - -L170: - work[1] = (doublereal) max(1,*n); - return 0; - -/* End of DHSEQR */ - -} /* dhseqr_ */ - -/* Subroutine */ int dlabad_(doublereal *small, doublereal *large) -{ - /* Builtin functions */ - double d_lg10(doublereal *), sqrt(doublereal); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLABAD takes as input the values computed by DLAMCH for underflow and - overflow, and returns the square root of each of these values if the - log of LARGE is sufficiently large. This subroutine is intended to - identify machines with a large exponent range, such as the Crays, and - redefine the underflow and overflow limits to be the square roots of - the values computed by DLAMCH. This subroutine is needed because - DLAMCH does not compensate for poor arithmetic in the upper half of - the exponent range, as is found on a Cray. - - Arguments - ========= - - SMALL (input/output) DOUBLE PRECISION - On entry, the underflow threshold as computed by DLAMCH. - On exit, if LOG10(LARGE) is sufficiently large, the square - root of SMALL, otherwise unchanged. - - LARGE (input/output) DOUBLE PRECISION - On entry, the overflow threshold as computed by DLAMCH. - On exit, if LOG10(LARGE) is sufficiently large, the square - root of LARGE, otherwise unchanged. - - ===================================================================== - - - If it looks like we're on a Cray, take the square root of - SMALL and LARGE to avoid overflow and underflow problems. -*/ - - if (d_lg10(large) > 2e3) { - *small = sqrt(*small); - *large = sqrt(*large); - } - - return 0; - -/* End of DLABAD */ - -} /* dlabad_ */ - -/* Subroutine */ int dlabrd_(integer *m, integer *n, integer *nb, doublereal * - a, integer *lda, doublereal *d__, doublereal *e, doublereal *tauq, - doublereal *taup, doublereal *x, integer *ldx, doublereal *y, integer - *ldy) -{ - /* System generated locals */ - integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, - i__3; - - /* Local variables */ - static integer i__; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *), dgemv_(char *, integer *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *), dlarfg_(integer *, doublereal *, - doublereal *, integer *, doublereal *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DLABRD reduces the first NB rows and columns of a real general - m by n matrix A to upper or lower bidiagonal form by an orthogonal - transformation Q' * A * P, and returns the matrices X and Y which - are needed to apply the transformation to the unreduced part of A. - - If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower - bidiagonal form. - - This is an auxiliary routine called by DGEBRD - - Arguments - ========= - - M (input) INTEGER - The number of rows in the matrix A. - - N (input) INTEGER - The number of columns in the matrix A. - - NB (input) INTEGER - The number of leading rows and columns of A to be reduced. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the m by n general matrix to be reduced. - On exit, the first NB rows and columns of the matrix are - overwritten; the rest of the array is unchanged. - If m >= n, elements on and below the diagonal in the first NB - columns, with the array TAUQ, represent the orthogonal - matrix Q as a product of elementary reflectors; and - elements above the diagonal in the first NB rows, with the - array TAUP, represent the orthogonal matrix P as a product - of elementary reflectors. - If m < n, elements below the diagonal in the first NB - columns, with the array TAUQ, represent the orthogonal - matrix Q as a product of elementary reflectors, and - elements on and above the diagonal in the first NB rows, - with the array TAUP, represent the orthogonal matrix P as - a product of elementary reflectors. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - D (output) DOUBLE PRECISION array, dimension (NB) - The diagonal elements of the first NB rows and columns of - the reduced matrix. D(i) = A(i,i). - - E (output) DOUBLE PRECISION array, dimension (NB) - The off-diagonal elements of the first NB rows and columns of - the reduced matrix. - - TAUQ (output) DOUBLE PRECISION array dimension (NB) - The scalar factors of the elementary reflectors which - represent the orthogonal matrix Q. See Further Details. - - TAUP (output) DOUBLE PRECISION array, dimension (NB) - The scalar factors of the elementary reflectors which - represent the orthogonal matrix P. See Further Details. - - X (output) DOUBLE PRECISION array, dimension (LDX,NB) - The m-by-nb matrix X required to update the unreduced part - of A. - - LDX (input) INTEGER - The leading dimension of the array X. LDX >= M. - - Y (output) DOUBLE PRECISION array, dimension (LDY,NB) - The n-by-nb matrix Y required to update the unreduced part - of A. - - LDY (output) INTEGER - The leading dimension of the array Y. LDY >= N. - - Further Details - =============== - - The matrices Q and P are represented as products of elementary - reflectors: - - Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are real scalars, and v and u are real vectors. - - If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in - A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in - A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). - - If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in - A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in - A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). - - The elements of the vectors v and u together form the m-by-nb matrix - V and the nb-by-n matrix U' which are needed, with X and Y, to apply - the transformation to the unreduced part of the matrix, using a block - update of the form: A := A - V*Y' - X*U'. - - The contents of A on exit are illustrated by the following examples - with nb = 2: - - m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): - - ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) - ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) - ( v1 v2 a a a ) ( v1 1 a a a a ) - ( v1 v2 a a a ) ( v1 v2 a a a a ) - ( v1 v2 a a a ) ( v1 v2 a a a a ) - ( v1 v2 a a a ) - - where a denotes an element of the original matrix which is unchanged, - vi denotes an element of the vector defining H(i), and ui an element - of the vector defining G(i). - - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tauq; - --taup; - x_dim1 = *ldx; - x_offset = 1 + x_dim1; - x -= x_offset; - y_dim1 = *ldy; - y_offset = 1 + y_dim1; - y -= y_offset; - - /* Function Body */ - if (*m <= 0 || *n <= 0) { - return 0; - } - - if (*m >= *n) { - -/* Reduce to upper bidiagonal form */ - - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Update A(i:m,i) */ - - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b2589, &a[i__ + a_dim1], - lda, &y[i__ + y_dim1], ldy, &c_b2453, &a[i__ + i__ * - a_dim1], &c__1); - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b2589, &x[i__ + x_dim1], - ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b2453, &a[i__ + i__ * - a_dim1], &c__1); - -/* Generate reflection Q(i) to annihilate A(i+1:m,i) */ - - i__2 = *m - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * - a_dim1], &c__1, &tauq[i__]); - d__[i__] = a[i__ + i__ * a_dim1]; - if (i__ < *n) { - a[i__ + i__ * a_dim1] = 1.; - -/* Compute Y(i+1:n,i) */ - - i__2 = *m - i__ + 1; - i__3 = *n - i__; - dgemv_("Transpose", &i__2, &i__3, &c_b2453, &a[i__ + (i__ + 1) - * a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, & - c_b2467, &y[i__ + 1 + i__ * y_dim1], &c__1) - ; - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - dgemv_("Transpose", &i__2, &i__3, &c_b2453, &a[i__ + a_dim1], - lda, &a[i__ + i__ * a_dim1], &c__1, &c_b2467, &y[i__ * - y_dim1 + 1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b2589, &y[i__ + 1 + - y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b2453, & - y[i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - dgemv_("Transpose", &i__2, &i__3, &c_b2453, &x[i__ + x_dim1], - ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b2467, &y[i__ * - y_dim1 + 1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - dgemv_("Transpose", &i__2, &i__3, &c_b2589, &a[(i__ + 1) * - a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & - c_b2453, &y[i__ + 1 + i__ * y_dim1], &c__1) - ; - i__2 = *n - i__; - dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); - -/* Update A(i,i+1:n) */ - - i__2 = *n - i__; - dgemv_("No transpose", &i__2, &i__, &c_b2589, &y[i__ + 1 + - y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b2453, &a[i__ - + (i__ + 1) * a_dim1], lda); - i__2 = i__ - 1; - i__3 = *n - i__; - dgemv_("Transpose", &i__2, &i__3, &c_b2589, &a[(i__ + 1) * - a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b2453, &a[ - i__ + (i__ + 1) * a_dim1], lda); - -/* Generate reflection P(i) to annihilate A(i,i+2:n) */ - - i__2 = *n - i__; -/* Computing MIN */ - i__3 = i__ + 2; - dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( - i__3,*n) * a_dim1], lda, &taup[i__]); - e[i__] = a[i__ + (i__ + 1) * a_dim1]; - a[i__ + (i__ + 1) * a_dim1] = 1.; - -/* Compute X(i+1:m,i) */ - - i__2 = *m - i__; - i__3 = *n - i__; - dgemv_("No transpose", &i__2, &i__3, &c_b2453, &a[i__ + 1 + ( - i__ + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1], - lda, &c_b2467, &x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *n - i__; - dgemv_("Transpose", &i__2, &i__, &c_b2453, &y[i__ + 1 + - y_dim1], ldy, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b2467, &x[i__ * x_dim1 + 1], &c__1); - i__2 = *m - i__; - dgemv_("No transpose", &i__2, &i__, &c_b2589, &a[i__ + 1 + - a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b2453, & - x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - dgemv_("No transpose", &i__2, &i__3, &c_b2453, &a[(i__ + 1) * - a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b2467, &x[i__ * x_dim1 + 1], &c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b2589, &x[i__ + 1 + - x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b2453, & - x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *m - i__; - dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); - } -/* L10: */ - } - } else { - -/* Reduce to lower bidiagonal form */ - - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Update A(i,i:n) */ - - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b2589, &y[i__ + y_dim1], - ldy, &a[i__ + a_dim1], lda, &c_b2453, &a[i__ + i__ * - a_dim1], lda); - i__2 = i__ - 1; - i__3 = *n - i__ + 1; - dgemv_("Transpose", &i__2, &i__3, &c_b2589, &a[i__ * a_dim1 + 1], - lda, &x[i__ + x_dim1], ldx, &c_b2453, &a[i__ + i__ * - a_dim1], lda); - -/* Generate reflection P(i) to annihilate A(i,i+1:n) */ - - i__2 = *n - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * - a_dim1], lda, &taup[i__]); - d__[i__] = a[i__ + i__ * a_dim1]; - if (i__ < *m) { - a[i__ + i__ * a_dim1] = 1.; - -/* Compute X(i+1:m,i) */ - - i__2 = *m - i__; - i__3 = *n - i__ + 1; - dgemv_("No transpose", &i__2, &i__3, &c_b2453, &a[i__ + 1 + - i__ * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, & - c_b2467, &x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - dgemv_("Transpose", &i__2, &i__3, &c_b2453, &y[i__ + y_dim1], - ldy, &a[i__ + i__ * a_dim1], lda, &c_b2467, &x[i__ * - x_dim1 + 1], &c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b2589, &a[i__ + 1 + - a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b2453, & - x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__ + 1; - dgemv_("No transpose", &i__2, &i__3, &c_b2453, &a[i__ * - a_dim1 + 1], lda, &a[i__ + i__ * a_dim1], lda, & - c_b2467, &x[i__ * x_dim1 + 1], &c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b2589, &x[i__ + 1 + - x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b2453, & - x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *m - i__; - dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); - -/* Update A(i+1:m,i) */ - - i__2 = *m - i__; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b2589, &a[i__ + 1 + - a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b2453, &a[i__ - + 1 + i__ * a_dim1], &c__1); - i__2 = *m - i__; - dgemv_("No transpose", &i__2, &i__, &c_b2589, &x[i__ + 1 + - x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b2453, & - a[i__ + 1 + i__ * a_dim1], &c__1); - -/* Generate reflection Q(i) to annihilate A(i+2:m,i) */ - - i__2 = *m - i__; -/* Computing MIN */ - i__3 = i__ + 2; - dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) + - i__ * a_dim1], &c__1, &tauq[i__]); - e[i__] = a[i__ + 1 + i__ * a_dim1]; - a[i__ + 1 + i__ * a_dim1] = 1.; - -/* Compute Y(i+1:n,i) */ - - i__2 = *m - i__; - i__3 = *n - i__; - dgemv_("Transpose", &i__2, &i__3, &c_b2453, &a[i__ + 1 + (i__ - + 1) * a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], & - c__1, &c_b2467, &y[i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - dgemv_("Transpose", &i__2, &i__3, &c_b2453, &a[i__ + 1 + - a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b2467, &y[i__ * y_dim1 + 1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b2589, &y[i__ + 1 + - y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b2453, & - y[i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *m - i__; - dgemv_("Transpose", &i__2, &i__, &c_b2453, &x[i__ + 1 + - x_dim1], ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b2467, &y[i__ * y_dim1 + 1], &c__1); - i__2 = *n - i__; - dgemv_("Transpose", &i__, &i__2, &c_b2589, &a[(i__ + 1) * - a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & - c_b2453, &y[i__ + 1 + i__ * y_dim1], &c__1) - ; - i__2 = *n - i__; - dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); - } -/* L20: */ - } - } - return 0; - -/* End of DLABRD */ - -} /* dlabrd_ */ - -/* Subroutine */ int dlacpy_(char *uplo, integer *m, integer *n, doublereal * - a, integer *lda, doublereal *b, integer *ldb) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j; - extern logical lsame_(char *, char *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DLACPY copies all or part of a two-dimensional matrix A to another - matrix B. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies the part of the matrix A to be copied to B. - = 'U': Upper triangular part - = 'L': Lower triangular part - Otherwise: All of the matrix A - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input) DOUBLE PRECISION array, dimension (LDA,N) - The m by n matrix A. If UPLO = 'U', only the upper triangle - or trapezoid is accessed; if UPLO = 'L', only the lower - triangle or trapezoid is accessed. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - B (output) DOUBLE PRECISION array, dimension (LDB,N) - On exit, B = A in the locations specified by UPLO. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,M). - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = min(j,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; -/* L10: */ - } -/* L20: */ - } - } else if (lsame_(uplo, "L")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = j; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; -/* L30: */ - } -/* L40: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; -/* L50: */ - } -/* L60: */ - } - } - return 0; - -/* End of DLACPY */ - -} /* dlacpy_ */ - -/* Subroutine */ int dladiv_(doublereal *a, doublereal *b, doublereal *c__, - doublereal *d__, doublereal *p, doublereal *q) -{ - static doublereal e, f; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLADIV performs complex division in real arithmetic - - a + i*b - p + i*q = --------- - c + i*d - - The algorithm is due to Robert L. Smith and can be found - in D. Knuth, The art of Computer Programming, Vol.2, p.195 - - Arguments - ========= - - A (input) DOUBLE PRECISION - B (input) DOUBLE PRECISION - C (input) DOUBLE PRECISION - D (input) DOUBLE PRECISION - The scalars a, b, c, and d in the above expression. - - P (output) DOUBLE PRECISION - Q (output) DOUBLE PRECISION - The scalars p and q in the above expression. - - ===================================================================== -*/ - - - if (abs(*d__) < abs(*c__)) { - e = *d__ / *c__; - f = *c__ + *d__ * e; - *p = (*a + *b * e) / f; - *q = (*b - *a * e) / f; - } else { - e = *c__ / *d__; - f = *d__ + *c__ * e; - *p = (*b + *a * e) / f; - *q = (-(*a) + *b * e) / f; - } - - return 0; - -/* End of DLADIV */ - -} /* dladiv_ */ - -/* Subroutine */ int dlae2_(doublereal *a, doublereal *b, doublereal *c__, - doublereal *rt1, doublereal *rt2) -{ - /* System generated locals */ - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal ab, df, tb, sm, rt, adf, acmn, acmx; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix - [ A B ] - [ B C ]. - On return, RT1 is the eigenvalue of larger absolute value, and RT2 - is the eigenvalue of smaller absolute value. - - Arguments - ========= - - A (input) DOUBLE PRECISION - The (1,1) element of the 2-by-2 matrix. - - B (input) DOUBLE PRECISION - The (1,2) and (2,1) elements of the 2-by-2 matrix. - - C (input) DOUBLE PRECISION - The (2,2) element of the 2-by-2 matrix. - - RT1 (output) DOUBLE PRECISION - The eigenvalue of larger absolute value. - - RT2 (output) DOUBLE PRECISION - The eigenvalue of smaller absolute value. - - Further Details - =============== - - RT1 is accurate to a few ulps barring over/underflow. - - RT2 may be inaccurate if there is massive cancellation in the - determinant A*C-B*B; higher precision or correctly rounded or - correctly truncated arithmetic would be needed to compute RT2 - accurately in all cases. - - Overflow is possible only if RT1 is within a factor of 5 of overflow. - Underflow is harmless if the input data is 0 or exceeds - underflow_threshold / macheps. - - ===================================================================== - - - Compute the eigenvalues -*/ - - sm = *a + *c__; - df = *a - *c__; - adf = abs(df); - tb = *b + *b; - ab = abs(tb); - if (abs(*a) > abs(*c__)) { - acmx = *a; - acmn = *c__; - } else { - acmx = *c__; - acmn = *a; - } - if (adf > ab) { -/* Computing 2nd power */ - d__1 = ab / adf; - rt = adf * sqrt(d__1 * d__1 + 1.); - } else if (adf < ab) { -/* Computing 2nd power */ - d__1 = adf / ab; - rt = ab * sqrt(d__1 * d__1 + 1.); - } else { - -/* Includes case AB=ADF=0 */ - - rt = ab * sqrt(2.); - } - if (sm < 0.) { - *rt1 = (sm - rt) * .5; - -/* - Order of execution important. - To get fully accurate smaller eigenvalue, - next line needs to be executed in higher precision. -*/ - - *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; - } else if (sm > 0.) { - *rt1 = (sm + rt) * .5; - -/* - Order of execution important. - To get fully accurate smaller eigenvalue, - next line needs to be executed in higher precision. -*/ - - *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; - } else { - -/* Includes case RT1 = RT2 = 0 */ - - *rt1 = rt * .5; - *rt2 = rt * -.5; - } - return 0; - -/* End of DLAE2 */ - -} /* dlae2_ */ - -/* Subroutine */ int dlaed0_(integer *icompq, integer *qsiz, integer *n, - doublereal *d__, doublereal *e, doublereal *q, integer *ldq, - doublereal *qstore, integer *ldqs, doublereal *work, integer *iwork, - integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2; - doublereal d__1; - - /* Builtin functions */ - double log(doublereal); - integer pow_ii(integer *, integer *); - - /* Local variables */ - static integer i__, j, k, iq, lgn, msd2, smm1, spm1, spm2; - static doublereal temp; - static integer curr; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - static integer iperm; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static integer indxq, iwrem; - extern /* Subroutine */ int dlaed1_(integer *, doublereal *, doublereal *, - integer *, integer *, doublereal *, integer *, doublereal *, - integer *, integer *); - static integer iqptr; - extern /* Subroutine */ int dlaed7_(integer *, integer *, integer *, - integer *, integer *, integer *, doublereal *, doublereal *, - integer *, integer *, doublereal *, integer *, doublereal *, - integer *, integer *, integer *, integer *, integer *, doublereal - *, doublereal *, integer *, integer *); - static integer tlvls; - extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *); - static integer igivcl; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer igivnm, submat, curprb, subpbs, igivpt; - extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, - doublereal *, doublereal *, integer *, doublereal *, integer *); - static integer curlvl, matsiz, iprmpt, smlsiz; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DLAED0 computes all eigenvalues and corresponding eigenvectors of a - symmetric tridiagonal matrix using the divide and conquer method. - - Arguments - ========= - - ICOMPQ (input) INTEGER - = 0: Compute eigenvalues only. - = 1: Compute eigenvectors of original dense symmetric matrix - also. On entry, Q contains the orthogonal matrix used - to reduce the original matrix to tridiagonal form. - = 2: Compute eigenvalues and eigenvectors of tridiagonal - matrix. - - QSIZ (input) INTEGER - The dimension of the orthogonal matrix used to reduce - the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the main diagonal of the tridiagonal matrix. - On exit, its eigenvalues. - - E (input) DOUBLE PRECISION array, dimension (N-1) - The off-diagonal elements of the tridiagonal matrix. - On exit, E has been destroyed. - - Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) - On entry, Q must contain an N-by-N orthogonal matrix. - If ICOMPQ = 0 Q is not referenced. - If ICOMPQ = 1 On entry, Q is a subset of the columns of the - orthogonal matrix used to reduce the full - matrix to tridiagonal form corresponding to - the subset of the full matrix which is being - decomposed at this time. - If ICOMPQ = 2 On entry, Q will be the identity matrix. - On exit, Q contains the eigenvectors of the - tridiagonal matrix. - - LDQ (input) INTEGER - The leading dimension of the array Q. If eigenvectors are - desired, then LDQ >= max(1,N). In any case, LDQ >= 1. - - QSTORE (workspace) DOUBLE PRECISION array, dimension (LDQS, N) - Referenced only when ICOMPQ = 1. Used to store parts of - the eigenvector matrix when the updating matrix multiplies - take place. - - LDQS (input) INTEGER - The leading dimension of the array QSTORE. If ICOMPQ = 1, - then LDQS >= max(1,N). In any case, LDQS >= 1. - - WORK (workspace) DOUBLE PRECISION array, - If ICOMPQ = 0 or 1, the dimension of WORK must be at least - 1 + 3*N + 2*N*lg N + 2*N**2 - ( lg( N ) = smallest integer k - such that 2^k >= N ) - If ICOMPQ = 2, the dimension of WORK must be at least - 4*N + N**2. - - IWORK (workspace) INTEGER array, - If ICOMPQ = 0 or 1, the dimension of IWORK must be at least - 6 + 6*N + 5*N*lg N. - ( lg( N ) = smallest integer k - such that 2^k >= N ) - If ICOMPQ = 2, the dimension of IWORK must be at least - 3 + 5*N. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an eigenvalue while - working on the submatrix lying in rows and columns - INFO/(N+1) through mod(INFO,N+1). - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - qstore_dim1 = *ldqs; - qstore_offset = 1 + qstore_dim1; - qstore -= qstore_offset; - --work; - --iwork; - - /* Function Body */ - *info = 0; - - if (*icompq < 0 || *icompq > 2) { - *info = -1; - } else if (*icompq == 1 && *qsiz < max(0,*n)) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*ldq < max(1,*n)) { - *info = -7; - } else if (*ldqs < max(1,*n)) { - *info = -9; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLAED0", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - smlsiz = ilaenv_(&c__9, "DLAED0", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); - -/* - Determine the size and placement of the submatrices, and save in - the leading elements of IWORK. -*/ - - iwork[1] = *n; - subpbs = 1; - tlvls = 0; -L10: - if (iwork[subpbs] > smlsiz) { - for (j = subpbs; j >= 1; --j) { - iwork[j * 2] = (iwork[j] + 1) / 2; - iwork[(j << 1) - 1] = iwork[j] / 2; -/* L20: */ - } - ++tlvls; - subpbs <<= 1; - goto L10; - } - i__1 = subpbs; - for (j = 2; j <= i__1; ++j) { - iwork[j] += iwork[j - 1]; -/* L30: */ - } - -/* - Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 - using rank-1 modifications (cuts). -*/ - - spm1 = subpbs - 1; - i__1 = spm1; - for (i__ = 1; i__ <= i__1; ++i__) { - submat = iwork[i__] + 1; - smm1 = submat - 1; - d__[smm1] -= (d__1 = e[smm1], abs(d__1)); - d__[submat] -= (d__1 = e[smm1], abs(d__1)); -/* L40: */ - } - - indxq = (*n << 2) + 3; - if (*icompq != 2) { - -/* - Set up workspaces for eigenvalues only/accumulate new vectors - routine -*/ - - temp = log((doublereal) (*n)) / log(2.); - lgn = (integer) temp; - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - iprmpt = indxq + *n + 1; - iperm = iprmpt + *n * lgn; - iqptr = iperm + *n * lgn; - igivpt = iqptr + *n + 2; - igivcl = igivpt + *n * lgn; - - igivnm = 1; - iq = igivnm + (*n << 1) * lgn; -/* Computing 2nd power */ - i__1 = *n; - iwrem = iq + i__1 * i__1 + 1; - -/* Initialize pointers */ - - i__1 = subpbs; - for (i__ = 0; i__ <= i__1; ++i__) { - iwork[iprmpt + i__] = 1; - iwork[igivpt + i__] = 1; -/* L50: */ - } - iwork[iqptr] = 1; - } - -/* - Solve each submatrix eigenproblem at the bottom of the divide and - conquer tree. -*/ - - curr = 0; - i__1 = spm1; - for (i__ = 0; i__ <= i__1; ++i__) { - if (i__ == 0) { - submat = 1; - matsiz = iwork[1]; - } else { - submat = iwork[i__] + 1; - matsiz = iwork[i__ + 1] - iwork[i__]; - } - if (*icompq == 2) { - dsteqr_("I", &matsiz, &d__[submat], &e[submat], &q[submat + - submat * q_dim1], ldq, &work[1], info); - if (*info != 0) { - goto L130; - } - } else { - dsteqr_("I", &matsiz, &d__[submat], &e[submat], &work[iq - 1 + - iwork[iqptr + curr]], &matsiz, &work[1], info); - if (*info != 0) { - goto L130; - } - if (*icompq == 1) { - dgemm_("N", "N", qsiz, &matsiz, &matsiz, &c_b2453, &q[submat * - q_dim1 + 1], ldq, &work[iq - 1 + iwork[iqptr + curr]] - , &matsiz, &c_b2467, &qstore[submat * qstore_dim1 + 1] - , ldqs); - } -/* Computing 2nd power */ - i__2 = matsiz; - iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2; - ++curr; - } - k = 1; - i__2 = iwork[i__ + 1]; - for (j = submat; j <= i__2; ++j) { - iwork[indxq + j] = k; - ++k; -/* L60: */ - } -/* L70: */ - } - -/* - Successively merge eigensystems of adjacent submatrices - into eigensystem for the corresponding larger matrix. - - while ( SUBPBS > 1 ) -*/ - - curlvl = 1; -L80: - if (subpbs > 1) { - spm2 = subpbs - 2; - i__1 = spm2; - for (i__ = 0; i__ <= i__1; i__ += 2) { - if (i__ == 0) { - submat = 1; - matsiz = iwork[2]; - msd2 = iwork[1]; - curprb = 0; - } else { - submat = iwork[i__] + 1; - matsiz = iwork[i__ + 2] - iwork[i__]; - msd2 = matsiz / 2; - ++curprb; - } - -/* - Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) - into an eigensystem of size MATSIZ. - DLAED1 is used only for the full eigensystem of a tridiagonal - matrix. - DLAED7 handles the cases in which eigenvalues only or eigenvalues - and eigenvectors of a full symmetric matrix (which was reduced to - tridiagonal form) are desired. -*/ - - if (*icompq == 2) { - dlaed1_(&matsiz, &d__[submat], &q[submat + submat * q_dim1], - ldq, &iwork[indxq + submat], &e[submat + msd2 - 1], & - msd2, &work[1], &iwork[subpbs + 1], info); - } else { - dlaed7_(icompq, &matsiz, qsiz, &tlvls, &curlvl, &curprb, &d__[ - submat], &qstore[submat * qstore_dim1 + 1], ldqs, & - iwork[indxq + submat], &e[submat + msd2 - 1], &msd2, & - work[iq], &iwork[iqptr], &iwork[iprmpt], &iwork[iperm] - , &iwork[igivpt], &iwork[igivcl], &work[igivnm], & - work[iwrem], &iwork[subpbs + 1], info); - } - if (*info != 0) { - goto L130; - } - iwork[i__ / 2 + 1] = iwork[i__ + 2]; -/* L90: */ - } - subpbs /= 2; - ++curlvl; - goto L80; - } - -/* - end while - - Re-merge the eigenvalues/vectors which were deflated at the final - merge step. -*/ - - if (*icompq == 1) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - j = iwork[indxq + i__]; - work[i__] = d__[j]; - dcopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 - + 1], &c__1); -/* L100: */ - } - dcopy_(n, &work[1], &c__1, &d__[1], &c__1); - } else if (*icompq == 2) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - j = iwork[indxq + i__]; - work[i__] = d__[j]; - dcopy_(n, &q[j * q_dim1 + 1], &c__1, &work[*n * i__ + 1], &c__1); -/* L110: */ - } - dcopy_(n, &work[1], &c__1, &d__[1], &c__1); - dlacpy_("A", n, n, &work[*n + 1], n, &q[q_offset], ldq); - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - j = iwork[indxq + i__]; - work[i__] = d__[j]; -/* L120: */ - } - dcopy_(n, &work[1], &c__1, &d__[1], &c__1); - } - goto L140; - -L130: - *info = submat * (*n + 1) + submat + matsiz - 1; - -L140: - return 0; - -/* End of DLAED0 */ - -} /* dlaed0_ */ - -/* Subroutine */ int dlaed1_(integer *n, doublereal *d__, doublereal *q, - integer *ldq, integer *indxq, doublereal *rho, integer *cutpnt, - doublereal *work, integer *iwork, integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, i__1, i__2; - - /* Local variables */ - static integer i__, k, n1, n2, is, iw, iz, iq2, zpp1, indx, indxc; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static integer indxp; - extern /* Subroutine */ int dlaed2_(integer *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, integer *, - integer *, integer *, integer *, integer *), dlaed3_(integer *, - integer *, integer *, doublereal *, doublereal *, integer *, - doublereal *, doublereal *, doublereal *, integer *, integer *, - doublereal *, doublereal *, integer *); - static integer idlmda; - extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, - integer *, integer *, integer *), xerbla_(char *, integer *); - static integer coltyp; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DLAED1 computes the updated eigensystem of a diagonal - matrix after modification by a rank-one symmetric matrix. This - routine is used only for the eigenproblem which requires all - eigenvalues and eigenvectors of a tridiagonal matrix. DLAED7 handles - the case in which eigenvalues only or eigenvalues and eigenvectors - of a full symmetric matrix (which was reduced to tridiagonal form) - are desired. - - T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) - - where Z = Q'u, u is a vector of length N with ones in the - CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. - - The eigenvectors of the original matrix are stored in Q, and the - eigenvalues are in D. The algorithm consists of three stages: - - The first stage consists of deflating the size of the problem - when there are multiple eigenvalues or if there is a zero in - the Z vector. For each such occurence the dimension of the - secular equation problem is reduced by one. This stage is - performed by the routine DLAED2. - - The second stage consists of calculating the updated - eigenvalues. This is done by finding the roots of the secular - equation via the routine DLAED4 (as called by DLAED3). - This routine also calculates the eigenvectors of the current - problem. - - The final stage consists of computing the updated eigenvectors - directly using the updated eigenvalues. The eigenvectors for - the current problem are multiplied with the eigenvectors from - the overall problem. - - Arguments - ========= - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the eigenvalues of the rank-1-perturbed matrix. - On exit, the eigenvalues of the repaired matrix. - - Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) - On entry, the eigenvectors of the rank-1-perturbed matrix. - On exit, the eigenvectors of the repaired tridiagonal matrix. - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). - - INDXQ (input/output) INTEGER array, dimension (N) - On entry, the permutation which separately sorts the two - subproblems in D into ascending order. - On exit, the permutation which will reintegrate the - subproblems back into sorted order, - i.e. D( INDXQ( I = 1, N ) ) will be in ascending order. - - RHO (input) DOUBLE PRECISION - The subdiagonal entry used to create the rank-1 modification. - - CUTPNT (input) INTEGER - The location of the last eigenvalue in the leading sub-matrix. - min(1,N) <= CUTPNT <= N/2. - - WORK (workspace) DOUBLE PRECISION array, dimension (4*N + N**2) - - IWORK (workspace) INTEGER array, dimension (4*N) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an eigenvalue did not converge - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - Modified by Francoise Tisseur, University of Tennessee. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --indxq; - --work; - --iwork; - - /* Function Body */ - *info = 0; - - if (*n < 0) { - *info = -1; - } else if (*ldq < max(1,*n)) { - *info = -4; - } else /* if(complicated condition) */ { -/* Computing MIN */ - i__1 = 1, i__2 = *n / 2; - if (min(i__1,i__2) > *cutpnt || *n / 2 < *cutpnt) { - *info = -7; - } - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLAED1", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* - The following values are integer pointers which indicate - the portion of the workspace - used by a particular array in DLAED2 and DLAED3. -*/ - - iz = 1; - idlmda = iz + *n; - iw = idlmda + *n; - iq2 = iw + *n; - - indx = 1; - indxc = indx + *n; - coltyp = indxc + *n; - indxp = coltyp + *n; - - -/* - Form the z-vector which consists of the last row of Q_1 and the - first row of Q_2. -*/ - - dcopy_(cutpnt, &q[*cutpnt + q_dim1], ldq, &work[iz], &c__1); - zpp1 = *cutpnt + 1; - i__1 = *n - *cutpnt; - dcopy_(&i__1, &q[zpp1 + zpp1 * q_dim1], ldq, &work[iz + *cutpnt], &c__1); - -/* Deflate eigenvalues. */ - - dlaed2_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, &indxq[1], rho, &work[ - iz], &work[idlmda], &work[iw], &work[iq2], &iwork[indx], &iwork[ - indxc], &iwork[indxp], &iwork[coltyp], info); - - if (*info != 0) { - goto L20; - } - -/* Solve Secular Equation. */ - - if (k != 0) { - is = (iwork[coltyp] + iwork[coltyp + 1]) * *cutpnt + (iwork[coltyp + - 1] + iwork[coltyp + 2]) * (*n - *cutpnt) + iq2; - dlaed3_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, rho, &work[idlmda], - &work[iq2], &iwork[indxc], &iwork[coltyp], &work[iw], &work[ - is], info); - if (*info != 0) { - goto L20; - } - -/* Prepare the INDXQ sorting permutation. */ - - n1 = k; - n2 = *n - k; - dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - indxq[i__] = i__; -/* L10: */ - } - } - -L20: - return 0; - -/* End of DLAED1 */ - -} /* dlaed1_ */ - -/* Subroutine */ int dlaed2_(integer *k, integer *n, integer *n1, doublereal * - d__, doublereal *q, integer *ldq, integer *indxq, doublereal *rho, - doublereal *z__, doublereal *dlamda, doublereal *w, doublereal *q2, - integer *indx, integer *indxc, integer *indxp, integer *coltyp, - integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, i__1, i__2; - doublereal d__1, d__2, d__3, d__4; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal c__; - static integer i__, j; - static doublereal s, t; - static integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1; - static doublereal eps, tau, tol; - static integer psm[4], imax, jmax; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *); - static integer ctot[4]; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *), dcopy_(integer *, doublereal *, integer *, doublereal - *, integer *); - - extern integer idamax_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, - integer *, integer *, integer *), dlacpy_(char *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - DLAED2 merges the two sets of eigenvalues together into a single - sorted set. Then it tries to deflate the size of the problem. - There are two ways in which deflation can occur: when two or more - eigenvalues are close together or if there is a tiny entry in the - Z vector. For each such occurrence the order of the related secular - equation problem is reduced by one. - - Arguments - ========= - - K (output) INTEGER - The number of non-deflated eigenvalues, and the order of the - related secular equation. 0 <= K <=N. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - N1 (input) INTEGER - The location of the last eigenvalue in the leading sub-matrix. - min(1,N) <= N1 <= N/2. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, D contains the eigenvalues of the two submatrices to - be combined. - On exit, D contains the trailing (N-K) updated eigenvalues - (those which were deflated) sorted into increasing order. - - Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) - On entry, Q contains the eigenvectors of two submatrices in - the two square blocks with corners at (1,1), (N1,N1) - and (N1+1, N1+1), (N,N). - On exit, Q contains the trailing (N-K) updated eigenvectors - (those which were deflated) in its last N-K columns. - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). - - INDXQ (input/output) INTEGER array, dimension (N) - The permutation which separately sorts the two sub-problems - in D into ascending order. Note that elements in the second - half of this permutation must first have N1 added to their - values. Destroyed on exit. - - RHO (input/output) DOUBLE PRECISION - On entry, the off-diagonal element associated with the rank-1 - cut which originally split the two submatrices which are now - being recombined. - On exit, RHO has been modified to the value required by - DLAED3. - - Z (input) DOUBLE PRECISION array, dimension (N) - On entry, Z contains the updating vector (the last - row of the first sub-eigenvector matrix and the first row of - the second sub-eigenvector matrix). - On exit, the contents of Z have been destroyed by the updating - process. - - DLAMDA (output) DOUBLE PRECISION array, dimension (N) - A copy of the first K eigenvalues which will be used by - DLAED3 to form the secular equation. - - W (output) DOUBLE PRECISION array, dimension (N) - The first k values of the final deflation-altered z-vector - which will be passed to DLAED3. - - Q2 (output) DOUBLE PRECISION array, dimension (N1**2+(N-N1)**2) - A copy of the first K eigenvectors which will be used by - DLAED3 in a matrix multiply (DGEMM) to solve for the new - eigenvectors. - - INDX (workspace) INTEGER array, dimension (N) - The permutation used to sort the contents of DLAMDA into - ascending order. - - INDXC (output) INTEGER array, dimension (N) - The permutation used to arrange the columns of the deflated - Q matrix into three groups: the first group contains non-zero - elements only at and above N1, the second contains - non-zero elements only below N1, and the third is dense. - - INDXP (workspace) INTEGER array, dimension (N) - The permutation used to place deflated values of D at the end - of the array. INDXP(1:K) points to the nondeflated D-values - and INDXP(K+1:N) points to the deflated eigenvalues. - - COLTYP (workspace/output) INTEGER array, dimension (N) - During execution, a label which will indicate which of the - following types a column in the Q2 matrix is: - 1 : non-zero in the upper half only; - 2 : dense; - 3 : non-zero in the lower half only; - 4 : deflated. - On exit, COLTYP(i) is the number of columns of type i, - for i=1 to 4 only. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - Modified by Francoise Tisseur, University of Tennessee. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --indxq; - --z__; - --dlamda; - --w; - --q2; - --indx; - --indxc; - --indxp; - --coltyp; - - /* Function Body */ - *info = 0; - - if (*n < 0) { - *info = -2; - } else if (*ldq < max(1,*n)) { - *info = -6; - } else /* if(complicated condition) */ { -/* Computing MIN */ - i__1 = 1, i__2 = *n / 2; - if (min(i__1,i__2) > *n1 || *n / 2 < *n1) { - *info = -3; - } - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLAED2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - n2 = *n - *n1; - n1p1 = *n1 + 1; - - if (*rho < 0.) { - dscal_(&n2, &c_b2589, &z__[n1p1], &c__1); - } - -/* - Normalize z so that norm(z) = 1. Since z is the concatenation of - two normalized vectors, norm2(z) = sqrt(2). -*/ - - t = 1. / sqrt(2.); - dscal_(n, &t, &z__[1], &c__1); - -/* RHO = ABS( norm(z)**2 * RHO ) */ - - *rho = (d__1 = *rho * 2., abs(d__1)); - -/* Sort the eigenvalues into increasing order */ - - i__1 = *n; - for (i__ = n1p1; i__ <= i__1; ++i__) { - indxq[i__] += *n1; -/* L10: */ - } - -/* re-integrate the deflated parts from the last pass */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dlamda[i__] = d__[indxq[i__]]; -/* L20: */ - } - dlamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]); - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - indx[i__] = indxq[indxc[i__]]; -/* L30: */ - } - -/* Calculate the allowable deflation tolerance */ - - imax = idamax_(n, &z__[1], &c__1); - jmax = idamax_(n, &d__[1], &c__1); - eps = EPSILON; -/* Computing MAX */ - d__3 = (d__1 = d__[jmax], abs(d__1)), d__4 = (d__2 = z__[imax], abs(d__2)) - ; - tol = eps * 8. * max(d__3,d__4); - -/* - If the rank-1 modifier is small enough, no more needs to be done - except to reorganize Q so that its columns correspond with the - elements in D. -*/ - - if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) { - *k = 0; - iq2 = 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__ = indx[j]; - dcopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1); - dlamda[j] = d__[i__]; - iq2 += *n; -/* L40: */ - } - dlacpy_("A", n, n, &q2[1], n, &q[q_offset], ldq); - dcopy_(n, &dlamda[1], &c__1, &d__[1], &c__1); - goto L190; - } - -/* - If there are multiple eigenvalues then the problem deflates. Here - the number of equal eigenvalues are found. As each equal - eigenvalue is found, an elementary reflector is computed to rotate - the corresponding eigensubspace so that the corresponding - components of Z are zero in this new basis. -*/ - - i__1 = *n1; - for (i__ = 1; i__ <= i__1; ++i__) { - coltyp[i__] = 1; -/* L50: */ - } - i__1 = *n; - for (i__ = n1p1; i__ <= i__1; ++i__) { - coltyp[i__] = 3; -/* L60: */ - } - - - *k = 0; - k2 = *n + 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - nj = indx[j]; - if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - coltyp[nj] = 4; - indxp[k2] = nj; - if (j == *n) { - goto L100; - } - } else { - pj = nj; - goto L80; - } -/* L70: */ - } -L80: - ++j; - nj = indx[j]; - if (j > *n) { - goto L100; - } - if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - coltyp[nj] = 4; - indxp[k2] = nj; - } else { - -/* Check if eigenvalues are close enough to allow deflation. */ - - s = z__[pj]; - c__ = z__[nj]; - -/* - Find sqrt(a**2+b**2) without overflow or - destructive underflow. -*/ - - tau = dlapy2_(&c__, &s); - t = d__[nj] - d__[pj]; - c__ /= tau; - s = -s / tau; - if ((d__1 = t * c__ * s, abs(d__1)) <= tol) { - -/* Deflation is possible. */ - - z__[nj] = tau; - z__[pj] = 0.; - if (coltyp[nj] != coltyp[pj]) { - coltyp[nj] = 2; - } - coltyp[pj] = 4; - drot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, & - c__, &s); -/* Computing 2nd power */ - d__1 = c__; -/* Computing 2nd power */ - d__2 = s; - t = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2); -/* Computing 2nd power */ - d__1 = s; -/* Computing 2nd power */ - d__2 = c__; - d__[nj] = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2); - d__[pj] = t; - --k2; - i__ = 1; -L90: - if (k2 + i__ <= *n) { - if (d__[pj] < d__[indxp[k2 + i__]]) { - indxp[k2 + i__ - 1] = indxp[k2 + i__]; - indxp[k2 + i__] = pj; - ++i__; - goto L90; - } else { - indxp[k2 + i__ - 1] = pj; - } - } else { - indxp[k2 + i__ - 1] = pj; - } - pj = nj; - } else { - ++(*k); - dlamda[*k] = d__[pj]; - w[*k] = z__[pj]; - indxp[*k] = pj; - pj = nj; - } - } - goto L80; -L100: - -/* Record the last eigenvalue. */ - - ++(*k); - dlamda[*k] = d__[pj]; - w[*k] = z__[pj]; - indxp[*k] = pj; - -/* - Count up the total number of the various types of columns, then - form a permutation which positions the four column types into - four uniform groups (although one or more of these groups may be - empty). -*/ - - for (j = 1; j <= 4; ++j) { - ctot[j - 1] = 0; -/* L110: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - ct = coltyp[j]; - ++ctot[ct - 1]; -/* L120: */ - } - -/* PSM(*) = Position in SubMatrix (of types 1 through 4) */ - - psm[0] = 1; - psm[1] = ctot[0] + 1; - psm[2] = psm[1] + ctot[1]; - psm[3] = psm[2] + ctot[2]; - *k = *n - ctot[3]; - -/* - Fill out the INDXC array so that the permutation which it induces - will place all type-1 columns first, all type-2 columns next, - then all type-3's, and finally all type-4's. -*/ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - js = indxp[j]; - ct = coltyp[js]; - indx[psm[ct - 1]] = js; - indxc[psm[ct - 1]] = j; - ++psm[ct - 1]; -/* L130: */ - } - -/* - Sort the eigenvalues and corresponding eigenvectors into DLAMDA - and Q2 respectively. The eigenvalues/vectors which were not - deflated go into the first K slots of DLAMDA and Q2 respectively, - while those which were deflated go into the last N - K slots. -*/ - - i__ = 1; - iq1 = 1; - iq2 = (ctot[0] + ctot[1]) * *n1 + 1; - i__1 = ctot[0]; - for (j = 1; j <= i__1; ++j) { - js = indx[i__]; - dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); - z__[i__] = d__[js]; - ++i__; - iq1 += *n1; -/* L140: */ - } - - i__1 = ctot[1]; - for (j = 1; j <= i__1; ++j) { - js = indx[i__]; - dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); - dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); - z__[i__] = d__[js]; - ++i__; - iq1 += *n1; - iq2 += n2; -/* L150: */ - } - - i__1 = ctot[2]; - for (j = 1; j <= i__1; ++j) { - js = indx[i__]; - dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); - z__[i__] = d__[js]; - ++i__; - iq2 += n2; -/* L160: */ - } - - iq1 = iq2; - i__1 = ctot[3]; - for (j = 1; j <= i__1; ++j) { - js = indx[i__]; - dcopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1); - iq2 += *n; - z__[i__] = d__[js]; - ++i__; -/* L170: */ - } - -/* - The deflated eigenvalues and their corresponding vectors go back - into the last N - K slots of D and Q respectively. -*/ - - dlacpy_("A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq); - i__1 = *n - *k; - dcopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1); - -/* Copy CTOT into COLTYP for referencing in DLAED3. */ - - for (j = 1; j <= 4; ++j) { - coltyp[j] = ctot[j - 1]; -/* L180: */ - } - -L190: - return 0; - -/* End of DLAED2 */ - -} /* dlaed2_ */ - -/* Subroutine */ int dlaed3_(integer *k, integer *n, integer *n1, doublereal * - d__, doublereal *q, integer *ldq, doublereal *rho, doublereal *dlamda, - doublereal *q2, integer *indx, integer *ctot, doublereal *w, - doublereal *s, integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, i__1, i__2; - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); - - /* Local variables */ - static integer i__, j, n2, n12, ii, n23, iq2; - static doublereal temp; - extern doublereal dnrm2_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *), - dcopy_(integer *, doublereal *, integer *, doublereal *, integer - *), dlaed4_(integer *, integer *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, integer *); - extern doublereal dlamc3_(doublereal *, doublereal *); - extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *), - dlaset_(char *, integer *, integer *, doublereal *, doublereal *, - doublereal *, integer *), xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 - - - Purpose - ======= - - DLAED3 finds the roots of the secular equation, as defined by the - values in D, W, and RHO, between 1 and K. It makes the - appropriate calls to DLAED4 and then updates the eigenvectors by - multiplying the matrix of eigenvectors of the pair of eigensystems - being combined by the matrix of eigenvectors of the K-by-K system - which is solved here. - - This code makes very mild assumptions about floating point - arithmetic. It will work on machines with a guard digit in - add/subtract, or on those binary machines without guard digits - which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. - It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. - - Arguments - ========= - - K (input) INTEGER - The number of terms in the rational function to be solved by - DLAED4. K >= 0. - - N (input) INTEGER - The number of rows and columns in the Q matrix. - N >= K (deflation may result in N>K). - - N1 (input) INTEGER - The location of the last eigenvalue in the leading submatrix. - min(1,N) <= N1 <= N/2. - - D (output) DOUBLE PRECISION array, dimension (N) - D(I) contains the updated eigenvalues for - 1 <= I <= K. - - Q (output) DOUBLE PRECISION array, dimension (LDQ,N) - Initially the first K columns are used as workspace. - On output the columns 1 to K contain - the updated eigenvectors. - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). - - RHO (input) DOUBLE PRECISION - The value of the parameter in the rank one update equation. - RHO >= 0 required. - - DLAMDA (input/output) DOUBLE PRECISION array, dimension (K) - The first K elements of this array contain the old roots - of the deflated updating problem. These are the poles - of the secular equation. May be changed on output by - having lowest order bit set to zero on Cray X-MP, Cray Y-MP, - Cray-2, or Cray C-90, as described above. - - Q2 (input) DOUBLE PRECISION array, dimension (LDQ2, N) - The first K columns of this matrix contain the non-deflated - eigenvectors for the split problem. - - INDX (input) INTEGER array, dimension (N) - The permutation used to arrange the columns of the deflated - Q matrix into three groups (see DLAED2). - The rows of the eigenvectors found by DLAED4 must be likewise - permuted before the matrix multiply can take place. - - CTOT (input) INTEGER array, dimension (4) - A count of the total number of the various types of columns - in Q, as described in INDX. The fourth column type is any - column which has been deflated. - - W (input/output) DOUBLE PRECISION array, dimension (K) - The first K elements of this array contain the components - of the deflation-adjusted updating vector. Destroyed on - output. - - S (workspace) DOUBLE PRECISION array, dimension (N1 + 1)*K - Will contain the eigenvectors of the repaired matrix which - will be multiplied by the previously accumulated eigenvectors - to update the system. - - LDS (input) INTEGER - The leading dimension of S. LDS >= max(1,K). - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an eigenvalue did not converge - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - Modified by Francoise Tisseur, University of Tennessee. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --dlamda; - --q2; - --indx; - --ctot; - --w; - --s; - - /* Function Body */ - *info = 0; - - if (*k < 0) { - *info = -1; - } else if (*n < *k) { - *info = -2; - } else if (*ldq < max(1,*n)) { - *info = -6; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLAED3", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*k == 0) { - return 0; - } - -/* - Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can - be computed with high relative accuracy (barring over/underflow). - This is a problem on machines without a guard digit in - add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). - The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), - which on any of these machines zeros out the bottommost - bit of DLAMDA(I) if it is 1; this makes the subsequent - subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation - occurs. On binary machines with a guard digit (almost all - machines) it does not change DLAMDA(I) at all. On hexadecimal - and decimal machines with a guard digit, it slightly - changes the bottommost bits of DLAMDA(I). It does not account - for hexadecimal or decimal machines without guard digits - (we know of none). We use a subroutine call to compute - 2*DLAMBDA(I) to prevent optimizing compilers from eliminating - this code. -*/ - - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - dlamda[i__] = dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; -/* L10: */ - } - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], - info); - -/* If the zero finder fails, the computation is terminated. */ - - if (*info != 0) { - goto L120; - } -/* L20: */ - } - - if (*k == 1) { - goto L110; - } - if (*k == 2) { - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - w[1] = q[j * q_dim1 + 1]; - w[2] = q[j * q_dim1 + 2]; - ii = indx[1]; - q[j * q_dim1 + 1] = w[ii]; - ii = indx[2]; - q[j * q_dim1 + 2] = w[ii]; -/* L30: */ - } - goto L110; - } - -/* Compute updated W. */ - - dcopy_(k, &w[1], &c__1, &s[1], &c__1); - -/* Initialize W(I) = Q(I,I) */ - - i__1 = *ldq + 1; - dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1); - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); -/* L40: */ - } - i__2 = *k; - for (i__ = j + 1; i__ <= i__2; ++i__) { - w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); -/* L50: */ - } -/* L60: */ - } - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - d__1 = sqrt(-w[i__]); - w[i__] = d_sign(&d__1, &s[i__]); -/* L70: */ - } - -/* Compute eigenvectors of the modified rank-1 modification. */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *k; - for (i__ = 1; i__ <= i__2; ++i__) { - s[i__] = w[i__] / q[i__ + j * q_dim1]; -/* L80: */ - } - temp = dnrm2_(k, &s[1], &c__1); - i__2 = *k; - for (i__ = 1; i__ <= i__2; ++i__) { - ii = indx[i__]; - q[i__ + j * q_dim1] = s[ii] / temp; -/* L90: */ - } -/* L100: */ - } - -/* Compute the updated eigenvectors. */ - -L110: - - n2 = *n - *n1; - n12 = ctot[1] + ctot[2]; - n23 = ctot[2] + ctot[3]; - - dlacpy_("A", &n23, k, &q[ctot[1] + 1 + q_dim1], ldq, &s[1], &n23); - iq2 = *n1 * n12 + 1; - if (n23 != 0) { - dgemm_("N", "N", &n2, k, &n23, &c_b2453, &q2[iq2], &n2, &s[1], &n23, & - c_b2467, &q[*n1 + 1 + q_dim1], ldq); - } else { - dlaset_("A", &n2, k, &c_b2467, &c_b2467, &q[*n1 + 1 + q_dim1], ldq); - } - - dlacpy_("A", &n12, k, &q[q_offset], ldq, &s[1], &n12); - if (n12 != 0) { - dgemm_("N", "N", n1, k, &n12, &c_b2453, &q2[1], n1, &s[1], &n12, & - c_b2467, &q[q_offset], ldq); - } else { - dlaset_("A", n1, k, &c_b2467, &c_b2467, &q[q_dim1 + 1], ldq); - } - - -L120: - return 0; - -/* End of DLAED3 */ - -} /* dlaed3_ */ - -/* Subroutine */ int dlaed4_(integer *n, integer *i__, doublereal *d__, - doublereal *z__, doublereal *delta, doublereal *rho, doublereal *dlam, - integer *info) -{ - /* System generated locals */ - integer i__1; - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal a, b, c__; - static integer j; - static doublereal w; - static integer ii; - static doublereal dw, zz[3]; - static integer ip1; - static doublereal del, eta, phi, eps, tau, psi; - static integer iim1, iip1; - static doublereal dphi, dpsi; - static integer iter; - static doublereal temp, prew, temp1, dltlb, dltub, midpt; - static integer niter; - static logical swtch; - extern /* Subroutine */ int dlaed5_(integer *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *), dlaed6_(integer *, - logical *, doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *); - static logical swtch3; - - static logical orgati; - static doublereal erretm, rhoinv; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - December 23, 1999 - - - Purpose - ======= - - This subroutine computes the I-th updated eigenvalue of a symmetric - rank-one modification to a diagonal matrix whose elements are - given in the array d, and that - - D(i) < D(j) for i < j - - and that RHO > 0. This is arranged by the calling routine, and is - no loss in generality. The rank-one modified system is thus - - diag( D ) + RHO * Z * Z_transpose. - - where we assume the Euclidean norm of Z is 1. - - The method consists of approximating the rational functions in the - secular equation by simpler interpolating rational functions. - - Arguments - ========= - - N (input) INTEGER - The length of all arrays. - - I (input) INTEGER - The index of the eigenvalue to be computed. 1 <= I <= N. - - D (input) DOUBLE PRECISION array, dimension (N) - The original eigenvalues. It is assumed that they are in - order, D(I) < D(J) for I < J. - - Z (input) DOUBLE PRECISION array, dimension (N) - The components of the updating vector. - - DELTA (output) DOUBLE PRECISION array, dimension (N) - If N .ne. 1, DELTA contains (D(j) - lambda_I) in its j-th - component. If N = 1, then DELTA(1) = 1. The vector DELTA - contains the information necessary to construct the - eigenvectors. - - RHO (input) DOUBLE PRECISION - The scalar in the symmetric updating formula. - - DLAM (output) DOUBLE PRECISION - The computed lambda_I, the I-th updated eigenvalue. - - INFO (output) INTEGER - = 0: successful exit - > 0: if INFO = 1, the updating process failed. - - Internal Parameters - =================== - - Logical variable ORGATI (origin-at-i?) is used for distinguishing - whether D(i) or D(i+1) is treated as the origin. - - ORGATI = .true. origin at i - ORGATI = .false. origin at i+1 - - Logical variable SWTCH3 (switch-for-3-poles?) is for noting - if we are working with THREE poles! - - MAXIT is the maximum number of iterations allowed for each - eigenvalue. - - Further Details - =============== - - Based on contributions by - Ren-Cang Li, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Since this routine is called in an inner loop, we do no argument - checking. - - Quick return for N=1 and 2. -*/ - - /* Parameter adjustments */ - --delta; - --z__; - --d__; - - /* Function Body */ - *info = 0; - if (*n == 1) { - -/* Presumably, I=1 upon entry */ - - *dlam = d__[1] + *rho * z__[1] * z__[1]; - delta[1] = 1.; - return 0; - } - if (*n == 2) { - dlaed5_(i__, &d__[1], &z__[1], &delta[1], rho, dlam); - return 0; - } - -/* Compute machine epsilon */ - - eps = EPSILON; - rhoinv = 1. / *rho; - -/* The case I = N */ - - if (*i__ == *n) { - -/* Initialize some basic variables */ - - ii = *n - 1; - niter = 1; - -/* Calculate initial guess */ - - midpt = *rho / 2.; - -/* - If ||Z||_2 is not one, then TEMP should be set to - RHO * ||Z||_2^2 / TWO -*/ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] = d__[j] - d__[*i__] - midpt; -/* L10: */ - } - - psi = 0.; - i__1 = *n - 2; - for (j = 1; j <= i__1; ++j) { - psi += z__[j] * z__[j] / delta[j]; -/* L20: */ - } - - c__ = rhoinv + psi; - w = c__ + z__[ii] * z__[ii] / delta[ii] + z__[*n] * z__[*n] / delta[* - n]; - - if (w <= 0.) { - temp = z__[*n - 1] * z__[*n - 1] / (d__[*n] - d__[*n - 1] + *rho) - + z__[*n] * z__[*n] / *rho; - if (c__ <= temp) { - tau = *rho; - } else { - del = d__[*n] - d__[*n - 1]; - a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n] - ; - b = z__[*n] * z__[*n] * del; - if (a < 0.) { - tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); - } else { - tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); - } - } - -/* - It can be proved that - D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO -*/ - - dltlb = midpt; - dltub = *rho; - } else { - del = d__[*n] - d__[*n - 1]; - a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]; - b = z__[*n] * z__[*n] * del; - if (a < 0.) { - tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); - } else { - tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); - } - -/* - It can be proved that - D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2 -*/ - - dltlb = 0.; - dltub = midpt; - } - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] = d__[j] - d__[*i__] - tau; -/* L30: */ - } - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = ii; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / delta[j]; - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L40: */ - } - erretm = abs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - temp = z__[*n] / delta[*n]; - phi = z__[*n] * temp; - dphi = temp * temp; - erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi - + dphi); - - w = rhoinv + phi + psi; - -/* Test for convergence */ - - if (abs(w) <= eps * erretm) { - *dlam = d__[*i__] + tau; - goto L250; - } - - if (w <= 0.) { - dltlb = max(dltlb,tau); - } else { - dltub = min(dltub,tau); - } - -/* Calculate the new step */ - - ++niter; - c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; - a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * ( - dpsi + dphi); - b = delta[*n - 1] * delta[*n] * w; - if (c__ < 0.) { - c__ = abs(c__); - } - if (c__ == 0.) { -/* - ETA = B/A - ETA = RHO - TAU -*/ - eta = dltub - tau; - } else if (a >= 0.) { - eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ - * 2.); - } else { - eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) - ); - } - -/* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. -*/ - - if (w * eta > 0.) { - eta = -w / (dpsi + dphi); - } - temp = tau + eta; - if (temp > dltub || temp < dltlb) { - if (w < 0.) { - eta = (dltub - tau) / 2.; - } else { - eta = (dltlb - tau) / 2.; - } - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] -= eta; -/* L50: */ - } - - tau += eta; - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = ii; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / delta[j]; - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L60: */ - } - erretm = abs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - temp = z__[*n] / delta[*n]; - phi = z__[*n] * temp; - dphi = temp * temp; - erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi - + dphi); - - w = rhoinv + phi + psi; - -/* Main loop to update the values of the array DELTA */ - - iter = niter + 1; - - for (niter = iter; niter <= 30; ++niter) { - -/* Test for convergence */ - - if (abs(w) <= eps * erretm) { - *dlam = d__[*i__] + tau; - goto L250; - } - - if (w <= 0.) { - dltlb = max(dltlb,tau); - } else { - dltub = min(dltub,tau); - } - -/* Calculate the new step */ - - c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; - a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * - (dpsi + dphi); - b = delta[*n - 1] * delta[*n] * w; - if (a >= 0.) { - eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( - c__ * 2.); - } else { - eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs( - d__1)))); - } - -/* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. -*/ - - if (w * eta > 0.) { - eta = -w / (dpsi + dphi); - } - temp = tau + eta; - if (temp > dltub || temp < dltlb) { - if (w < 0.) { - eta = (dltub - tau) / 2.; - } else { - eta = (dltlb - tau) / 2.; - } - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] -= eta; -/* L70: */ - } - - tau += eta; - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = ii; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / delta[j]; - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L80: */ - } - erretm = abs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - temp = z__[*n] / delta[*n]; - phi = z__[*n] * temp; - dphi = temp * temp; - erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * ( - dpsi + dphi); - - w = rhoinv + phi + psi; -/* L90: */ - } - -/* Return with INFO = 1, NITER = MAXIT and not converged */ - - *info = 1; - *dlam = d__[*i__] + tau; - goto L250; - -/* End for the case I = N */ - - } else { - -/* The case for I < N */ - - niter = 1; - ip1 = *i__ + 1; - -/* Calculate initial guess */ - - del = d__[ip1] - d__[*i__]; - midpt = del / 2.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] = d__[j] - d__[*i__] - midpt; -/* L100: */ - } - - psi = 0.; - i__1 = *i__ - 1; - for (j = 1; j <= i__1; ++j) { - psi += z__[j] * z__[j] / delta[j]; -/* L110: */ - } - - phi = 0.; - i__1 = *i__ + 2; - for (j = *n; j >= i__1; --j) { - phi += z__[j] * z__[j] / delta[j]; -/* L120: */ - } - c__ = rhoinv + psi + phi; - w = c__ + z__[*i__] * z__[*i__] / delta[*i__] + z__[ip1] * z__[ip1] / - delta[ip1]; - - if (w > 0.) { - -/* - d(i)< the ith eigenvalue < (d(i)+d(i+1))/2 - - We choose d(i) as origin. -*/ - - orgati = TRUE_; - a = c__ * del + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; - b = z__[*i__] * z__[*i__] * del; - if (a > 0.) { - tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( - d__1)))); - } else { - tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( - c__ * 2.); - } - dltlb = 0.; - dltub = midpt; - } else { - -/* - (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1) - - We choose d(i+1) as origin. -*/ - - orgati = FALSE_; - a = c__ * del - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; - b = z__[ip1] * z__[ip1] * del; - if (a < 0.) { - tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs( - d__1)))); - } else { - tau = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) / - (c__ * 2.); - } - dltlb = -midpt; - dltub = 0.; - } - - if (orgati) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] = d__[j] - d__[*i__] - tau; -/* L130: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] = d__[j] - d__[ip1] - tau; -/* L140: */ - } - } - if (orgati) { - ii = *i__; - } else { - ii = *i__ + 1; - } - iim1 = ii - 1; - iip1 = ii + 1; - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = iim1; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / delta[j]; - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L150: */ - } - erretm = abs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - dphi = 0.; - phi = 0.; - i__1 = iip1; - for (j = *n; j >= i__1; --j) { - temp = z__[j] / delta[j]; - phi += z__[j] * temp; - dphi += temp * temp; - erretm += phi; -/* L160: */ - } - - w = rhoinv + phi + psi; - -/* - W is the value of the secular function with - its ii-th element removed. -*/ - - swtch3 = FALSE_; - if (orgati) { - if (w < 0.) { - swtch3 = TRUE_; - } - } else { - if (w > 0.) { - swtch3 = TRUE_; - } - } - if (ii == 1 || ii == *n) { - swtch3 = FALSE_; - } - - temp = z__[ii] / delta[ii]; - dw = dpsi + dphi + temp * temp; - temp = z__[ii] * temp; - w += temp; - erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + - abs(tau) * dw; - -/* Test for convergence */ - - if (abs(w) <= eps * erretm) { - if (orgati) { - *dlam = d__[*i__] + tau; - } else { - *dlam = d__[ip1] + tau; - } - goto L250; - } - - if (w <= 0.) { - dltlb = max(dltlb,tau); - } else { - dltub = min(dltub,tau); - } - -/* Calculate the new step */ - - ++niter; - if (! swtch3) { - if (orgati) { -/* Computing 2nd power */ - d__1 = z__[*i__] / delta[*i__]; - c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (d__1 * - d__1); - } else { -/* Computing 2nd power */ - d__1 = z__[ip1] / delta[ip1]; - c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (d__1 * - d__1); - } - a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] * - dw; - b = delta[*i__] * delta[ip1] * w; - if (c__ == 0.) { - if (a == 0.) { - if (orgati) { - a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] * - (dpsi + dphi); - } else { - a = z__[ip1] * z__[ip1] + delta[*i__] * delta[*i__] * - (dpsi + dphi); - } - } - eta = b / a; - } else if (a <= 0.) { - eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( - c__ * 2.); - } else { - eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( - d__1)))); - } - } else { - -/* Interpolation using THREE most relevant poles */ - - temp = rhoinv + psi + phi; - if (orgati) { - temp1 = z__[iim1] / delta[iim1]; - temp1 *= temp1; - c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - d__[ - iip1]) * temp1; - zz[0] = z__[iim1] * z__[iim1]; - zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + dphi); - } else { - temp1 = z__[iip1] / delta[iip1]; - temp1 *= temp1; - c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - d__[ - iim1]) * temp1; - zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - temp1)); - zz[2] = z__[iip1] * z__[iip1]; - } - zz[1] = z__[ii] * z__[ii]; - dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info); - if (*info != 0) { - goto L250; - } - } - -/* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. -*/ - - if (w * eta >= 0.) { - eta = -w / dw; - } - temp = tau + eta; - if (temp > dltub || temp < dltlb) { - if (w < 0.) { - eta = (dltub - tau) / 2.; - } else { - eta = (dltlb - tau) / 2.; - } - } - - prew = w; - -/* L170: */ - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] -= eta; -/* L180: */ - } - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = iim1; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / delta[j]; - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L190: */ - } - erretm = abs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - dphi = 0.; - phi = 0.; - i__1 = iip1; - for (j = *n; j >= i__1; --j) { - temp = z__[j] / delta[j]; - phi += z__[j] * temp; - dphi += temp * temp; - erretm += phi; -/* L200: */ - } - - temp = z__[ii] / delta[ii]; - dw = dpsi + dphi + temp * temp; - temp = z__[ii] * temp; - w = rhoinv + phi + psi + temp; - erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + ( - d__1 = tau + eta, abs(d__1)) * dw; - - swtch = FALSE_; - if (orgati) { - if (-w > abs(prew) / 10.) { - swtch = TRUE_; - } - } else { - if (w > abs(prew) / 10.) { - swtch = TRUE_; - } - } - - tau += eta; - -/* Main loop to update the values of the array DELTA */ - - iter = niter + 1; - - for (niter = iter; niter <= 30; ++niter) { - -/* Test for convergence */ - - if (abs(w) <= eps * erretm) { - if (orgati) { - *dlam = d__[*i__] + tau; - } else { - *dlam = d__[ip1] + tau; - } - goto L250; - } - - if (w <= 0.) { - dltlb = max(dltlb,tau); - } else { - dltub = min(dltub,tau); - } - -/* Calculate the new step */ - - if (! swtch3) { - if (! swtch) { - if (orgati) { -/* Computing 2nd power */ - d__1 = z__[*i__] / delta[*i__]; - c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * ( - d__1 * d__1); - } else { -/* Computing 2nd power */ - d__1 = z__[ip1] / delta[ip1]; - c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * - (d__1 * d__1); - } - } else { - temp = z__[ii] / delta[ii]; - if (orgati) { - dpsi += temp * temp; - } else { - dphi += temp * temp; - } - c__ = w - delta[*i__] * dpsi - delta[ip1] * dphi; - } - a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] - * dw; - b = delta[*i__] * delta[ip1] * w; - if (c__ == 0.) { - if (a == 0.) { - if (! swtch) { - if (orgati) { - a = z__[*i__] * z__[*i__] + delta[ip1] * - delta[ip1] * (dpsi + dphi); - } else { - a = z__[ip1] * z__[ip1] + delta[*i__] * delta[ - *i__] * (dpsi + dphi); - } - } else { - a = delta[*i__] * delta[*i__] * dpsi + delta[ip1] - * delta[ip1] * dphi; - } - } - eta = b / a; - } else if (a <= 0.) { - eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) - / (c__ * 2.); - } else { - eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, - abs(d__1)))); - } - } else { - -/* Interpolation using THREE most relevant poles */ - - temp = rhoinv + psi + phi; - if (swtch) { - c__ = temp - delta[iim1] * dpsi - delta[iip1] * dphi; - zz[0] = delta[iim1] * delta[iim1] * dpsi; - zz[2] = delta[iip1] * delta[iip1] * dphi; - } else { - if (orgati) { - temp1 = z__[iim1] / delta[iim1]; - temp1 *= temp1; - c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - - d__[iip1]) * temp1; - zz[0] = z__[iim1] * z__[iim1]; - zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + - dphi); - } else { - temp1 = z__[iip1] / delta[iip1]; - temp1 *= temp1; - c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - - d__[iim1]) * temp1; - zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - - temp1)); - zz[2] = z__[iip1] * z__[iip1]; - } - } - dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, - info); - if (*info != 0) { - goto L250; - } - } - -/* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. -*/ - - if (w * eta >= 0.) { - eta = -w / dw; - } - temp = tau + eta; - if (temp > dltub || temp < dltlb) { - if (w < 0.) { - eta = (dltub - tau) / 2.; - } else { - eta = (dltlb - tau) / 2.; - } - } - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] -= eta; -/* L210: */ - } - - tau += eta; - prew = w; - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = iim1; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / delta[j]; - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L220: */ - } - erretm = abs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - dphi = 0.; - phi = 0.; - i__1 = iip1; - for (j = *n; j >= i__1; --j) { - temp = z__[j] / delta[j]; - phi += z__[j] * temp; - dphi += temp * temp; - erretm += phi; -/* L230: */ - } - - temp = z__[ii] / delta[ii]; - dw = dpsi + dphi + temp * temp; - temp = z__[ii] * temp; - w = rhoinv + phi + psi + temp; - erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. - + abs(tau) * dw; - if (w * prew > 0. && abs(w) > abs(prew) / 10.) { - swtch = ! swtch; - } - -/* L240: */ - } - -/* Return with INFO = 1, NITER = MAXIT and not converged */ - - *info = 1; - if (orgati) { - *dlam = d__[*i__] + tau; - } else { - *dlam = d__[ip1] + tau; - } - - } - -L250: - - return 0; - -/* End of DLAED4 */ - -} /* dlaed4_ */ - -/* Subroutine */ int dlaed5_(integer *i__, doublereal *d__, doublereal *z__, - doublereal *delta, doublereal *rho, doublereal *dlam) -{ - /* System generated locals */ - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal b, c__, w, del, tau, temp; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - September 30, 1994 - - - Purpose - ======= - - This subroutine computes the I-th eigenvalue of a symmetric rank-one - modification of a 2-by-2 diagonal matrix - - diag( D ) + RHO * Z * transpose(Z) . - - The diagonal elements in the array D are assumed to satisfy - - D(i) < D(j) for i < j . - - We also assume RHO > 0 and that the Euclidean norm of the vector - Z is one. - - Arguments - ========= - - I (input) INTEGER - The index of the eigenvalue to be computed. I = 1 or I = 2. - - D (input) DOUBLE PRECISION array, dimension (2) - The original eigenvalues. We assume D(1) < D(2). - - Z (input) DOUBLE PRECISION array, dimension (2) - The components of the updating vector. - - DELTA (output) DOUBLE PRECISION array, dimension (2) - The vector DELTA contains the information necessary - to construct the eigenvectors. - - RHO (input) DOUBLE PRECISION - The scalar in the symmetric updating formula. - - DLAM (output) DOUBLE PRECISION - The computed lambda_I, the I-th updated eigenvalue. - - Further Details - =============== - - Based on contributions by - Ren-Cang Li, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --delta; - --z__; - --d__; - - /* Function Body */ - del = d__[2] - d__[1]; - if (*i__ == 1) { - w = *rho * 2. * (z__[2] * z__[2] - z__[1] * z__[1]) / del + 1.; - if (w > 0.) { - b = del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); - c__ = *rho * z__[1] * z__[1] * del; - -/* B > ZERO, always */ - - tau = c__ * 2. / (b + sqrt((d__1 = b * b - c__ * 4., abs(d__1)))); - *dlam = d__[1] + tau; - delta[1] = -z__[1] / tau; - delta[2] = z__[2] / (del - tau); - } else { - b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); - c__ = *rho * z__[2] * z__[2] * del; - if (b > 0.) { - tau = c__ * -2. / (b + sqrt(b * b + c__ * 4.)); - } else { - tau = (b - sqrt(b * b + c__ * 4.)) / 2.; - } - *dlam = d__[2] + tau; - delta[1] = -z__[1] / (del + tau); - delta[2] = -z__[2] / tau; - } - temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); - delta[1] /= temp; - delta[2] /= temp; - } else { - -/* Now I=2 */ - - b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); - c__ = *rho * z__[2] * z__[2] * del; - if (b > 0.) { - tau = (b + sqrt(b * b + c__ * 4.)) / 2.; - } else { - tau = c__ * 2. / (-b + sqrt(b * b + c__ * 4.)); - } - *dlam = d__[2] + tau; - delta[1] = -z__[1] / (del + tau); - delta[2] = -z__[2] / tau; - temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); - delta[1] /= temp; - delta[2] /= temp; - } - return 0; - -/* End OF DLAED5 */ - -} /* dlaed5_ */ - -/* Subroutine */ int dlaed6_(integer *kniter, logical *orgati, doublereal * - rho, doublereal *d__, doublereal *z__, doublereal *finit, doublereal * - tau, integer *info) -{ - /* Initialized data */ - - static logical first = TRUE_; - - /* System generated locals */ - integer i__1; - doublereal d__1, d__2, d__3, d__4; - - /* Builtin functions */ - double sqrt(doublereal), log(doublereal), pow_di(doublereal *, integer *); - - /* Local variables */ - static doublereal a, b, c__, f; - static integer i__; - static doublereal fc, df, ddf, eta, eps, base; - static integer iter; - static doublereal temp, temp1, temp2, temp3, temp4; - static logical scale; - static integer niter; - static doublereal small1, small2, sminv1, sminv2; - - static doublereal dscale[3], sclfac, zscale[3], erretm, sclinv; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 - - - Purpose - ======= - - DLAED6 computes the positive or negative root (closest to the origin) - of - z(1) z(2) z(3) - f(x) = rho + --------- + ---------- + --------- - d(1)-x d(2)-x d(3)-x - - It is assumed that - - if ORGATI = .true. the root is between d(2) and d(3); - otherwise it is between d(1) and d(2) - - This routine will be called by DLAED4 when necessary. In most cases, - the root sought is the smallest in magnitude, though it might not be - in some extremely rare situations. - - Arguments - ========= - - KNITER (input) INTEGER - Refer to DLAED4 for its significance. - - ORGATI (input) LOGICAL - If ORGATI is true, the needed root is between d(2) and - d(3); otherwise it is between d(1) and d(2). See - DLAED4 for further details. - - RHO (input) DOUBLE PRECISION - Refer to the equation f(x) above. - - D (input) DOUBLE PRECISION array, dimension (3) - D satisfies d(1) < d(2) < d(3). - - Z (input) DOUBLE PRECISION array, dimension (3) - Each of the elements in z must be positive. - - FINIT (input) DOUBLE PRECISION - The value of f at 0. It is more accurate than the one - evaluated inside this routine (if someone wants to do - so). - - TAU (output) DOUBLE PRECISION - The root of the equation f(x). - - INFO (output) INTEGER - = 0: successful exit - > 0: if INFO = 1, failure to converge - - Further Details - =============== - - Based on contributions by - Ren-Cang Li, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== -*/ - - /* Parameter adjustments */ - --z__; - --d__; - - /* Function Body */ - - *info = 0; - - niter = 1; - *tau = 0.; - if (*kniter == 2) { - if (*orgati) { - temp = (d__[3] - d__[2]) / 2.; - c__ = *rho + z__[1] / (d__[1] - d__[2] - temp); - a = c__ * (d__[2] + d__[3]) + z__[2] + z__[3]; - b = c__ * d__[2] * d__[3] + z__[2] * d__[3] + z__[3] * d__[2]; - } else { - temp = (d__[1] - d__[2]) / 2.; - c__ = *rho + z__[3] / (d__[3] - d__[2] - temp); - a = c__ * (d__[1] + d__[2]) + z__[1] + z__[2]; - b = c__ * d__[1] * d__[2] + z__[1] * d__[2] + z__[2] * d__[1]; - } -/* Computing MAX */ - d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1,d__2), d__2 = abs(c__); - temp = max(d__1,d__2); - a /= temp; - b /= temp; - c__ /= temp; - if (c__ == 0.) { - *tau = b / a; - } else if (a <= 0.) { - *tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( - c__ * 2.); - } else { - *tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)) - )); - } - temp = *rho + z__[1] / (d__[1] - *tau) + z__[2] / (d__[2] - *tau) + - z__[3] / (d__[3] - *tau); - if (abs(*finit) <= abs(temp)) { - *tau = 0.; - } - } - -/* - On first call to routine, get machine parameters for - possible scaling to avoid overflow -*/ - - if (first) { - eps = EPSILON; - base = BASE; - i__1 = (integer) (log(SAFEMINIMUM) / log(base) / 3.); - small1 = pow_di(&base, &i__1); - sminv1 = 1. / small1; - small2 = small1 * small1; - sminv2 = sminv1 * sminv1; - first = FALSE_; - } - -/* - Determine if scaling of inputs necessary to avoid overflow - when computing 1/TEMP**3 -*/ - - if (*orgati) { -/* Computing MIN */ - d__3 = (d__1 = d__[2] - *tau, abs(d__1)), d__4 = (d__2 = d__[3] - * - tau, abs(d__2)); - temp = min(d__3,d__4); - } else { -/* Computing MIN */ - d__3 = (d__1 = d__[1] - *tau, abs(d__1)), d__4 = (d__2 = d__[2] - * - tau, abs(d__2)); - temp = min(d__3,d__4); - } - scale = FALSE_; - if (temp <= small1) { - scale = TRUE_; - if (temp <= small2) { - -/* Scale up by power of radix nearest 1/SAFMIN**(2/3) */ - - sclfac = sminv2; - sclinv = small2; - } else { - -/* Scale up by power of radix nearest 1/SAFMIN**(1/3) */ - - sclfac = sminv1; - sclinv = small1; - } - -/* Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) */ - - for (i__ = 1; i__ <= 3; ++i__) { - dscale[i__ - 1] = d__[i__] * sclfac; - zscale[i__ - 1] = z__[i__] * sclfac; -/* L10: */ - } - *tau *= sclfac; - } else { - -/* Copy D and Z to DSCALE and ZSCALE */ - - for (i__ = 1; i__ <= 3; ++i__) { - dscale[i__ - 1] = d__[i__]; - zscale[i__ - 1] = z__[i__]; -/* L20: */ - } - } - - fc = 0.; - df = 0.; - ddf = 0.; - for (i__ = 1; i__ <= 3; ++i__) { - temp = 1. / (dscale[i__ - 1] - *tau); - temp1 = zscale[i__ - 1] * temp; - temp2 = temp1 * temp; - temp3 = temp2 * temp; - fc += temp1 / dscale[i__ - 1]; - df += temp2; - ddf += temp3; -/* L30: */ - } - f = *finit + *tau * fc; - - if (abs(f) <= 0.) { - goto L60; - } - -/* - Iteration begins - - It is not hard to see that - - 1) Iterations will go up monotonically - if FINIT < 0; - - 2) Iterations will go down monotonically - if FINIT > 0. -*/ - - iter = niter + 1; - - for (niter = iter; niter <= 20; ++niter) { - - if (*orgati) { - temp1 = dscale[1] - *tau; - temp2 = dscale[2] - *tau; - } else { - temp1 = dscale[0] - *tau; - temp2 = dscale[1] - *tau; - } - a = (temp1 + temp2) * f - temp1 * temp2 * df; - b = temp1 * temp2 * f; - c__ = f - (temp1 + temp2) * df + temp1 * temp2 * ddf; -/* Computing MAX */ - d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1,d__2), d__2 = abs(c__); - temp = max(d__1,d__2); - a /= temp; - b /= temp; - c__ /= temp; - if (c__ == 0.) { - eta = b / a; - } else if (a <= 0.) { - eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ - * 2.); - } else { - eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) - ); - } - if (f * eta >= 0.) { - eta = -f / df; - } - - temp = eta + *tau; - if (*orgati) { - if (eta > 0. && temp >= dscale[2]) { - eta = (dscale[2] - *tau) / 2.; - } - if (eta < 0. && temp <= dscale[1]) { - eta = (dscale[1] - *tau) / 2.; - } - } else { - if (eta > 0. && temp >= dscale[1]) { - eta = (dscale[1] - *tau) / 2.; - } - if (eta < 0. && temp <= dscale[0]) { - eta = (dscale[0] - *tau) / 2.; - } - } - *tau += eta; - - fc = 0.; - erretm = 0.; - df = 0.; - ddf = 0.; - for (i__ = 1; i__ <= 3; ++i__) { - temp = 1. / (dscale[i__ - 1] - *tau); - temp1 = zscale[i__ - 1] * temp; - temp2 = temp1 * temp; - temp3 = temp2 * temp; - temp4 = temp1 / dscale[i__ - 1]; - fc += temp4; - erretm += abs(temp4); - df += temp2; - ddf += temp3; -/* L40: */ - } - f = *finit + *tau * fc; - erretm = (abs(*finit) + abs(*tau) * erretm) * 8. + abs(*tau) * df; - if (abs(f) <= eps * erretm) { - goto L60; - } -/* L50: */ - } - *info = 1; -L60: - -/* Undo scaling */ - - if (scale) { - *tau *= sclinv; - } - return 0; - -/* End of DLAED6 */ - -} /* dlaed6_ */ - -/* Subroutine */ int dlaed7_(integer *icompq, integer *n, integer *qsiz, - integer *tlvls, integer *curlvl, integer *curpbm, doublereal *d__, - doublereal *q, integer *ldq, integer *indxq, doublereal *rho, integer - *cutpnt, doublereal *qstore, integer *qptr, integer *prmptr, integer * - perm, integer *givptr, integer *givcol, doublereal *givnum, - doublereal *work, integer *iwork, integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, i__1, i__2; - - /* Builtin functions */ - integer pow_ii(integer *, integer *); - - /* Local variables */ - static integer i__, k, n1, n2, is, iw, iz, iq2, ptr, ldq2, indx, curr; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - static integer indxc, indxp; - extern /* Subroutine */ int dlaed8_(integer *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, doublereal *, - integer *, doublereal *, integer *, integer *, integer *, - doublereal *, integer *, integer *, integer *), dlaed9_(integer *, - integer *, integer *, integer *, doublereal *, doublereal *, - integer *, doublereal *, doublereal *, doublereal *, doublereal *, - integer *, integer *), dlaeda_(integer *, integer *, integer *, - integer *, integer *, integer *, integer *, integer *, doublereal - *, doublereal *, integer *, doublereal *, doublereal *, integer *) - ; - static integer idlmda; - extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, - integer *, integer *, integer *), xerbla_(char *, integer *); - static integer coltyp; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - DLAED7 computes the updated eigensystem of a diagonal - matrix after modification by a rank-one symmetric matrix. This - routine is used only for the eigenproblem which requires all - eigenvalues and optionally eigenvectors of a dense symmetric matrix - that has been reduced to tridiagonal form. DLAED1 handles - the case in which all eigenvalues and eigenvectors of a symmetric - tridiagonal matrix are desired. - - T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) - - where Z = Q'u, u is a vector of length N with ones in the - CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. - - The eigenvectors of the original matrix are stored in Q, and the - eigenvalues are in D. The algorithm consists of three stages: - - The first stage consists of deflating the size of the problem - when there are multiple eigenvalues or if there is a zero in - the Z vector. For each such occurence the dimension of the - secular equation problem is reduced by one. This stage is - performed by the routine DLAED8. - - The second stage consists of calculating the updated - eigenvalues. This is done by finding the roots of the secular - equation via the routine DLAED4 (as called by DLAED9). - This routine also calculates the eigenvectors of the current - problem. - - The final stage consists of computing the updated eigenvectors - directly using the updated eigenvalues. The eigenvectors for - the current problem are multiplied with the eigenvectors from - the overall problem. - - Arguments - ========= - - ICOMPQ (input) INTEGER - = 0: Compute eigenvalues only. - = 1: Compute eigenvectors of original dense symmetric matrix - also. On entry, Q contains the orthogonal matrix used - to reduce the original matrix to tridiagonal form. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - QSIZ (input) INTEGER - The dimension of the orthogonal matrix used to reduce - the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. - - TLVLS (input) INTEGER - The total number of merging levels in the overall divide and - conquer tree. - - CURLVL (input) INTEGER - The current level in the overall merge routine, - 0 <= CURLVL <= TLVLS. - - CURPBM (input) INTEGER - The current problem in the current level in the overall - merge routine (counting from upper left to lower right). - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the eigenvalues of the rank-1-perturbed matrix. - On exit, the eigenvalues of the repaired matrix. - - Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) - On entry, the eigenvectors of the rank-1-perturbed matrix. - On exit, the eigenvectors of the repaired tridiagonal matrix. - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). - - INDXQ (output) INTEGER array, dimension (N) - The permutation which will reintegrate the subproblem just - solved back into sorted order, i.e., D( INDXQ( I = 1, N ) ) - will be in ascending order. - - RHO (input) DOUBLE PRECISION - The subdiagonal element used to create the rank-1 - modification. - - CUTPNT (input) INTEGER - Contains the location of the last eigenvalue in the leading - sub-matrix. min(1,N) <= CUTPNT <= N. - - QSTORE (input/output) DOUBLE PRECISION array, dimension (N**2+1) - Stores eigenvectors of submatrices encountered during - divide and conquer, packed together. QPTR points to - beginning of the submatrices. - - QPTR (input/output) INTEGER array, dimension (N+2) - List of indices pointing to beginning of submatrices stored - in QSTORE. The submatrices are numbered starting at the - bottom left of the divide and conquer tree, from left to - right and bottom to top. - - PRMPTR (input) INTEGER array, dimension (N lg N) - Contains a list of pointers which indicate where in PERM a - level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) - indicates the size of the permutation and also the size of - the full, non-deflated problem. - - PERM (input) INTEGER array, dimension (N lg N) - Contains the permutations (from deflation and sorting) to be - applied to each eigenblock. - - GIVPTR (input) INTEGER array, dimension (N lg N) - Contains a list of pointers which indicate where in GIVCOL a - level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) - indicates the number of Givens rotations. - - GIVCOL (input) INTEGER array, dimension (2, N lg N) - Each pair of numbers indicates a pair of columns to take place - in a Givens rotation. - - GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N) - Each number indicates the S value to be used in the - corresponding Givens rotation. - - WORK (workspace) DOUBLE PRECISION array, dimension (3*N+QSIZ*N) - - IWORK (workspace) INTEGER array, dimension (4*N) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an eigenvalue did not converge - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --indxq; - --qstore; - --qptr; - --prmptr; - --perm; - --givptr; - givcol -= 3; - givnum -= 3; - --work; - --iwork; - - /* Function Body */ - *info = 0; - - if (*icompq < 0 || *icompq > 1) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*icompq == 1 && *qsiz < *n) { - *info = -4; - } else if (*ldq < max(1,*n)) { - *info = -9; - } else if (min(1,*n) > *cutpnt || *n < *cutpnt) { - *info = -12; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLAED7", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* - The following values are for bookkeeping purposes only. They are - integer pointers which indicate the portion of the workspace - used by a particular array in DLAED8 and DLAED9. -*/ - - if (*icompq == 1) { - ldq2 = *qsiz; - } else { - ldq2 = *n; - } - - iz = 1; - idlmda = iz + *n; - iw = idlmda + *n; - iq2 = iw + *n; - is = iq2 + *n * ldq2; - - indx = 1; - indxc = indx + *n; - coltyp = indxc + *n; - indxp = coltyp + *n; - -/* - Form the z-vector which consists of the last row of Q_1 and the - first row of Q_2. -*/ - - ptr = pow_ii(&c__2, tlvls) + 1; - i__1 = *curlvl - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = *tlvls - i__; - ptr += pow_ii(&c__2, &i__2); -/* L10: */ - } - curr = ptr + *curpbm; - dlaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], & - givcol[3], &givnum[3], &qstore[1], &qptr[1], &work[iz], &work[iz - + *n], info); - -/* - When solving the final problem, we no longer need the stored data, - so we will overwrite the data from this level onto the previously - used storage space. -*/ - - if (*curlvl == *tlvls) { - qptr[curr] = 1; - prmptr[curr] = 1; - givptr[curr] = 1; - } - -/* Sort and Deflate eigenvalues. */ - - dlaed8_(icompq, &k, n, qsiz, &d__[1], &q[q_offset], ldq, &indxq[1], rho, - cutpnt, &work[iz], &work[idlmda], &work[iq2], &ldq2, &work[iw], & - perm[prmptr[curr]], &givptr[curr + 1], &givcol[(givptr[curr] << 1) - + 1], &givnum[(givptr[curr] << 1) + 1], &iwork[indxp], &iwork[ - indx], info); - prmptr[curr + 1] = prmptr[curr] + *n; - givptr[curr + 1] += givptr[curr]; - -/* Solve Secular Equation. */ - - if (k != 0) { - dlaed9_(&k, &c__1, &k, n, &d__[1], &work[is], &k, rho, &work[idlmda], - &work[iw], &qstore[qptr[curr]], &k, info); - if (*info != 0) { - goto L30; - } - if (*icompq == 1) { - dgemm_("N", "N", qsiz, &k, &k, &c_b2453, &work[iq2], &ldq2, & - qstore[qptr[curr]], &k, &c_b2467, &q[q_offset], ldq); - } -/* Computing 2nd power */ - i__1 = k; - qptr[curr + 1] = qptr[curr] + i__1 * i__1; - -/* Prepare the INDXQ sorting permutation. */ - - n1 = k; - n2 = *n - k; - dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); - } else { - qptr[curr + 1] = qptr[curr]; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - indxq[i__] = i__; -/* L20: */ - } - } - -L30: - return 0; - -/* End of DLAED7 */ - -} /* dlaed7_ */ - -/* Subroutine */ int dlaed8_(integer *icompq, integer *k, integer *n, integer - *qsiz, doublereal *d__, doublereal *q, integer *ldq, integer *indxq, - doublereal *rho, integer *cutpnt, doublereal *z__, doublereal *dlamda, - doublereal *q2, integer *ldq2, doublereal *w, integer *perm, integer - *givptr, integer *givcol, doublereal *givnum, integer *indxp, integer - *indx, integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, q2_dim1, q2_offset, i__1; - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal c__; - static integer i__, j; - static doublereal s, t; - static integer k2, n1, n2, jp, n1p1; - static doublereal eps, tau, tol; - static integer jlam, imax, jmax; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *), dscal_( - integer *, doublereal *, doublereal *, integer *), dcopy_(integer - *, doublereal *, integer *, doublereal *, integer *); - - extern integer idamax_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, - integer *, integer *, integer *), dlacpy_(char *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - September 30, 1994 - - - Purpose - ======= - - DLAED8 merges the two sets of eigenvalues together into a single - sorted set. Then it tries to deflate the size of the problem. - There are two ways in which deflation can occur: when two or more - eigenvalues are close together or if there is a tiny element in the - Z vector. For each such occurrence the order of the related secular - equation problem is reduced by one. - - Arguments - ========= - - ICOMPQ (input) INTEGER - = 0: Compute eigenvalues only. - = 1: Compute eigenvectors of original dense symmetric matrix - also. On entry, Q contains the orthogonal matrix used - to reduce the original matrix to tridiagonal form. - - K (output) INTEGER - The number of non-deflated eigenvalues, and the order of the - related secular equation. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - QSIZ (input) INTEGER - The dimension of the orthogonal matrix used to reduce - the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the eigenvalues of the two submatrices to be - combined. On exit, the trailing (N-K) updated eigenvalues - (those which were deflated) sorted into increasing order. - - Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) - If ICOMPQ = 0, Q is not referenced. Otherwise, - on entry, Q contains the eigenvectors of the partially solved - system which has been previously updated in matrix - multiplies with other partially solved eigensystems. - On exit, Q contains the trailing (N-K) updated eigenvectors - (those which were deflated) in its last N-K columns. - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). - - INDXQ (input) INTEGER array, dimension (N) - The permutation which separately sorts the two sub-problems - in D into ascending order. Note that elements in the second - half of this permutation must first have CUTPNT added to - their values in order to be accurate. - - RHO (input/output) DOUBLE PRECISION - On entry, the off-diagonal element associated with the rank-1 - cut which originally split the two submatrices which are now - being recombined. - On exit, RHO has been modified to the value required by - DLAED3. - - CUTPNT (input) INTEGER - The location of the last eigenvalue in the leading - sub-matrix. min(1,N) <= CUTPNT <= N. - - Z (input) DOUBLE PRECISION array, dimension (N) - On entry, Z contains the updating vector (the last row of - the first sub-eigenvector matrix and the first row of the - second sub-eigenvector matrix). - On exit, the contents of Z are destroyed by the updating - process. - - DLAMDA (output) DOUBLE PRECISION array, dimension (N) - A copy of the first K eigenvalues which will be used by - DLAED3 to form the secular equation. - - Q2 (output) DOUBLE PRECISION array, dimension (LDQ2,N) - If ICOMPQ = 0, Q2 is not referenced. Otherwise, - a copy of the first K eigenvectors which will be used by - DLAED7 in a matrix multiply (DGEMM) to update the new - eigenvectors. - - LDQ2 (input) INTEGER - The leading dimension of the array Q2. LDQ2 >= max(1,N). - - W (output) DOUBLE PRECISION array, dimension (N) - The first k values of the final deflation-altered z-vector and - will be passed to DLAED3. - - PERM (output) INTEGER array, dimension (N) - The permutations (from deflation and sorting) to be applied - to each eigenblock. - - GIVPTR (output) INTEGER - The number of Givens rotations which took place in this - subproblem. - - GIVCOL (output) INTEGER array, dimension (2, N) - Each pair of numbers indicates a pair of columns to take place - in a Givens rotation. - - GIVNUM (output) DOUBLE PRECISION array, dimension (2, N) - Each number indicates the S value to be used in the - corresponding Givens rotation. - - INDXP (workspace) INTEGER array, dimension (N) - The permutation used to place deflated values of D at the end - of the array. INDXP(1:K) points to the nondeflated D-values - and INDXP(K+1:N) points to the deflated eigenvalues. - - INDX (workspace) INTEGER array, dimension (N) - The permutation used to sort the contents of D into ascending - order. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --indxq; - --z__; - --dlamda; - q2_dim1 = *ldq2; - q2_offset = 1 + q2_dim1; - q2 -= q2_offset; - --w; - --perm; - givcol -= 3; - givnum -= 3; - --indxp; - --indx; - - /* Function Body */ - *info = 0; - - if (*icompq < 0 || *icompq > 1) { - *info = -1; - } else if (*n < 0) { - *info = -3; - } else if (*icompq == 1 && *qsiz < *n) { - *info = -4; - } else if (*ldq < max(1,*n)) { - *info = -7; - } else if (*cutpnt < min(1,*n) || *cutpnt > *n) { - *info = -10; - } else if (*ldq2 < max(1,*n)) { - *info = -14; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLAED8", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - n1 = *cutpnt; - n2 = *n - n1; - n1p1 = n1 + 1; - - if (*rho < 0.) { - dscal_(&n2, &c_b2589, &z__[n1p1], &c__1); - } - -/* Normalize z so that norm(z) = 1 */ - - t = 1. / sqrt(2.); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - indx[j] = j; -/* L10: */ - } - dscal_(n, &t, &z__[1], &c__1); - *rho = (d__1 = *rho * 2., abs(d__1)); - -/* Sort the eigenvalues into increasing order */ - - i__1 = *n; - for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) { - indxq[i__] += *cutpnt; -/* L20: */ - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dlamda[i__] = d__[indxq[i__]]; - w[i__] = z__[indxq[i__]]; -/* L30: */ - } - i__ = 1; - j = *cutpnt + 1; - dlamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - d__[i__] = dlamda[indx[i__]]; - z__[i__] = w[indx[i__]]; -/* L40: */ - } - -/* Calculate the allowable deflation tolerence */ - - imax = idamax_(n, &z__[1], &c__1); - jmax = idamax_(n, &d__[1], &c__1); - eps = EPSILON; - tol = eps * 8. * (d__1 = d__[jmax], abs(d__1)); - -/* - If the rank-1 modifier is small enough, no more needs to be done - except to reorganize Q so that its columns correspond with the - elements in D. -*/ - - if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) { - *k = 0; - if (*icompq == 0) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - perm[j] = indxq[indx[j]]; -/* L50: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - perm[j] = indxq[indx[j]]; - dcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 - + 1], &c__1); -/* L60: */ - } - dlacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq); - } - return 0; - } - -/* - If there are multiple eigenvalues then the problem deflates. Here - the number of equal eigenvalues are found. As each equal - eigenvalue is found, an elementary reflector is computed to rotate - the corresponding eigensubspace so that the corresponding - components of Z are zero in this new basis. -*/ - - *k = 0; - *givptr = 0; - k2 = *n + 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - indxp[k2] = j; - if (j == *n) { - goto L110; - } - } else { - jlam = j; - goto L80; - } -/* L70: */ - } -L80: - ++j; - if (j > *n) { - goto L100; - } - if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - indxp[k2] = j; - } else { - -/* Check if eigenvalues are close enough to allow deflation. */ - - s = z__[jlam]; - c__ = z__[j]; - -/* - Find sqrt(a**2+b**2) without overflow or - destructive underflow. -*/ - - tau = dlapy2_(&c__, &s); - t = d__[j] - d__[jlam]; - c__ /= tau; - s = -s / tau; - if ((d__1 = t * c__ * s, abs(d__1)) <= tol) { - -/* Deflation is possible. */ - - z__[j] = tau; - z__[jlam] = 0.; - -/* Record the appropriate Givens rotation */ - - ++(*givptr); - givcol[(*givptr << 1) + 1] = indxq[indx[jlam]]; - givcol[(*givptr << 1) + 2] = indxq[indx[j]]; - givnum[(*givptr << 1) + 1] = c__; - givnum[(*givptr << 1) + 2] = s; - if (*icompq == 1) { - drot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[ - indxq[indx[j]] * q_dim1 + 1], &c__1, &c__, &s); - } - t = d__[jlam] * c__ * c__ + d__[j] * s * s; - d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__; - d__[jlam] = t; - --k2; - i__ = 1; -L90: - if (k2 + i__ <= *n) { - if (d__[jlam] < d__[indxp[k2 + i__]]) { - indxp[k2 + i__ - 1] = indxp[k2 + i__]; - indxp[k2 + i__] = jlam; - ++i__; - goto L90; - } else { - indxp[k2 + i__ - 1] = jlam; - } - } else { - indxp[k2 + i__ - 1] = jlam; - } - jlam = j; - } else { - ++(*k); - w[*k] = z__[jlam]; - dlamda[*k] = d__[jlam]; - indxp[*k] = jlam; - jlam = j; - } - } - goto L80; -L100: - -/* Record the last eigenvalue. */ - - ++(*k); - w[*k] = z__[jlam]; - dlamda[*k] = d__[jlam]; - indxp[*k] = jlam; - -L110: - -/* - Sort the eigenvalues and corresponding eigenvectors into DLAMDA - and Q2 respectively. The eigenvalues/vectors which were not - deflated go into the first K slots of DLAMDA and Q2 respectively, - while those which were deflated go into the last N - K slots. -*/ - - if (*icompq == 0) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - jp = indxp[j]; - dlamda[j] = d__[jp]; - perm[j] = indxq[indx[jp]]; -/* L120: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - jp = indxp[j]; - dlamda[j] = d__[jp]; - perm[j] = indxq[indx[jp]]; - dcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1] - , &c__1); -/* L130: */ - } - } - -/* - The deflated eigenvalues and their corresponding vectors go back - into the last N - K slots of D and Q respectively. -*/ - - if (*k < *n) { - if (*icompq == 0) { - i__1 = *n - *k; - dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); - } else { - i__1 = *n - *k; - dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); - i__1 = *n - *k; - dlacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(* - k + 1) * q_dim1 + 1], ldq); - } - } - - return 0; - -/* End of DLAED8 */ - -} /* dlaed8_ */ - -/* Subroutine */ int dlaed9_(integer *k, integer *kstart, integer *kstop, - integer *n, doublereal *d__, doublereal *q, integer *ldq, doublereal * - rho, doublereal *dlamda, doublereal *w, doublereal *s, integer *lds, - integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, s_dim1, s_offset, i__1, i__2; - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); - - /* Local variables */ - static integer i__, j; - static doublereal temp; - extern doublereal dnrm2_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *), dlaed4_(integer *, integer *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *); - extern doublereal dlamc3_(doublereal *, doublereal *); - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - September 30, 1994 - - - Purpose - ======= - - DLAED9 finds the roots of the secular equation, as defined by the - values in D, Z, and RHO, between KSTART and KSTOP. It makes the - appropriate calls to DLAED4 and then stores the new matrix of - eigenvectors for use in calculating the next level of Z vectors. - - Arguments - ========= - - K (input) INTEGER - The number of terms in the rational function to be solved by - DLAED4. K >= 0. - - KSTART (input) INTEGER - KSTOP (input) INTEGER - The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP - are to be computed. 1 <= KSTART <= KSTOP <= K. - - N (input) INTEGER - The number of rows and columns in the Q matrix. - N >= K (delation may result in N > K). - - D (output) DOUBLE PRECISION array, dimension (N) - D(I) contains the updated eigenvalues - for KSTART <= I <= KSTOP. - - Q (workspace) DOUBLE PRECISION array, dimension (LDQ,N) - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max( 1, N ). - - RHO (input) DOUBLE PRECISION - The value of the parameter in the rank one update equation. - RHO >= 0 required. - - DLAMDA (input) DOUBLE PRECISION array, dimension (K) - The first K elements of this array contain the old roots - of the deflated updating problem. These are the poles - of the secular equation. - - W (input) DOUBLE PRECISION array, dimension (K) - The first K elements of this array contain the components - of the deflation-adjusted updating vector. - - S (output) DOUBLE PRECISION array, dimension (LDS, K) - Will contain the eigenvectors of the repaired matrix which - will be stored for subsequent Z vector calculation and - multiplied by the previously accumulated eigenvectors - to update the system. - - LDS (input) INTEGER - The leading dimension of S. LDS >= max( 1, K ). - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an eigenvalue did not converge - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --dlamda; - --w; - s_dim1 = *lds; - s_offset = 1 + s_dim1; - s -= s_offset; - - /* Function Body */ - *info = 0; - - if (*k < 0) { - *info = -1; - } else if (*kstart < 1 || *kstart > max(1,*k)) { - *info = -2; - } else if (max(1,*kstop) < *kstart || *kstop > max(1,*k)) { - *info = -3; - } else if (*n < *k) { - *info = -4; - } else if (*ldq < max(1,*k)) { - *info = -7; - } else if (*lds < max(1,*k)) { - *info = -12; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLAED9", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*k == 0) { - return 0; - } - -/* - Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can - be computed with high relative accuracy (barring over/underflow). - This is a problem on machines without a guard digit in - add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). - The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), - which on any of these machines zeros out the bottommost - bit of DLAMDA(I) if it is 1; this makes the subsequent - subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation - occurs. On binary machines with a guard digit (almost all - machines) it does not change DLAMDA(I) at all. On hexadecimal - and decimal machines with a guard digit, it slightly - changes the bottommost bits of DLAMDA(I). It does not account - for hexadecimal or decimal machines without guard digits - (we know of none). We use a subroutine call to compute - 2*DLAMBDA(I) to prevent optimizing compilers from eliminating - this code. -*/ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dlamda[i__] = dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; -/* L10: */ - } - - i__1 = *kstop; - for (j = *kstart; j <= i__1; ++j) { - dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], - info); - -/* If the zero finder fails, the computation is terminated. */ - - if (*info != 0) { - goto L120; - } -/* L20: */ - } - - if (*k == 1 || *k == 2) { - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = *k; - for (j = 1; j <= i__2; ++j) { - s[j + i__ * s_dim1] = q[j + i__ * q_dim1]; -/* L30: */ - } -/* L40: */ - } - goto L120; - } - -/* Compute updated W. */ - - dcopy_(k, &w[1], &c__1, &s[s_offset], &c__1); - -/* Initialize W(I) = Q(I,I) */ - - i__1 = *ldq + 1; - dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1); - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); -/* L50: */ - } - i__2 = *k; - for (i__ = j + 1; i__ <= i__2; ++i__) { - w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); -/* L60: */ - } -/* L70: */ - } - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - d__1 = sqrt(-w[i__]); - w[i__] = d_sign(&d__1, &s[i__ + s_dim1]); -/* L80: */ - } - -/* Compute eigenvectors of the modified rank-1 modification. */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *k; - for (i__ = 1; i__ <= i__2; ++i__) { - q[i__ + j * q_dim1] = w[i__] / q[i__ + j * q_dim1]; -/* L90: */ - } - temp = dnrm2_(k, &q[j * q_dim1 + 1], &c__1); - i__2 = *k; - for (i__ = 1; i__ <= i__2; ++i__) { - s[i__ + j * s_dim1] = q[i__ + j * q_dim1] / temp; -/* L100: */ - } -/* L110: */ - } - -L120: - return 0; - -/* End of DLAED9 */ - -} /* dlaed9_ */ - -/* Subroutine */ int dlaeda_(integer *n, integer *tlvls, integer *curlvl, - integer *curpbm, integer *prmptr, integer *perm, integer *givptr, - integer *givcol, doublereal *givnum, doublereal *q, integer *qptr, - doublereal *z__, doublereal *ztemp, integer *info) -{ - /* System generated locals */ - integer i__1, i__2, i__3; - - /* Builtin functions */ - integer pow_ii(integer *, integer *); - double sqrt(doublereal); - - /* Local variables */ - static integer i__, k, mid, ptr; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *); - static integer curr, bsiz1, bsiz2, psiz1, psiz2, zptr1; - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *), dcopy_(integer *, - doublereal *, integer *, doublereal *, integer *), xerbla_(char *, - integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - DLAEDA computes the Z vector corresponding to the merge step in the - CURLVLth step of the merge process with TLVLS steps for the CURPBMth - problem. - - Arguments - ========= - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - TLVLS (input) INTEGER - The total number of merging levels in the overall divide and - conquer tree. - - CURLVL (input) INTEGER - The current level in the overall merge routine, - 0 <= curlvl <= tlvls. - - CURPBM (input) INTEGER - The current problem in the current level in the overall - merge routine (counting from upper left to lower right). - - PRMPTR (input) INTEGER array, dimension (N lg N) - Contains a list of pointers which indicate where in PERM a - level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) - indicates the size of the permutation and incidentally the - size of the full, non-deflated problem. - - PERM (input) INTEGER array, dimension (N lg N) - Contains the permutations (from deflation and sorting) to be - applied to each eigenblock. - - GIVPTR (input) INTEGER array, dimension (N lg N) - Contains a list of pointers which indicate where in GIVCOL a - level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) - indicates the number of Givens rotations. - - GIVCOL (input) INTEGER array, dimension (2, N lg N) - Each pair of numbers indicates a pair of columns to take place - in a Givens rotation. - - GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N) - Each number indicates the S value to be used in the - corresponding Givens rotation. - - Q (input) DOUBLE PRECISION array, dimension (N**2) - Contains the square eigenblocks from previous levels, the - starting positions for blocks are given by QPTR. - - QPTR (input) INTEGER array, dimension (N+2) - Contains a list of pointers which indicate where in Q an - eigenblock is stored. SQRT( QPTR(i+1) - QPTR(i) ) indicates - the size of the block. - - Z (output) DOUBLE PRECISION array, dimension (N) - On output this vector contains the updating vector (the last - row of the first sub-eigenvector matrix and the first row of - the second sub-eigenvector matrix). - - ZTEMP (workspace) DOUBLE PRECISION array, dimension (N) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --ztemp; - --z__; - --qptr; - --q; - givnum -= 3; - givcol -= 3; - --givptr; - --perm; - --prmptr; - - /* Function Body */ - *info = 0; - - if (*n < 0) { - *info = -1; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLAEDA", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Determine location of first number in second half. */ - - mid = *n / 2 + 1; - -/* Gather last/first rows of appropriate eigenblocks into center of Z */ - - ptr = 1; - -/* - Determine location of lowest level subproblem in the full storage - scheme -*/ - - i__1 = *curlvl - 1; - curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1; - -/* - Determine size of these matrices. We add HALF to the value of - the SQRT in case the machine underestimates one of these square - roots. -*/ - - bsiz1 = (integer) (sqrt((doublereal) (qptr[curr + 1] - qptr[curr])) + .5); - bsiz2 = (integer) (sqrt((doublereal) (qptr[curr + 2] - qptr[curr + 1])) + - .5); - i__1 = mid - bsiz1 - 1; - for (k = 1; k <= i__1; ++k) { - z__[k] = 0.; -/* L10: */ - } - dcopy_(&bsiz1, &q[qptr[curr] + bsiz1 - 1], &bsiz1, &z__[mid - bsiz1], & - c__1); - dcopy_(&bsiz2, &q[qptr[curr + 1]], &bsiz2, &z__[mid], &c__1); - i__1 = *n; - for (k = mid + bsiz2; k <= i__1; ++k) { - z__[k] = 0.; -/* L20: */ - } - -/* - Loop thru remaining levels 1 -> CURLVL applying the Givens - rotations and permutation and then multiplying the center matrices - against the current Z. -*/ - - ptr = pow_ii(&c__2, tlvls) + 1; - i__1 = *curlvl - 1; - for (k = 1; k <= i__1; ++k) { - i__2 = *curlvl - k; - i__3 = *curlvl - k - 1; - curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) - - 1; - psiz1 = prmptr[curr + 1] - prmptr[curr]; - psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; - zptr1 = mid - psiz1; - -/* Apply Givens at CURR and CURR+1 */ - - i__2 = givptr[curr + 1] - 1; - for (i__ = givptr[curr]; i__ <= i__2; ++i__) { - drot_(&c__1, &z__[zptr1 + givcol[(i__ << 1) + 1] - 1], &c__1, & - z__[zptr1 + givcol[(i__ << 1) + 2] - 1], &c__1, &givnum[( - i__ << 1) + 1], &givnum[(i__ << 1) + 2]); -/* L30: */ - } - i__2 = givptr[curr + 2] - 1; - for (i__ = givptr[curr + 1]; i__ <= i__2; ++i__) { - drot_(&c__1, &z__[mid - 1 + givcol[(i__ << 1) + 1]], &c__1, &z__[ - mid - 1 + givcol[(i__ << 1) + 2]], &c__1, &givnum[(i__ << - 1) + 1], &givnum[(i__ << 1) + 2]); -/* L40: */ - } - psiz1 = prmptr[curr + 1] - prmptr[curr]; - psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; - i__2 = psiz1 - 1; - for (i__ = 0; i__ <= i__2; ++i__) { - ztemp[i__ + 1] = z__[zptr1 + perm[prmptr[curr] + i__] - 1]; -/* L50: */ - } - i__2 = psiz2 - 1; - for (i__ = 0; i__ <= i__2; ++i__) { - ztemp[psiz1 + i__ + 1] = z__[mid + perm[prmptr[curr + 1] + i__] - - 1]; -/* L60: */ - } - -/* - Multiply Blocks at CURR and CURR+1 - - Determine size of these matrices. We add HALF to the value of - the SQRT in case the machine underestimates one of these - square roots. -*/ - - bsiz1 = (integer) (sqrt((doublereal) (qptr[curr + 1] - qptr[curr])) + - .5); - bsiz2 = (integer) (sqrt((doublereal) (qptr[curr + 2] - qptr[curr + 1]) - ) + .5); - if (bsiz1 > 0) { - dgemv_("T", &bsiz1, &bsiz1, &c_b2453, &q[qptr[curr]], &bsiz1, & - ztemp[1], &c__1, &c_b2467, &z__[zptr1], &c__1); - } - i__2 = psiz1 - bsiz1; - dcopy_(&i__2, &ztemp[bsiz1 + 1], &c__1, &z__[zptr1 + bsiz1], &c__1); - if (bsiz2 > 0) { - dgemv_("T", &bsiz2, &bsiz2, &c_b2453, &q[qptr[curr + 1]], &bsiz2, - &ztemp[psiz1 + 1], &c__1, &c_b2467, &z__[mid], &c__1); - } - i__2 = psiz2 - bsiz2; - dcopy_(&i__2, &ztemp[psiz1 + bsiz2 + 1], &c__1, &z__[mid + bsiz2], & - c__1); - - i__2 = *tlvls - k; - ptr += pow_ii(&c__2, &i__2); -/* L70: */ - } - - return 0; - -/* End of DLAEDA */ - -} /* dlaeda_ */ - -/* Subroutine */ int dlaev2_(doublereal *a, doublereal *b, doublereal *c__, - doublereal *rt1, doublereal *rt2, doublereal *cs1, doublereal *sn1) -{ - /* System generated locals */ - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal ab, df, cs, ct, tb, sm, tn, rt, adf, acs; - static integer sgn1, sgn2; - static doublereal acmn, acmx; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix - [ A B ] - [ B C ]. - On return, RT1 is the eigenvalue of larger absolute value, RT2 is the - eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right - eigenvector for RT1, giving the decomposition - - [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] - [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. - - Arguments - ========= - - A (input) DOUBLE PRECISION - The (1,1) element of the 2-by-2 matrix. - - B (input) DOUBLE PRECISION - The (1,2) element and the conjugate of the (2,1) element of - the 2-by-2 matrix. - - C (input) DOUBLE PRECISION - The (2,2) element of the 2-by-2 matrix. - - RT1 (output) DOUBLE PRECISION - The eigenvalue of larger absolute value. - - RT2 (output) DOUBLE PRECISION - The eigenvalue of smaller absolute value. - - CS1 (output) DOUBLE PRECISION - SN1 (output) DOUBLE PRECISION - The vector (CS1, SN1) is a unit right eigenvector for RT1. - - Further Details - =============== - - RT1 is accurate to a few ulps barring over/underflow. - - RT2 may be inaccurate if there is massive cancellation in the - determinant A*C-B*B; higher precision or correctly rounded or - correctly truncated arithmetic would be needed to compute RT2 - accurately in all cases. - - CS1 and SN1 are accurate to a few ulps barring over/underflow. - - Overflow is possible only if RT1 is within a factor of 5 of overflow. - Underflow is harmless if the input data is 0 or exceeds - underflow_threshold / macheps. - - ===================================================================== - - - Compute the eigenvalues -*/ - - sm = *a + *c__; - df = *a - *c__; - adf = abs(df); - tb = *b + *b; - ab = abs(tb); - if (abs(*a) > abs(*c__)) { - acmx = *a; - acmn = *c__; - } else { - acmx = *c__; - acmn = *a; - } - if (adf > ab) { -/* Computing 2nd power */ - d__1 = ab / adf; - rt = adf * sqrt(d__1 * d__1 + 1.); - } else if (adf < ab) { -/* Computing 2nd power */ - d__1 = adf / ab; - rt = ab * sqrt(d__1 * d__1 + 1.); - } else { - -/* Includes case AB=ADF=0 */ - - rt = ab * sqrt(2.); - } - if (sm < 0.) { - *rt1 = (sm - rt) * .5; - sgn1 = -1; - -/* - Order of execution important. - To get fully accurate smaller eigenvalue, - next line needs to be executed in higher precision. -*/ - - *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; - } else if (sm > 0.) { - *rt1 = (sm + rt) * .5; - sgn1 = 1; - -/* - Order of execution important. - To get fully accurate smaller eigenvalue, - next line needs to be executed in higher precision. -*/ - - *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; - } else { - -/* Includes case RT1 = RT2 = 0 */ - - *rt1 = rt * .5; - *rt2 = rt * -.5; - sgn1 = 1; - } - -/* Compute the eigenvector */ - - if (df >= 0.) { - cs = df + rt; - sgn2 = 1; - } else { - cs = df - rt; - sgn2 = -1; - } - acs = abs(cs); - if (acs > ab) { - ct = -tb / cs; - *sn1 = 1. / sqrt(ct * ct + 1.); - *cs1 = ct * *sn1; - } else { - if (ab == 0.) { - *cs1 = 1.; - *sn1 = 0.; - } else { - tn = -cs / tb; - *cs1 = 1. / sqrt(tn * tn + 1.); - *sn1 = tn * *cs1; - } - } - if (sgn1 == sgn2) { - tn = *cs1; - *cs1 = -(*sn1); - *sn1 = tn; - } - return 0; - -/* End of DLAEV2 */ - -} /* dlaev2_ */ - -/* Subroutine */ int dlahqr_(logical *wantt, logical *wantz, integer *n, - integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal - *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, - integer *ldz, integer *info) -{ - /* System generated locals */ - integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; - doublereal d__1, d__2; - - /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); - - /* Local variables */ - static integer i__, j, k, l, m; - static doublereal s, v[3]; - static integer i1, i2; - static doublereal t1, t2, t3, v1, v2, v3, h00, h10, h11, h12, h21, h22, - h33, h44; - static integer nh; - static doublereal cs; - static integer nr; - static doublereal sn; - static integer nz; - static doublereal ave, h33s, h44s; - static integer itn, its; - static doublereal ulp, sum, tst1, h43h34, disc, unfl, ovfl; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *); - static doublereal work[1]; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *), dlanv2_(doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *), dlabad_( - doublereal *, doublereal *); - - extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *, - integer *, doublereal *); - extern doublereal dlanhs_(char *, integer *, doublereal *, integer *, - doublereal *); - static doublereal smlnum; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DLAHQR is an auxiliary routine called by DHSEQR to update the - eigenvalues and Schur decomposition already computed by DHSEQR, by - dealing with the Hessenberg submatrix in rows and columns ILO to IHI. - - Arguments - ========= - - WANTT (input) LOGICAL - = .TRUE. : the full Schur form T is required; - = .FALSE.: only eigenvalues are required. - - WANTZ (input) LOGICAL - = .TRUE. : the matrix of Schur vectors Z is required; - = .FALSE.: Schur vectors are not required. - - N (input) INTEGER - The order of the matrix H. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that H is already upper quasi-triangular in - rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless - ILO = 1). DLAHQR works primarily with the Hessenberg - submatrix in rows and columns ILO to IHI, but applies - transformations to all of H if WANTT is .TRUE.. - 1 <= ILO <= max(1,IHI); IHI <= N. - - H (input/output) DOUBLE PRECISION array, dimension (LDH,N) - On entry, the upper Hessenberg matrix H. - On exit, if WANTT is .TRUE., H is upper quasi-triangular in - rows and columns ILO:IHI, with any 2-by-2 diagonal blocks in - standard form. If WANTT is .FALSE., the contents of H are - unspecified on exit. - - LDH (input) INTEGER - The leading dimension of the array H. LDH >= max(1,N). - - WR (output) DOUBLE PRECISION array, dimension (N) - WI (output) DOUBLE PRECISION array, dimension (N) - The real and imaginary parts, respectively, of the computed - eigenvalues ILO to IHI are stored in the corresponding - elements of WR and WI. If two eigenvalues are computed as a - complex conjugate pair, they are stored in consecutive - elements of WR and WI, say the i-th and (i+1)th, with - WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the - eigenvalues are stored in the same order as on the diagonal - of the Schur form returned in H, with WR(i) = H(i,i), and, if - H(i:i+1,i:i+1) is a 2-by-2 diagonal block, - WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). - - ILOZ (input) INTEGER - IHIZ (input) INTEGER - Specify the rows of Z to which transformations must be - applied if WANTZ is .TRUE.. - 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. - - Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) - If WANTZ is .TRUE., on entry Z must contain the current - matrix Z of transformations accumulated by DHSEQR, and on - exit Z has been updated; transformations are applied only to - the submatrix Z(ILOZ:IHIZ,ILO:IHI). - If WANTZ is .FALSE., Z is not referenced. - - LDZ (input) INTEGER - The leading dimension of the array Z. LDZ >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - > 0: DLAHQR failed to compute all the eigenvalues ILO to IHI - in a total of 30*(IHI-ILO+1) iterations; if INFO = i, - elements i+1:ihi of WR and WI contain those eigenvalues - which have been successfully computed. - - Further Details - =============== - - 2-96 Based on modifications by - David Day, Sandia National Laboratory, USA - - ===================================================================== -*/ - - - /* Parameter adjustments */ - h_dim1 = *ldh; - h_offset = 1 + h_dim1; - h__ -= h_offset; - --wr; - --wi; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - - /* Function Body */ - *info = 0; - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - if (*ilo == *ihi) { - wr[*ilo] = h__[*ilo + *ilo * h_dim1]; - wi[*ilo] = 0.; - return 0; - } - - nh = *ihi - *ilo + 1; - nz = *ihiz - *iloz + 1; - -/* - Set machine-dependent constants for the stopping criterion. - If norm(H) <= sqrt(OVFL), overflow should not occur. -*/ - - unfl = SAFEMINIMUM; - ovfl = 1. / unfl; - dlabad_(&unfl, &ovfl); - ulp = PRECISION; - smlnum = unfl * (nh / ulp); - -/* - I1 and I2 are the indices of the first row and last column of H - to which transformations must be applied. If eigenvalues only are - being computed, I1 and I2 are set inside the main loop. -*/ - - if (*wantt) { - i1 = 1; - i2 = *n; - } - -/* ITN is the total number of QR iterations allowed. */ - - itn = nh * 30; - -/* - The main loop begins here. I is the loop index and decreases from - IHI to ILO in steps of 1 or 2. Each iteration of the loop works - with the active submatrix in rows and columns L to I. - Eigenvalues I+1 to IHI have already converged. Either L = ILO or - H(L,L-1) is negligible so that the matrix splits. -*/ - - i__ = *ihi; -L10: - l = *ilo; - if (i__ < *ilo) { - goto L150; - } - -/* - Perform QR iterations on rows and columns ILO to I until a - submatrix of order 1 or 2 splits off at the bottom because a - subdiagonal element has become negligible. -*/ - - i__1 = itn; - for (its = 0; its <= i__1; ++its) { - -/* Look for a single small subdiagonal element. */ - - i__2 = l + 1; - for (k = i__; k >= i__2; --k) { - tst1 = (d__1 = h__[k - 1 + (k - 1) * h_dim1], abs(d__1)) + (d__2 = - h__[k + k * h_dim1], abs(d__2)); - if (tst1 == 0.) { - i__3 = i__ - l + 1; - tst1 = dlanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, work); - } -/* Computing MAX */ - d__2 = ulp * tst1; - if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= max(d__2, - smlnum)) { - goto L30; - } -/* L20: */ - } -L30: - l = k; - if (l > *ilo) { - -/* H(L,L-1) is negligible */ - - h__[l + (l - 1) * h_dim1] = 0.; - } - -/* Exit from loop if a submatrix of order 1 or 2 has split off. */ - - if (l >= i__ - 1) { - goto L140; - } - -/* - Now the active submatrix is in rows and columns L to I. If - eigenvalues only are being computed, only the active submatrix - need be transformed. -*/ - - if (! (*wantt)) { - i1 = l; - i2 = i__; - } - - if (its == 10 || its == 20) { - -/* Exceptional shift. */ - - s = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + (d__2 = - h__[i__ - 1 + (i__ - 2) * h_dim1], abs(d__2)); - h44 = s * .75 + h__[i__ + i__ * h_dim1]; - h33 = h44; - h43h34 = s * -.4375 * s; - } else { - -/* - Prepare to use Francis' double shift - (i.e. 2nd degree generalized Rayleigh quotient) -*/ - - h44 = h__[i__ + i__ * h_dim1]; - h33 = h__[i__ - 1 + (i__ - 1) * h_dim1]; - h43h34 = h__[i__ + (i__ - 1) * h_dim1] * h__[i__ - 1 + i__ * - h_dim1]; - s = h__[i__ - 1 + (i__ - 2) * h_dim1] * h__[i__ - 1 + (i__ - 2) * - h_dim1]; - disc = (h33 - h44) * .5; - disc = disc * disc + h43h34; - if (disc > 0.) { - -/* Real roots: use Wilkinson's shift twice */ - - disc = sqrt(disc); - ave = (h33 + h44) * .5; - if (abs(h33) - abs(h44) > 0.) { - h33 = h33 * h44 - h43h34; - h44 = h33 / (d_sign(&disc, &ave) + ave); - } else { - h44 = d_sign(&disc, &ave) + ave; - } - h33 = h44; - h43h34 = 0.; - } - } - -/* Look for two consecutive small subdiagonal elements. */ - - i__2 = l; - for (m = i__ - 2; m >= i__2; --m) { -/* - Determine the effect of starting the double-shift QR - iteration at row M, and see if this would make H(M,M-1) - negligible. -*/ - - h11 = h__[m + m * h_dim1]; - h22 = h__[m + 1 + (m + 1) * h_dim1]; - h21 = h__[m + 1 + m * h_dim1]; - h12 = h__[m + (m + 1) * h_dim1]; - h44s = h44 - h11; - h33s = h33 - h11; - v1 = (h33s * h44s - h43h34) / h21 + h12; - v2 = h22 - h11 - h33s - h44s; - v3 = h__[m + 2 + (m + 1) * h_dim1]; - s = abs(v1) + abs(v2) + abs(v3); - v1 /= s; - v2 /= s; - v3 /= s; - v[0] = v1; - v[1] = v2; - v[2] = v3; - if (m == l) { - goto L50; - } - h00 = h__[m - 1 + (m - 1) * h_dim1]; - h10 = h__[m + (m - 1) * h_dim1]; - tst1 = abs(v1) * (abs(h00) + abs(h11) + abs(h22)); - if (abs(h10) * (abs(v2) + abs(v3)) <= ulp * tst1) { - goto L50; - } -/* L40: */ - } -L50: - -/* Double-shift QR step */ - - i__2 = i__ - 1; - for (k = m; k <= i__2; ++k) { - -/* - The first iteration of this loop determines a reflection G - from the vector V and applies it from left and right to H, - thus creating a nonzero bulge below the subdiagonal. - - Each subsequent iteration determines a reflection G to - restore the Hessenberg form in the (K-1)th column, and thus - chases the bulge one step toward the bottom of the active - submatrix. NR is the order of G. - - Computing MIN -*/ - i__3 = 3, i__4 = i__ - k + 1; - nr = min(i__3,i__4); - if (k > m) { - dcopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); - } - dlarfg_(&nr, v, &v[1], &c__1, &t1); - if (k > m) { - h__[k + (k - 1) * h_dim1] = v[0]; - h__[k + 1 + (k - 1) * h_dim1] = 0.; - if (k < i__ - 1) { - h__[k + 2 + (k - 1) * h_dim1] = 0.; - } - } else if (m > l) { - h__[k + (k - 1) * h_dim1] = -h__[k + (k - 1) * h_dim1]; - } - v2 = v[1]; - t2 = t1 * v2; - if (nr == 3) { - v3 = v[2]; - t3 = t1 * v3; - -/* - Apply G from the left to transform the rows of the matrix - in columns K to I2. -*/ - - i__3 = i2; - for (j = k; j <= i__3; ++j) { - sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1] - + v3 * h__[k + 2 + j * h_dim1]; - h__[k + j * h_dim1] -= sum * t1; - h__[k + 1 + j * h_dim1] -= sum * t2; - h__[k + 2 + j * h_dim1] -= sum * t3; -/* L60: */ - } - -/* - Apply G from the right to transform the columns of the - matrix in rows I1 to min(K+3,I). - - Computing MIN -*/ - i__4 = k + 3; - i__3 = min(i__4,i__); - for (j = i1; j <= i__3; ++j) { - sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] - + v3 * h__[j + (k + 2) * h_dim1]; - h__[j + k * h_dim1] -= sum * t1; - h__[j + (k + 1) * h_dim1] -= sum * t2; - h__[j + (k + 2) * h_dim1] -= sum * t3; -/* L70: */ - } - - if (*wantz) { - -/* Accumulate transformations in the matrix Z */ - - i__3 = *ihiz; - for (j = *iloz; j <= i__3; ++j) { - sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * - z_dim1] + v3 * z__[j + (k + 2) * z_dim1]; - z__[j + k * z_dim1] -= sum * t1; - z__[j + (k + 1) * z_dim1] -= sum * t2; - z__[j + (k + 2) * z_dim1] -= sum * t3; -/* L80: */ - } - } - } else if (nr == 2) { - -/* - Apply G from the left to transform the rows of the matrix - in columns K to I2. -*/ - - i__3 = i2; - for (j = k; j <= i__3; ++j) { - sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1]; - h__[k + j * h_dim1] -= sum * t1; - h__[k + 1 + j * h_dim1] -= sum * t2; -/* L90: */ - } - -/* - Apply G from the right to transform the columns of the - matrix in rows I1 to min(K+3,I). -*/ - - i__3 = i__; - for (j = i1; j <= i__3; ++j) { - sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] - ; - h__[j + k * h_dim1] -= sum * t1; - h__[j + (k + 1) * h_dim1] -= sum * t2; -/* L100: */ - } - - if (*wantz) { - -/* Accumulate transformations in the matrix Z */ - - i__3 = *ihiz; - for (j = *iloz; j <= i__3; ++j) { - sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * - z_dim1]; - z__[j + k * z_dim1] -= sum * t1; - z__[j + (k + 1) * z_dim1] -= sum * t2; -/* L110: */ - } - } - } -/* L120: */ - } - -/* L130: */ - } - -/* Failure to converge in remaining number of iterations */ - - *info = i__; - return 0; - -L140: - - if (l == i__) { - -/* H(I,I-1) is negligible: one eigenvalue has converged. */ - - wr[i__] = h__[i__ + i__ * h_dim1]; - wi[i__] = 0.; - } else if (l == i__ - 1) { - -/* - H(I-1,I-2) is negligible: a pair of eigenvalues have converged. - - Transform the 2-by-2 submatrix to standard Schur form, - and compute and store the eigenvalues. -*/ - - dlanv2_(&h__[i__ - 1 + (i__ - 1) * h_dim1], &h__[i__ - 1 + i__ * - h_dim1], &h__[i__ + (i__ - 1) * h_dim1], &h__[i__ + i__ * - h_dim1], &wr[i__ - 1], &wi[i__ - 1], &wr[i__], &wi[i__], &cs, - &sn); - - if (*wantt) { - -/* Apply the transformation to the rest of H. */ - - if (i2 > i__) { - i__1 = i2 - i__; - drot_(&i__1, &h__[i__ - 1 + (i__ + 1) * h_dim1], ldh, &h__[ - i__ + (i__ + 1) * h_dim1], ldh, &cs, &sn); - } - i__1 = i__ - i1 - 1; - drot_(&i__1, &h__[i1 + (i__ - 1) * h_dim1], &c__1, &h__[i1 + i__ * - h_dim1], &c__1, &cs, &sn); - } - if (*wantz) { - -/* Apply the transformation to Z. */ - - drot_(&nz, &z__[*iloz + (i__ - 1) * z_dim1], &c__1, &z__[*iloz + - i__ * z_dim1], &c__1, &cs, &sn); - } - } - -/* - Decrement number of remaining iterations, and return to start of - the main loop with new value of I. -*/ - - itn -= its; - i__ = l - 1; - goto L10; - -L150: - return 0; - -/* End of DLAHQR */ - -} /* dlahqr_ */ - -/* Subroutine */ int dlahrd_(integer *n, integer *k, integer *nb, doublereal * - a, integer *lda, doublereal *tau, doublereal *t, integer *ldt, - doublereal *y, integer *ldy) -{ - /* System generated locals */ - integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, - i__3; - doublereal d__1; - - /* Local variables */ - static integer i__; - static doublereal ei; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *), dgemv_(char *, integer *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *), dcopy_(integer *, doublereal *, - integer *, doublereal *, integer *), daxpy_(integer *, doublereal - *, doublereal *, integer *, doublereal *, integer *), dtrmv_(char - *, char *, char *, integer *, doublereal *, integer *, doublereal - *, integer *), dlarfg_(integer *, - doublereal *, doublereal *, integer *, doublereal *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DLAHRD reduces the first NB columns of a real general n-by-(n-k+1) - matrix A so that elements below the k-th subdiagonal are zero. The - reduction is performed by an orthogonal similarity transformation - Q' * A * Q. The routine returns the matrices V and T which determine - Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. - - This is an auxiliary routine called by DGEHRD. - - Arguments - ========= - - N (input) INTEGER - The order of the matrix A. - - K (input) INTEGER - The offset for the reduction. Elements below the k-th - subdiagonal in the first NB columns are reduced to zero. - - NB (input) INTEGER - The number of columns to be reduced. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N-K+1) - On entry, the n-by-(n-k+1) general matrix A. - On exit, the elements on and above the k-th subdiagonal in - the first NB columns are overwritten with the corresponding - elements of the reduced matrix; the elements below the k-th - subdiagonal, with the array TAU, represent the matrix Q as a - product of elementary reflectors. The other columns of A are - unchanged. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - TAU (output) DOUBLE PRECISION array, dimension (NB) - The scalar factors of the elementary reflectors. See Further - Details. - - T (output) DOUBLE PRECISION array, dimension (LDT,NB) - The upper triangular matrix T. - - LDT (input) INTEGER - The leading dimension of the array T. LDT >= NB. - - Y (output) DOUBLE PRECISION array, dimension (LDY,NB) - The n-by-nb matrix Y. - - LDY (input) INTEGER - The leading dimension of the array Y. LDY >= N. - - Further Details - =============== - - The matrix Q is represented as a product of nb elementary reflectors - - Q = H(1) H(2) . . . H(nb). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in - A(i+k+1:n,i), and tau in TAU(i). - - The elements of the vectors v together form the (n-k+1)-by-nb matrix - V which is needed, with T and Y, to apply the transformation to the - unreduced part of the matrix, using an update of the form: - A := (I - V*T*V') * (A - Y*V'). - - The contents of A on exit are illustrated by the following example - with n = 7, k = 3 and nb = 2: - - ( a h a a a ) - ( a h a a a ) - ( a h a a a ) - ( h h a a a ) - ( v1 h a a a ) - ( v1 v2 a a a ) - ( v1 v2 a a a ) - - where a denotes an element of the original matrix A, h denotes a - modified element of the upper Hessenberg matrix H, and vi denotes an - element of the vector defining H(i). - - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - --tau; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - y_dim1 = *ldy; - y_offset = 1 + y_dim1; - y -= y_offset; - - /* Function Body */ - if (*n <= 1) { - return 0; - } - - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { - if (i__ > 1) { - -/* - Update A(1:n,i) - - Compute i-th column of A - Y * V' -*/ - - i__2 = i__ - 1; - dgemv_("No transpose", n, &i__2, &c_b2589, &y[y_offset], ldy, &a[* - k + i__ - 1 + a_dim1], lda, &c_b2453, &a[i__ * a_dim1 + 1] - , &c__1); - -/* - Apply I - V * T' * V' to this column (call it b) from the - left, using the last column of T as workspace - - Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) - ( V2 ) ( b2 ) - - where V1 is unit lower triangular - - w := V1' * b1 -*/ - - i__2 = i__ - 1; - dcopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + - 1], &c__1); - i__2 = i__ - 1; - dtrmv_("Lower", "Transpose", "Unit", &i__2, &a[*k + 1 + a_dim1], - lda, &t[*nb * t_dim1 + 1], &c__1); - -/* w := w + V2'*b2 */ - - i__2 = *n - *k - i__ + 1; - i__3 = i__ - 1; - dgemv_("Transpose", &i__2, &i__3, &c_b2453, &a[*k + i__ + a_dim1], - lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b2453, &t[* - nb * t_dim1 + 1], &c__1); - -/* w := T'*w */ - - i__2 = i__ - 1; - dtrmv_("Upper", "Transpose", "Non-unit", &i__2, &t[t_offset], ldt, - &t[*nb * t_dim1 + 1], &c__1); - -/* b2 := b2 - V2*w */ - - i__2 = *n - *k - i__ + 1; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b2589, &a[*k + i__ + - a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1, &c_b2453, &a[* - k + i__ + i__ * a_dim1], &c__1); - -/* b1 := b1 - V1*w */ - - i__2 = i__ - 1; - dtrmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1] - , lda, &t[*nb * t_dim1 + 1], &c__1); - i__2 = i__ - 1; - daxpy_(&i__2, &c_b2589, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + - i__ * a_dim1], &c__1); - - a[*k + i__ - 1 + (i__ - 1) * a_dim1] = ei; - } - -/* - Generate the elementary reflector H(i) to annihilate - A(k+i+1:n,i) -*/ - - i__2 = *n - *k - i__ + 1; -/* Computing MIN */ - i__3 = *k + i__ + 1; - dlarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3,*n) + i__ * - a_dim1], &c__1, &tau[i__]); - ei = a[*k + i__ + i__ * a_dim1]; - a[*k + i__ + i__ * a_dim1] = 1.; - -/* Compute Y(1:n,i) */ - - i__2 = *n - *k - i__ + 1; - dgemv_("No transpose", n, &i__2, &c_b2453, &a[(i__ + 1) * a_dim1 + 1], - lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b2467, &y[i__ * - y_dim1 + 1], &c__1); - i__2 = *n - *k - i__ + 1; - i__3 = i__ - 1; - dgemv_("Transpose", &i__2, &i__3, &c_b2453, &a[*k + i__ + a_dim1], - lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b2467, &t[i__ * - t_dim1 + 1], &c__1); - i__2 = i__ - 1; - dgemv_("No transpose", n, &i__2, &c_b2589, &y[y_offset], ldy, &t[i__ * - t_dim1 + 1], &c__1, &c_b2453, &y[i__ * y_dim1 + 1], &c__1); - dscal_(n, &tau[i__], &y[i__ * y_dim1 + 1], &c__1); - -/* Compute T(1:i,i) */ - - i__2 = i__ - 1; - d__1 = -tau[i__]; - dscal_(&i__2, &d__1, &t[i__ * t_dim1 + 1], &c__1); - i__2 = i__ - 1; - dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt, - &t[i__ * t_dim1 + 1], &c__1) - ; - t[i__ + i__ * t_dim1] = tau[i__]; - -/* L10: */ - } - a[*k + *nb + *nb * a_dim1] = ei; - - return 0; - -/* End of DLAHRD */ - -} /* dlahrd_ */ - -/* Subroutine */ int dlaln2_(logical *ltrans, integer *na, integer *nw, - doublereal *smin, doublereal *ca, doublereal *a, integer *lda, - doublereal *d1, doublereal *d2, doublereal *b, integer *ldb, - doublereal *wr, doublereal *wi, doublereal *x, integer *ldx, - doublereal *scale, doublereal *xnorm, integer *info) -{ - /* Initialized data */ - - static logical zswap[4] = { FALSE_,FALSE_,TRUE_,TRUE_ }; - static logical rswap[4] = { FALSE_,TRUE_,FALSE_,TRUE_ }; - static integer ipivot[16] /* was [4][4] */ = { 1,2,3,4,2,1,4,3,3,4,1,2, - 4,3,2,1 }; - - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset; - doublereal d__1, d__2, d__3, d__4, d__5, d__6; - static doublereal equiv_0[4], equiv_1[4]; - - /* Local variables */ - static integer j; -#define ci (equiv_0) -#define cr (equiv_1) - static doublereal bi1, bi2, br1, br2, xi1, xi2, xr1, xr2, ci21, ci22, - cr21, cr22, li21, csi, ui11, lr21, ui12, ui22; -#define civ (equiv_0) - static doublereal csr, ur11, ur12, ur22; -#define crv (equiv_1) - static doublereal bbnd, cmax, ui11r, ui12s, temp, ur11r, ur12s, u22abs; - static integer icmax; - static doublereal bnorm, cnorm, smini; - - extern /* Subroutine */ int dladiv_(doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *); - static doublereal bignum, smlnum; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLALN2 solves a system of the form (ca A - w D ) X = s B - or (ca A' - w D) X = s B with possible scaling ("s") and - perturbation of A. (A' means A-transpose.) - - A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA - real diagonal matrix, w is a real or complex value, and X and B are - NA x 1 matrices -- real if w is real, complex if w is complex. NA - may be 1 or 2. - - If w is complex, X and B are represented as NA x 2 matrices, - the first column of each being the real part and the second - being the imaginary part. - - "s" is a scaling factor (.LE. 1), computed by DLALN2, which is - so chosen that X can be computed without overflow. X is further - scaled if necessary to assure that norm(ca A - w D)*norm(X) is less - than overflow. - - If both singular values of (ca A - w D) are less than SMIN, - SMIN*identity will be used instead of (ca A - w D). If only one - singular value is less than SMIN, one element of (ca A - w D) will be - perturbed enough to make the smallest singular value roughly SMIN. - If both singular values are at least SMIN, (ca A - w D) will not be - perturbed. In any case, the perturbation will be at most some small - multiple of max( SMIN, ulp*norm(ca A - w D) ). The singular values - are computed by infinity-norm approximations, and thus will only be - correct to a factor of 2 or so. - - Note: all input quantities are assumed to be smaller than overflow - by a reasonable factor. (See BIGNUM.) - - Arguments - ========== - - LTRANS (input) LOGICAL - =.TRUE.: A-transpose will be used. - =.FALSE.: A will be used (not transposed.) - - NA (input) INTEGER - The size of the matrix A. It may (only) be 1 or 2. - - NW (input) INTEGER - 1 if "w" is real, 2 if "w" is complex. It may only be 1 - or 2. - - SMIN (input) DOUBLE PRECISION - The desired lower bound on the singular values of A. This - should be a safe distance away from underflow or overflow, - say, between (underflow/machine precision) and (machine - precision * overflow ). (See BIGNUM and ULP.) - - CA (input) DOUBLE PRECISION - The coefficient c, which A is multiplied by. - - A (input) DOUBLE PRECISION array, dimension (LDA,NA) - The NA x NA matrix A. - - LDA (input) INTEGER - The leading dimension of A. It must be at least NA. - - D1 (input) DOUBLE PRECISION - The 1,1 element in the diagonal matrix D. - - D2 (input) DOUBLE PRECISION - The 2,2 element in the diagonal matrix D. Not used if NW=1. - - B (input) DOUBLE PRECISION array, dimension (LDB,NW) - The NA x NW matrix B (right-hand side). If NW=2 ("w" is - complex), column 1 contains the real part of B and column 2 - contains the imaginary part. - - LDB (input) INTEGER - The leading dimension of B. It must be at least NA. - - WR (input) DOUBLE PRECISION - The real part of the scalar "w". - - WI (input) DOUBLE PRECISION - The imaginary part of the scalar "w". Not used if NW=1. - - X (output) DOUBLE PRECISION array, dimension (LDX,NW) - The NA x NW matrix X (unknowns), as computed by DLALN2. - If NW=2 ("w" is complex), on exit, column 1 will contain - the real part of X and column 2 will contain the imaginary - part. - - LDX (input) INTEGER - The leading dimension of X. It must be at least NA. - - SCALE (output) DOUBLE PRECISION - The scale factor that B must be multiplied by to insure - that overflow does not occur when computing X. Thus, - (ca A - w D) X will be SCALE*B, not B (ignoring - perturbations of A.) It will be at most 1. - - XNORM (output) DOUBLE PRECISION - The infinity-norm of X, when X is regarded as an NA x NW - real matrix. - - INFO (output) INTEGER - An error flag. It will be set to zero if no error occurs, - a negative number if an argument is in error, or a positive - number if ca A - w D had to be perturbed. - The possible values are: - = 0: No error occurred, and (ca A - w D) did not have to be - perturbed. - = 1: (ca A - w D) had to be perturbed to make its smallest - (or only) singular value greater than SMIN. - NOTE: In the interests of speed, this routine does not - check the inputs for errors. - - ===================================================================== -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - x_dim1 = *ldx; - x_offset = 1 + x_dim1; - x -= x_offset; - - /* Function Body */ - -/* Compute BIGNUM */ - - smlnum = 2. * SAFEMINIMUM; - bignum = 1. / smlnum; - smini = max(*smin,smlnum); - -/* Don't check for input errors */ - - *info = 0; - -/* Standard Initializations */ - - *scale = 1.; - - if (*na == 1) { - -/* 1 x 1 (i.e., scalar) system C X = B */ - - if (*nw == 1) { - -/* - Real 1x1 system. - - C = ca A - w D -*/ - - csr = *ca * a[a_dim1 + 1] - *wr * *d1; - cnorm = abs(csr); - -/* If | C | < SMINI, use C = SMINI */ - - if (cnorm < smini) { - csr = smini; - cnorm = smini; - *info = 1; - } - -/* Check scaling for X = B / C */ - - bnorm = (d__1 = b[b_dim1 + 1], abs(d__1)); - if (cnorm < 1. && bnorm > 1.) { - if (bnorm > bignum * cnorm) { - *scale = 1. / bnorm; - } - } - -/* Compute X */ - - x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / csr; - *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)); - } else { - -/* - Complex 1x1 system (w is complex) - - C = ca A - w D -*/ - - csr = *ca * a[a_dim1 + 1] - *wr * *d1; - csi = -(*wi) * *d1; - cnorm = abs(csr) + abs(csi); - -/* If | C | < SMINI, use C = SMINI */ - - if (cnorm < smini) { - csr = smini; - csi = 0.; - cnorm = smini; - *info = 1; - } - -/* Check scaling for X = B / C */ - - bnorm = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[(b_dim1 << - 1) + 1], abs(d__2)); - if (cnorm < 1. && bnorm > 1.) { - if (bnorm > bignum * cnorm) { - *scale = 1. / bnorm; - } - } - -/* Compute X */ - - d__1 = *scale * b[b_dim1 + 1]; - d__2 = *scale * b[(b_dim1 << 1) + 1]; - dladiv_(&d__1, &d__2, &csr, &csi, &x[x_dim1 + 1], &x[(x_dim1 << 1) - + 1]); - *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)) + (d__2 = x[(x_dim1 << - 1) + 1], abs(d__2)); - } - - } else { - -/* - 2x2 System - - Compute the real part of C = ca A - w D (or ca A' - w D ) -*/ - - cr[0] = *ca * a[a_dim1 + 1] - *wr * *d1; - cr[3] = *ca * a[(a_dim1 << 1) + 2] - *wr * *d2; - if (*ltrans) { - cr[2] = *ca * a[a_dim1 + 2]; - cr[1] = *ca * a[(a_dim1 << 1) + 1]; - } else { - cr[1] = *ca * a[a_dim1 + 2]; - cr[2] = *ca * a[(a_dim1 << 1) + 1]; - } - - if (*nw == 1) { - -/* - Real 2x2 system (w is real) - - Find the largest element in C -*/ - - cmax = 0.; - icmax = 0; - - for (j = 1; j <= 4; ++j) { - if ((d__1 = crv[j - 1], abs(d__1)) > cmax) { - cmax = (d__1 = crv[j - 1], abs(d__1)); - icmax = j; - } -/* L10: */ - } - -/* If norm(C) < SMINI, use SMINI*identity. */ - - if (cmax < smini) { -/* Computing MAX */ - d__3 = (d__1 = b[b_dim1 + 1], abs(d__1)), d__4 = (d__2 = b[ - b_dim1 + 2], abs(d__2)); - bnorm = max(d__3,d__4); - if (smini < 1. && bnorm > 1.) { - if (bnorm > bignum * smini) { - *scale = 1. / bnorm; - } - } - temp = *scale / smini; - x[x_dim1 + 1] = temp * b[b_dim1 + 1]; - x[x_dim1 + 2] = temp * b[b_dim1 + 2]; - *xnorm = temp * bnorm; - *info = 1; - return 0; - } - -/* Gaussian elimination with complete pivoting. */ - - ur11 = crv[icmax - 1]; - cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; - ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; - cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; - ur11r = 1. / ur11; - lr21 = ur11r * cr21; - ur22 = cr22 - ur12 * lr21; - -/* If smaller pivot < SMINI, use SMINI */ - - if (abs(ur22) < smini) { - ur22 = smini; - *info = 1; - } - if (rswap[icmax - 1]) { - br1 = b[b_dim1 + 2]; - br2 = b[b_dim1 + 1]; - } else { - br1 = b[b_dim1 + 1]; - br2 = b[b_dim1 + 2]; - } - br2 -= lr21 * br1; -/* Computing MAX */ - d__2 = (d__1 = br1 * (ur22 * ur11r), abs(d__1)), d__3 = abs(br2); - bbnd = max(d__2,d__3); - if (bbnd > 1. && abs(ur22) < 1.) { - if (bbnd >= bignum * abs(ur22)) { - *scale = 1. / bbnd; - } - } - - xr2 = br2 * *scale / ur22; - xr1 = *scale * br1 * ur11r - xr2 * (ur11r * ur12); - if (zswap[icmax - 1]) { - x[x_dim1 + 1] = xr2; - x[x_dim1 + 2] = xr1; - } else { - x[x_dim1 + 1] = xr1; - x[x_dim1 + 2] = xr2; - } -/* Computing MAX */ - d__1 = abs(xr1), d__2 = abs(xr2); - *xnorm = max(d__1,d__2); - -/* Further scaling if norm(A) norm(X) > overflow */ - - if (*xnorm > 1. && cmax > 1.) { - if (*xnorm > bignum / cmax) { - temp = cmax / bignum; - x[x_dim1 + 1] = temp * x[x_dim1 + 1]; - x[x_dim1 + 2] = temp * x[x_dim1 + 2]; - *xnorm = temp * *xnorm; - *scale = temp * *scale; - } - } - } else { - -/* - Complex 2x2 system (w is complex) - - Find the largest element in C -*/ - - ci[0] = -(*wi) * *d1; - ci[1] = 0.; - ci[2] = 0.; - ci[3] = -(*wi) * *d2; - cmax = 0.; - icmax = 0; - - for (j = 1; j <= 4; ++j) { - if ((d__1 = crv[j - 1], abs(d__1)) + (d__2 = civ[j - 1], abs( - d__2)) > cmax) { - cmax = (d__1 = crv[j - 1], abs(d__1)) + (d__2 = civ[j - 1] - , abs(d__2)); - icmax = j; - } -/* L20: */ - } - -/* If norm(C) < SMINI, use SMINI*identity. */ - - if (cmax < smini) { -/* Computing MAX */ - d__5 = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[(b_dim1 - << 1) + 1], abs(d__2)), d__6 = (d__3 = b[b_dim1 + 2], - abs(d__3)) + (d__4 = b[(b_dim1 << 1) + 2], abs(d__4)); - bnorm = max(d__5,d__6); - if (smini < 1. && bnorm > 1.) { - if (bnorm > bignum * smini) { - *scale = 1. / bnorm; - } - } - temp = *scale / smini; - x[x_dim1 + 1] = temp * b[b_dim1 + 1]; - x[x_dim1 + 2] = temp * b[b_dim1 + 2]; - x[(x_dim1 << 1) + 1] = temp * b[(b_dim1 << 1) + 1]; - x[(x_dim1 << 1) + 2] = temp * b[(b_dim1 << 1) + 2]; - *xnorm = temp * bnorm; - *info = 1; - return 0; - } - -/* Gaussian elimination with complete pivoting. */ - - ur11 = crv[icmax - 1]; - ui11 = civ[icmax - 1]; - cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; - ci21 = civ[ipivot[(icmax << 2) - 3] - 1]; - ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; - ui12 = civ[ipivot[(icmax << 2) - 2] - 1]; - cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; - ci22 = civ[ipivot[(icmax << 2) - 1] - 1]; - if (icmax == 1 || icmax == 4) { - -/* Code when off-diagonals of pivoted C are real */ - - if (abs(ur11) > abs(ui11)) { - temp = ui11 / ur11; -/* Computing 2nd power */ - d__1 = temp; - ur11r = 1. / (ur11 * (d__1 * d__1 + 1.)); - ui11r = -temp * ur11r; - } else { - temp = ur11 / ui11; -/* Computing 2nd power */ - d__1 = temp; - ui11r = -1. / (ui11 * (d__1 * d__1 + 1.)); - ur11r = -temp * ui11r; - } - lr21 = cr21 * ur11r; - li21 = cr21 * ui11r; - ur12s = ur12 * ur11r; - ui12s = ur12 * ui11r; - ur22 = cr22 - ur12 * lr21; - ui22 = ci22 - ur12 * li21; - } else { - -/* Code when diagonals of pivoted C are real */ - - ur11r = 1. / ur11; - ui11r = 0.; - lr21 = cr21 * ur11r; - li21 = ci21 * ur11r; - ur12s = ur12 * ur11r; - ui12s = ui12 * ur11r; - ur22 = cr22 - ur12 * lr21 + ui12 * li21; - ui22 = -ur12 * li21 - ui12 * lr21; - } - u22abs = abs(ur22) + abs(ui22); - -/* If smaller pivot < SMINI, use SMINI */ - - if (u22abs < smini) { - ur22 = smini; - ui22 = 0.; - *info = 1; - } - if (rswap[icmax - 1]) { - br2 = b[b_dim1 + 1]; - br1 = b[b_dim1 + 2]; - bi2 = b[(b_dim1 << 1) + 1]; - bi1 = b[(b_dim1 << 1) + 2]; - } else { - br1 = b[b_dim1 + 1]; - br2 = b[b_dim1 + 2]; - bi1 = b[(b_dim1 << 1) + 1]; - bi2 = b[(b_dim1 << 1) + 2]; - } - br2 = br2 - lr21 * br1 + li21 * bi1; - bi2 = bi2 - li21 * br1 - lr21 * bi1; -/* Computing MAX */ - d__1 = (abs(br1) + abs(bi1)) * (u22abs * (abs(ur11r) + abs(ui11r)) - ), d__2 = abs(br2) + abs(bi2); - bbnd = max(d__1,d__2); - if (bbnd > 1. && u22abs < 1.) { - if (bbnd >= bignum * u22abs) { - *scale = 1. / bbnd; - br1 = *scale * br1; - bi1 = *scale * bi1; - br2 = *scale * br2; - bi2 = *scale * bi2; - } - } - - dladiv_(&br2, &bi2, &ur22, &ui22, &xr2, &xi2); - xr1 = ur11r * br1 - ui11r * bi1 - ur12s * xr2 + ui12s * xi2; - xi1 = ui11r * br1 + ur11r * bi1 - ui12s * xr2 - ur12s * xi2; - if (zswap[icmax - 1]) { - x[x_dim1 + 1] = xr2; - x[x_dim1 + 2] = xr1; - x[(x_dim1 << 1) + 1] = xi2; - x[(x_dim1 << 1) + 2] = xi1; - } else { - x[x_dim1 + 1] = xr1; - x[x_dim1 + 2] = xr2; - x[(x_dim1 << 1) + 1] = xi1; - x[(x_dim1 << 1) + 2] = xi2; - } -/* Computing MAX */ - d__1 = abs(xr1) + abs(xi1), d__2 = abs(xr2) + abs(xi2); - *xnorm = max(d__1,d__2); - -/* Further scaling if norm(A) norm(X) > overflow */ - - if (*xnorm > 1. && cmax > 1.) { - if (*xnorm > bignum / cmax) { - temp = cmax / bignum; - x[x_dim1 + 1] = temp * x[x_dim1 + 1]; - x[x_dim1 + 2] = temp * x[x_dim1 + 2]; - x[(x_dim1 << 1) + 1] = temp * x[(x_dim1 << 1) + 1]; - x[(x_dim1 << 1) + 2] = temp * x[(x_dim1 << 1) + 2]; - *xnorm = temp * *xnorm; - *scale = temp * *scale; - } - } - } - } - - return 0; - -/* End of DLALN2 */ - -} /* dlaln2_ */ - -#undef crv -#undef civ -#undef cr -#undef ci - - -/* Subroutine */ int dlals0_(integer *icompq, integer *nl, integer *nr, - integer *sqre, integer *nrhs, doublereal *b, integer *ldb, doublereal - *bx, integer *ldbx, integer *perm, integer *givptr, integer *givcol, - integer *ldgcol, doublereal *givnum, integer *ldgnum, doublereal * - poles, doublereal *difl, doublereal *difr, doublereal *z__, integer * - k, doublereal *c__, doublereal *s, doublereal *work, integer *info) -{ - /* System generated locals */ - integer givcol_dim1, givcol_offset, b_dim1, b_offset, bx_dim1, bx_offset, - difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, - poles_offset, i__1, i__2; - doublereal d__1; - - /* Local variables */ - static integer i__, j, m, n; - static doublereal dj; - static integer nlp1; - static doublereal temp; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *); - extern doublereal dnrm2_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - static doublereal diflj, difrj, dsigj; - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *), dcopy_(integer *, - doublereal *, integer *, doublereal *, integer *); - extern doublereal dlamc3_(doublereal *, doublereal *); - extern /* Subroutine */ int dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *), dlacpy_(char *, integer *, integer - *, doublereal *, integer *, doublereal *, integer *), - xerbla_(char *, integer *); - static doublereal dsigjp; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - December 1, 1999 - - - Purpose - ======= - - DLALS0 applies back the multiplying factors of either the left or the - right singular vector matrix of a diagonal matrix appended by a row - to the right hand side matrix B in solving the least squares problem - using the divide-and-conquer SVD approach. - - For the left singular vector matrix, three types of orthogonal - matrices are involved: - - (1L) Givens rotations: the number of such rotations is GIVPTR; the - pairs of columns/rows they were applied to are stored in GIVCOL; - and the C- and S-values of these rotations are stored in GIVNUM. - - (2L) Permutation. The (NL+1)-st row of B is to be moved to the first - row, and for J=2:N, PERM(J)-th row of B is to be moved to the - J-th row. - - (3L) The left singular vector matrix of the remaining matrix. - - For the right singular vector matrix, four types of orthogonal - matrices are involved: - - (1R) The right singular vector matrix of the remaining matrix. - - (2R) If SQRE = 1, one extra Givens rotation to generate the right - null space. - - (3R) The inverse transformation of (2L). - - (4R) The inverse transformation of (1L). - - Arguments - ========= - - ICOMPQ (input) INTEGER - Specifies whether singular vectors are to be computed in - factored form: - = 0: Left singular vector matrix. - = 1: Right singular vector matrix. - - NL (input) INTEGER - The row dimension of the upper block. NL >= 1. - - NR (input) INTEGER - The row dimension of the lower block. NR >= 1. - - SQRE (input) INTEGER - = 0: the lower block is an NR-by-NR square matrix. - = 1: the lower block is an NR-by-(NR+1) rectangular matrix. - - The bidiagonal matrix has row dimension N = NL + NR + 1, - and column dimension M = N + SQRE. - - NRHS (input) INTEGER - The number of columns of B and BX. NRHS must be at least 1. - - B (input/output) DOUBLE PRECISION array, dimension ( LDB, NRHS ) - On input, B contains the right hand sides of the least - squares problem in rows 1 through M. On output, B contains - the solution X in rows 1 through N. - - LDB (input) INTEGER - The leading dimension of B. LDB must be at least - max(1,MAX( M, N ) ). - - BX (workspace) DOUBLE PRECISION array, dimension ( LDBX, NRHS ) - - LDBX (input) INTEGER - The leading dimension of BX. - - PERM (input) INTEGER array, dimension ( N ) - The permutations (from deflation and sorting) applied - to the two blocks. - - GIVPTR (input) INTEGER - The number of Givens rotations which took place in this - subproblem. - - GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) - Each pair of numbers indicates a pair of rows/columns - involved in a Givens rotation. - - LDGCOL (input) INTEGER - The leading dimension of GIVCOL, must be at least N. - - GIVNUM (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) - Each number indicates the C or S value used in the - corresponding Givens rotation. - - LDGNUM (input) INTEGER - The leading dimension of arrays DIFR, POLES and - GIVNUM, must be at least K. - - POLES (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) - On entry, POLES(1:K, 1) contains the new singular - values obtained from solving the secular equation, and - POLES(1:K, 2) is an array containing the poles in the secular - equation. - - DIFL (input) DOUBLE PRECISION array, dimension ( K ). - On entry, DIFL(I) is the distance between I-th updated - (undeflated) singular value and the I-th (undeflated) old - singular value. - - DIFR (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ). - On entry, DIFR(I, 1) contains the distances between I-th - updated (undeflated) singular value and the I+1-th - (undeflated) old singular value. And DIFR(I, 2) is the - normalizing factor for the I-th right singular vector. - - Z (input) DOUBLE PRECISION array, dimension ( K ) - Contain the components of the deflation-adjusted updating row - vector. - - K (input) INTEGER - Contains the dimension of the non-deflated matrix, - This is the order of the related secular equation. 1 <= K <=N. - - C (input) DOUBLE PRECISION - C contains garbage if SQRE =0 and the C-value of a Givens - rotation related to the right null space if SQRE = 1. - - S (input) DOUBLE PRECISION - S contains garbage if SQRE =0 and the S-value of a Givens - rotation related to the right null space if SQRE = 1. - - WORK (workspace) DOUBLE PRECISION array, dimension ( K ) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - Based on contributions by - Ming Gu and Ren-Cang Li, Computer Science Division, University of - California at Berkeley, USA - Osni Marques, LBNL/NERSC, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - bx_dim1 = *ldbx; - bx_offset = 1 + bx_dim1; - bx -= bx_offset; - --perm; - givcol_dim1 = *ldgcol; - givcol_offset = 1 + givcol_dim1; - givcol -= givcol_offset; - difr_dim1 = *ldgnum; - difr_offset = 1 + difr_dim1; - difr -= difr_offset; - poles_dim1 = *ldgnum; - poles_offset = 1 + poles_dim1; - poles -= poles_offset; - givnum_dim1 = *ldgnum; - givnum_offset = 1 + givnum_dim1; - givnum -= givnum_offset; - --difl; - --z__; - --work; - - /* Function Body */ - *info = 0; - - if (*icompq < 0 || *icompq > 1) { - *info = -1; - } else if (*nl < 1) { - *info = -2; - } else if (*nr < 1) { - *info = -3; - } else if (*sqre < 0 || *sqre > 1) { - *info = -4; - } - - n = *nl + *nr + 1; - - if (*nrhs < 1) { - *info = -5; - } else if (*ldb < n) { - *info = -7; - } else if (*ldbx < n) { - *info = -9; - } else if (*givptr < 0) { - *info = -11; - } else if (*ldgcol < n) { - *info = -13; - } else if (*ldgnum < n) { - *info = -15; - } else if (*k < 1) { - *info = -20; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLALS0", &i__1); - return 0; - } - - m = n + *sqre; - nlp1 = *nl + 1; - - if (*icompq == 0) { - -/* - Apply back orthogonal transformations from the left. - - Step (1L): apply back the Givens rotations performed. -*/ - - i__1 = *givptr; - for (i__ = 1; i__ <= i__1; ++i__) { - drot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & - b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + - (givnum_dim1 << 1)], &givnum[i__ + givnum_dim1]); -/* L10: */ - } - -/* Step (2L): permute rows of B. */ - - dcopy_(nrhs, &b[nlp1 + b_dim1], ldb, &bx[bx_dim1 + 1], ldbx); - i__1 = n; - for (i__ = 2; i__ <= i__1; ++i__) { - dcopy_(nrhs, &b[perm[i__] + b_dim1], ldb, &bx[i__ + bx_dim1], - ldbx); -/* L20: */ - } - -/* - Step (3L): apply the inverse of the left singular vector - matrix to BX. -*/ - - if (*k == 1) { - dcopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb); - if (z__[1] < 0.) { - dscal_(nrhs, &c_b2589, &b[b_offset], ldb); - } - } else { - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - diflj = difl[j]; - dj = poles[j + poles_dim1]; - dsigj = -poles[j + (poles_dim1 << 1)]; - if (j < *k) { - difrj = -difr[j + difr_dim1]; - dsigjp = -poles[j + 1 + (poles_dim1 << 1)]; - } - if (z__[j] == 0. || poles[j + (poles_dim1 << 1)] == 0.) { - work[j] = 0.; - } else { - work[j] = -poles[j + (poles_dim1 << 1)] * z__[j] / diflj / - (poles[j + (poles_dim1 << 1)] + dj); - } - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - if (z__[i__] == 0. || poles[i__ + (poles_dim1 << 1)] == - 0.) { - work[i__] = 0.; - } else { - work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] - / (dlamc3_(&poles[i__ + (poles_dim1 << 1)], & - dsigj) - diflj) / (poles[i__ + (poles_dim1 << - 1)] + dj); - } -/* L30: */ - } - i__2 = *k; - for (i__ = j + 1; i__ <= i__2; ++i__) { - if (z__[i__] == 0. || poles[i__ + (poles_dim1 << 1)] == - 0.) { - work[i__] = 0.; - } else { - work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] - / (dlamc3_(&poles[i__ + (poles_dim1 << 1)], & - dsigjp) + difrj) / (poles[i__ + (poles_dim1 << - 1)] + dj); - } -/* L40: */ - } - work[1] = -1.; - temp = dnrm2_(k, &work[1], &c__1); - dgemv_("T", k, nrhs, &c_b2453, &bx[bx_offset], ldbx, &work[1], - &c__1, &c_b2467, &b[j + b_dim1], ldb); - dlascl_("G", &c__0, &c__0, &temp, &c_b2453, &c__1, nrhs, &b[j - + b_dim1], ldb, info); -/* L50: */ - } - } - -/* Move the deflated rows of BX to B also. */ - - if (*k < max(m,n)) { - i__1 = n - *k; - dlacpy_("A", &i__1, nrhs, &bx[*k + 1 + bx_dim1], ldbx, &b[*k + 1 - + b_dim1], ldb); - } - } else { - -/* - Apply back the right orthogonal transformations. - - Step (1R): apply back the new right singular vector matrix - to B. -*/ - - if (*k == 1) { - dcopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx); - } else { - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dsigj = poles[j + (poles_dim1 << 1)]; - if (z__[j] == 0.) { - work[j] = 0.; - } else { - work[j] = -z__[j] / difl[j] / (dsigj + poles[j + - poles_dim1]) / difr[j + (difr_dim1 << 1)]; - } - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - if (z__[j] == 0.) { - work[i__] = 0.; - } else { - d__1 = -poles[i__ + 1 + (poles_dim1 << 1)]; - work[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difr[ - i__ + difr_dim1]) / (dsigj + poles[i__ + - poles_dim1]) / difr[i__ + (difr_dim1 << 1)]; - } -/* L60: */ - } - i__2 = *k; - for (i__ = j + 1; i__ <= i__2; ++i__) { - if (z__[j] == 0.) { - work[i__] = 0.; - } else { - d__1 = -poles[i__ + (poles_dim1 << 1)]; - work[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difl[ - i__]) / (dsigj + poles[i__ + poles_dim1]) / - difr[i__ + (difr_dim1 << 1)]; - } -/* L70: */ - } - dgemv_("T", k, nrhs, &c_b2453, &b[b_offset], ldb, &work[1], & - c__1, &c_b2467, &bx[j + bx_dim1], ldbx); -/* L80: */ - } - } - -/* - Step (2R): if SQRE = 1, apply back the rotation that is - related to the right null space of the subproblem. -*/ - - if (*sqre == 1) { - dcopy_(nrhs, &b[m + b_dim1], ldb, &bx[m + bx_dim1], ldbx); - drot_(nrhs, &bx[bx_dim1 + 1], ldbx, &bx[m + bx_dim1], ldbx, c__, - s); - } - if (*k < max(m,n)) { - i__1 = n - *k; - dlacpy_("A", &i__1, nrhs, &b[*k + 1 + b_dim1], ldb, &bx[*k + 1 + - bx_dim1], ldbx); - } - -/* Step (3R): permute rows of B. */ - - dcopy_(nrhs, &bx[bx_dim1 + 1], ldbx, &b[nlp1 + b_dim1], ldb); - if (*sqre == 1) { - dcopy_(nrhs, &bx[m + bx_dim1], ldbx, &b[m + b_dim1], ldb); - } - i__1 = n; - for (i__ = 2; i__ <= i__1; ++i__) { - dcopy_(nrhs, &bx[i__ + bx_dim1], ldbx, &b[perm[i__] + b_dim1], - ldb); -/* L90: */ - } - -/* Step (4R): apply back the Givens rotations performed. */ - - for (i__ = *givptr; i__ >= 1; --i__) { - d__1 = -givnum[i__ + givnum_dim1]; - drot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & - b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + - (givnum_dim1 << 1)], &d__1); -/* L100: */ - } - } - - return 0; - -/* End of DLALS0 */ - -} /* dlals0_ */ - -/* Subroutine */ int dlalsa_(integer *icompq, integer *smlsiz, integer *n, - integer *nrhs, doublereal *b, integer *ldb, doublereal *bx, integer * - ldbx, doublereal *u, integer *ldu, doublereal *vt, integer *k, - doublereal *difl, doublereal *difr, doublereal *z__, doublereal * - poles, integer *givptr, integer *givcol, integer *ldgcol, integer * - perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal * - work, integer *iwork, integer *info) -{ - /* System generated locals */ - integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, b_dim1, - b_offset, bx_dim1, bx_offset, difl_dim1, difl_offset, difr_dim1, - difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, - u_dim1, u_offset, vt_dim1, vt_offset, z_dim1, z_offset, i__1, - i__2; - - /* Builtin functions */ - integer pow_ii(integer *, integer *); - - /* Local variables */ - static integer i__, j, i1, ic, lf, nd, ll, nl, nr, im1, nlf, nrf, lvl, - ndb1, nlp1, lvl2, nrp1, nlvl, sqre; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - static integer inode, ndiml, ndimr; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *), dlals0_(integer *, integer *, integer *, - integer *, integer *, doublereal *, integer *, doublereal *, - integer *, integer *, integer *, integer *, integer *, doublereal - *, integer *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *, doublereal *, doublereal *, doublereal *, - integer *), dlasdt_(integer *, integer *, integer *, integer *, - integer *, integer *, integer *), xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DLALSA is an itermediate step in solving the least squares problem - by computing the SVD of the coefficient matrix in compact form (The - singular vectors are computed as products of simple orthorgonal - matrices.). - - If ICOMPQ = 0, DLALSA applies the inverse of the left singular vector - matrix of an upper bidiagonal matrix to the right hand side; and if - ICOMPQ = 1, DLALSA applies the right singular vector matrix to the - right hand side. The singular vector matrices were generated in - compact form by DLALSA. - - Arguments - ========= - - - ICOMPQ (input) INTEGER - Specifies whether the left or the right singular vector - matrix is involved. - = 0: Left singular vector matrix - = 1: Right singular vector matrix - - SMLSIZ (input) INTEGER - The maximum size of the subproblems at the bottom of the - computation tree. - - N (input) INTEGER - The row and column dimensions of the upper bidiagonal matrix. - - NRHS (input) INTEGER - The number of columns of B and BX. NRHS must be at least 1. - - B (input) DOUBLE PRECISION array, dimension ( LDB, NRHS ) - On input, B contains the right hand sides of the least - squares problem in rows 1 through M. On output, B contains - the solution X in rows 1 through N. - - LDB (input) INTEGER - The leading dimension of B in the calling subprogram. - LDB must be at least max(1,MAX( M, N ) ). - - BX (output) DOUBLE PRECISION array, dimension ( LDBX, NRHS ) - On exit, the result of applying the left or right singular - vector matrix to B. - - LDBX (input) INTEGER - The leading dimension of BX. - - U (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ). - On entry, U contains the left singular vector matrices of all - subproblems at the bottom level. - - LDU (input) INTEGER, LDU = > N. - The leading dimension of arrays U, VT, DIFL, DIFR, - POLES, GIVNUM, and Z. - - VT (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ). - On entry, VT' contains the right singular vector matrices of - all subproblems at the bottom level. - - K (input) INTEGER array, dimension ( N ). - - DIFL (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). - where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. - - DIFR (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). - On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record - distances between singular values on the I-th level and - singular values on the (I -1)-th level, and DIFR(*, 2 * I) - record the normalizing factors of the right singular vectors - matrices of subproblems on I-th level. - - Z (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). - On entry, Z(1, I) contains the components of the deflation- - adjusted updating row vector for subproblems on the I-th - level. - - POLES (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). - On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old - singular values involved in the secular equations on the I-th - level. - - GIVPTR (input) INTEGER array, dimension ( N ). - On entry, GIVPTR( I ) records the number of Givens - rotations performed on the I-th problem on the computation - tree. - - GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). - On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the - locations of Givens rotations performed on the I-th level on - the computation tree. - - LDGCOL (input) INTEGER, LDGCOL = > N. - The leading dimension of arrays GIVCOL and PERM. - - PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). - On entry, PERM(*, I) records permutations done on the I-th - level of the computation tree. - - GIVNUM (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). - On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- - values of Givens rotations performed on the I-th level on the - computation tree. - - C (input) DOUBLE PRECISION array, dimension ( N ). - On entry, if the I-th subproblem is not square, - C( I ) contains the C-value of a Givens rotation related to - the right null space of the I-th subproblem. - - S (input) DOUBLE PRECISION array, dimension ( N ). - On entry, if the I-th subproblem is not square, - S( I ) contains the S-value of a Givens rotation related to - the right null space of the I-th subproblem. - - WORK (workspace) DOUBLE PRECISION array. - The dimension must be at least N. - - IWORK (workspace) INTEGER array. - The dimension must be at least 3 * N - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - Based on contributions by - Ming Gu and Ren-Cang Li, Computer Science Division, University of - California at Berkeley, USA - Osni Marques, LBNL/NERSC, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - bx_dim1 = *ldbx; - bx_offset = 1 + bx_dim1; - bx -= bx_offset; - givnum_dim1 = *ldu; - givnum_offset = 1 + givnum_dim1; - givnum -= givnum_offset; - poles_dim1 = *ldu; - poles_offset = 1 + poles_dim1; - poles -= poles_offset; - z_dim1 = *ldu; - z_offset = 1 + z_dim1; - z__ -= z_offset; - difr_dim1 = *ldu; - difr_offset = 1 + difr_dim1; - difr -= difr_offset; - difl_dim1 = *ldu; - difl_offset = 1 + difl_dim1; - difl -= difl_offset; - vt_dim1 = *ldu; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - --k; - --givptr; - perm_dim1 = *ldgcol; - perm_offset = 1 + perm_dim1; - perm -= perm_offset; - givcol_dim1 = *ldgcol; - givcol_offset = 1 + givcol_dim1; - givcol -= givcol_offset; - --c__; - --s; - --work; - --iwork; - - /* Function Body */ - *info = 0; - - if (*icompq < 0 || *icompq > 1) { - *info = -1; - } else if (*smlsiz < 3) { - *info = -2; - } else if (*n < *smlsiz) { - *info = -3; - } else if (*nrhs < 1) { - *info = -4; - } else if (*ldb < *n) { - *info = -6; - } else if (*ldbx < *n) { - *info = -8; - } else if (*ldu < *n) { - *info = -10; - } else if (*ldgcol < *n) { - *info = -19; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLALSA", &i__1); - return 0; - } - -/* Book-keeping and setting up the computation tree. */ - - inode = 1; - ndiml = inode + *n; - ndimr = ndiml + *n; - - dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], - smlsiz); - -/* - The following code applies back the left singular vector factors. - For applying back the right singular vector factors, go to 50. -*/ - - if (*icompq == 1) { - goto L50; - } - -/* - The nodes on the bottom level of the tree were solved - by DLASDQ. The corresponding left and right singular vector - matrices are in explicit form. First apply back the left - singular vector matrices. -*/ - - ndb1 = (nd + 1) / 2; - i__1 = nd; - for (i__ = ndb1; i__ <= i__1; ++i__) { - -/* - IC : center row of each node - NL : number of rows of left subproblem - NR : number of rows of right subproblem - NLF: starting row of the left subproblem - NRF: starting row of the right subproblem -*/ - - i1 = i__ - 1; - ic = iwork[inode + i1]; - nl = iwork[ndiml + i1]; - nr = iwork[ndimr + i1]; - nlf = ic - nl; - nrf = ic + 1; - dgemm_("T", "N", &nl, nrhs, &nl, &c_b2453, &u[nlf + u_dim1], ldu, &b[ - nlf + b_dim1], ldb, &c_b2467, &bx[nlf + bx_dim1], ldbx); - dgemm_("T", "N", &nr, nrhs, &nr, &c_b2453, &u[nrf + u_dim1], ldu, &b[ - nrf + b_dim1], ldb, &c_b2467, &bx[nrf + bx_dim1], ldbx); -/* L10: */ - } - -/* - Next copy the rows of B that correspond to unchanged rows - in the bidiagonal matrix to BX. -*/ - - i__1 = nd; - for (i__ = 1; i__ <= i__1; ++i__) { - ic = iwork[inode + i__ - 1]; - dcopy_(nrhs, &b[ic + b_dim1], ldb, &bx[ic + bx_dim1], ldbx); -/* L20: */ - } - -/* - Finally go through the left singular vector matrices of all - the other subproblems bottom-up on the tree. -*/ - - j = pow_ii(&c__2, &nlvl); - sqre = 0; - - for (lvl = nlvl; lvl >= 1; --lvl) { - lvl2 = (lvl << 1) - 1; - -/* - find the first node LF and last node LL on - the current level LVL -*/ - - if (lvl == 1) { - lf = 1; - ll = 1; - } else { - i__1 = lvl - 1; - lf = pow_ii(&c__2, &i__1); - ll = (lf << 1) - 1; - } - i__1 = ll; - for (i__ = lf; i__ <= i__1; ++i__) { - im1 = i__ - 1; - ic = iwork[inode + im1]; - nl = iwork[ndiml + im1]; - nr = iwork[ndimr + im1]; - nlf = ic - nl; - nrf = ic + 1; - --j; - dlals0_(icompq, &nl, &nr, &sqre, nrhs, &bx[nlf + bx_dim1], ldbx, & - b[nlf + b_dim1], ldb, &perm[nlf + lvl * perm_dim1], & - givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & - givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * - poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + - lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ - j], &s[j], &work[1], info); -/* L30: */ - } -/* L40: */ - } - goto L90; - -/* ICOMPQ = 1: applying back the right singular vector factors. */ - -L50: - -/* - First now go through the right singular vector matrices of all - the tree nodes top-down. -*/ - - j = 0; - i__1 = nlvl; - for (lvl = 1; lvl <= i__1; ++lvl) { - lvl2 = (lvl << 1) - 1; - -/* - Find the first node LF and last node LL on - the current level LVL. -*/ - - if (lvl == 1) { - lf = 1; - ll = 1; - } else { - i__2 = lvl - 1; - lf = pow_ii(&c__2, &i__2); - ll = (lf << 1) - 1; - } - i__2 = lf; - for (i__ = ll; i__ >= i__2; --i__) { - im1 = i__ - 1; - ic = iwork[inode + im1]; - nl = iwork[ndiml + im1]; - nr = iwork[ndimr + im1]; - nlf = ic - nl; - nrf = ic + 1; - if (i__ == ll) { - sqre = 0; - } else { - sqre = 1; - } - ++j; - dlals0_(icompq, &nl, &nr, &sqre, nrhs, &b[nlf + b_dim1], ldb, &bx[ - nlf + bx_dim1], ldbx, &perm[nlf + lvl * perm_dim1], & - givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & - givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * - poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + - lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ - j], &s[j], &work[1], info); -/* L60: */ - } -/* L70: */ - } - -/* - The nodes on the bottom level of the tree were solved - by DLASDQ. The corresponding right singular vector - matrices are in explicit form. Apply them back. -*/ - - ndb1 = (nd + 1) / 2; - i__1 = nd; - for (i__ = ndb1; i__ <= i__1; ++i__) { - i1 = i__ - 1; - ic = iwork[inode + i1]; - nl = iwork[ndiml + i1]; - nr = iwork[ndimr + i1]; - nlp1 = nl + 1; - if (i__ == nd) { - nrp1 = nr; - } else { - nrp1 = nr + 1; - } - nlf = ic - nl; - nrf = ic + 1; - dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b2453, &vt[nlf + vt_dim1], - ldu, &b[nlf + b_dim1], ldb, &c_b2467, &bx[nlf + bx_dim1], - ldbx); - dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b2453, &vt[nrf + vt_dim1], - ldu, &b[nrf + b_dim1], ldb, &c_b2467, &bx[nrf + bx_dim1], - ldbx); -/* L80: */ - } - -L90: - - return 0; - -/* End of DLALSA */ - -} /* dlalsa_ */ - -/* Subroutine */ int dlalsd_(char *uplo, integer *smlsiz, integer *n, integer - *nrhs, doublereal *d__, doublereal *e, doublereal *b, integer *ldb, - doublereal *rcond, integer *rank, doublereal *work, integer *iwork, - integer *info) -{ - /* System generated locals */ - integer b_dim1, b_offset, i__1, i__2; - doublereal d__1; - - /* Builtin functions */ - double log(doublereal), d_sign(doublereal *, doublereal *); - - /* Local variables */ - static integer c__, i__, j, k; - static doublereal r__; - static integer s, u, z__; - static doublereal cs; - static integer bx; - static doublereal sn; - static integer st, vt, nm1, st1; - static doublereal eps; - static integer iwk; - static doublereal tol; - static integer difl, difr, perm, nsub; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *); - static integer nlvl, sqre, bxst; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *), - dcopy_(integer *, doublereal *, integer *, doublereal *, integer - *); - static integer poles, sizei, nsize, nwork, icmpq1, icmpq2; - - extern /* Subroutine */ int dlasda_(integer *, integer *, integer *, - integer *, doublereal *, doublereal *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *, integer *, integer *, integer *, - doublereal *, doublereal *, doublereal *, doublereal *, integer *, - integer *), dlalsa_(integer *, integer *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - doublereal *, doublereal *, integer *, integer *, integer *, - integer *, doublereal *, doublereal *, doublereal *, doublereal *, - integer *, integer *), dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *); - extern integer idamax_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dlasdq_(char *, integer *, integer *, integer - *, integer *, integer *, doublereal *, doublereal *, doublereal *, - integer *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *), dlacpy_(char *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *), dlartg_(doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *), dlaset_(char *, integer *, integer *, - doublereal *, doublereal *, doublereal *, integer *), - xerbla_(char *, integer *); - static integer givcol; - extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); - extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, - integer *); - static doublereal orgnrm; - static integer givnum, givptr, smlszp; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - DLALSD uses the singular value decomposition of A to solve the least - squares problem of finding X to minimize the Euclidean norm of each - column of A*X-B, where A is N-by-N upper bidiagonal, and X and B - are N-by-NRHS. The solution X overwrites B. - - The singular values of A smaller than RCOND times the largest - singular value are treated as zero in solving the least squares - problem; in this case a minimum norm solution is returned. - The actual singular values are returned in D in ascending order. - - This code makes very mild assumptions about floating point - arithmetic. It will work on machines with a guard digit in - add/subtract, or on those binary machines without guard digits - which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. - It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': D and E define an upper bidiagonal matrix. - = 'L': D and E define a lower bidiagonal matrix. - - SMLSIZ (input) INTEGER - The maximum size of the subproblems at the bottom of the - computation tree. - - N (input) INTEGER - The dimension of the bidiagonal matrix. N >= 0. - - NRHS (input) INTEGER - The number of columns of B. NRHS must be at least 1. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry D contains the main diagonal of the bidiagonal - matrix. On exit, if INFO = 0, D contains its singular values. - - E (input) DOUBLE PRECISION array, dimension (N-1) - Contains the super-diagonal entries of the bidiagonal matrix. - On exit, E has been destroyed. - - B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) - On input, B contains the right hand sides of the least - squares problem. On output, B contains the solution X. - - LDB (input) INTEGER - The leading dimension of B in the calling subprogram. - LDB must be at least max(1,N). - - RCOND (input) DOUBLE PRECISION - The singular values of A less than or equal to RCOND times - the largest singular value are treated as zero in solving - the least squares problem. If RCOND is negative, - machine precision is used instead. - For example, if diag(S)*X=B were the least squares problem, - where diag(S) is a diagonal matrix of singular values, the - solution would be X(i) = B(i) / S(i) if S(i) is greater than - RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to - RCOND*max(S). - - RANK (output) INTEGER - The number of singular values of A greater than RCOND times - the largest singular value. - - WORK (workspace) DOUBLE PRECISION array, dimension at least - (9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2), - where NLVL = max(0, INT(log_2 (N/(SMLSIZ+1))) + 1). - - IWORK (workspace) INTEGER array, dimension at least - (3*N*NLVL + 11*N) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an singular value while - working on the submatrix lying in rows and columns - INFO/(N+1) through MOD(INFO,N+1). - - Further Details - =============== - - Based on contributions by - Ming Gu and Ren-Cang Li, Computer Science Division, University of - California at Berkeley, USA - Osni Marques, LBNL/NERSC, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - --work; - --iwork; - - /* Function Body */ - *info = 0; - - if (*n < 0) { - *info = -3; - } else if (*nrhs < 1) { - *info = -4; - } else if (*ldb < 1 || *ldb < *n) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLALSD", &i__1); - return 0; - } - - eps = EPSILON; - -/* Set up the tolerance. */ - - if (*rcond <= 0. || *rcond >= 1.) { - *rcond = eps; - } - - *rank = 0; - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } else if (*n == 1) { - if (d__[1] == 0.) { - dlaset_("A", &c__1, nrhs, &c_b2467, &c_b2467, &b[b_offset], ldb); - } else { - *rank = 1; - dlascl_("G", &c__0, &c__0, &d__[1], &c_b2453, &c__1, nrhs, &b[ - b_offset], ldb, info); - d__[1] = abs(d__[1]); - } - return 0; - } - -/* Rotate the matrix if it is lower bidiagonal. */ - - if (*(unsigned char *)uplo == 'L') { - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); - d__[i__] = r__; - e[i__] = sn * d__[i__ + 1]; - d__[i__ + 1] = cs * d__[i__ + 1]; - if (*nrhs == 1) { - drot_(&c__1, &b[i__ + b_dim1], &c__1, &b[i__ + 1 + b_dim1], & - c__1, &cs, &sn); - } else { - work[(i__ << 1) - 1] = cs; - work[i__ * 2] = sn; - } -/* L10: */ - } - if (*nrhs > 1) { - i__1 = *nrhs; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = *n - 1; - for (j = 1; j <= i__2; ++j) { - cs = work[(j << 1) - 1]; - sn = work[j * 2]; - drot_(&c__1, &b[j + i__ * b_dim1], &c__1, &b[j + 1 + i__ * - b_dim1], &c__1, &cs, &sn); -/* L20: */ - } -/* L30: */ - } - } - } - -/* Scale. */ - - nm1 = *n - 1; - orgnrm = dlanst_("M", n, &d__[1], &e[1]); - if (orgnrm == 0.) { - dlaset_("A", n, nrhs, &c_b2467, &c_b2467, &b[b_offset], ldb); - return 0; - } - - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2453, n, &c__1, &d__[1], n, info); - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2453, &nm1, &c__1, &e[1], &nm1, - info); - -/* - If N is smaller than the minimum divide size SMLSIZ, then solve - the problem with another solver. -*/ - - if (*n <= *smlsiz) { - nwork = *n * *n + 1; - dlaset_("A", n, n, &c_b2467, &c_b2453, &work[1], n); - dlasdq_("U", &c__0, n, n, &c__0, nrhs, &d__[1], &e[1], &work[1], n, & - work[1], n, &b[b_offset], ldb, &work[nwork], info); - if (*info != 0) { - return 0; - } - tol = *rcond * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if (d__[i__] <= tol) { - dlaset_("A", &c__1, nrhs, &c_b2467, &c_b2467, &b[i__ + b_dim1] - , ldb); - } else { - dlascl_("G", &c__0, &c__0, &d__[i__], &c_b2453, &c__1, nrhs, & - b[i__ + b_dim1], ldb, info); - ++(*rank); - } -/* L40: */ - } - dgemm_("T", "N", n, nrhs, n, &c_b2453, &work[1], n, &b[b_offset], ldb, - &c_b2467, &work[nwork], n); - dlacpy_("A", n, nrhs, &work[nwork], n, &b[b_offset], ldb); - -/* Unscale. */ - - dlascl_("G", &c__0, &c__0, &c_b2453, &orgnrm, n, &c__1, &d__[1], n, - info); - dlasrt_("D", n, &d__[1], info); - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2453, n, nrhs, &b[b_offset], - ldb, info); - - return 0; - } - -/* Book-keeping and setting up some constants. */ - - nlvl = (integer) (log((doublereal) (*n) / (doublereal) (*smlsiz + 1)) / - log(2.)) + 1; - - smlszp = *smlsiz + 1; - - u = 1; - vt = *smlsiz * *n + 1; - difl = vt + smlszp * *n; - difr = difl + nlvl * *n; - z__ = difr + (nlvl * *n << 1); - c__ = z__ + nlvl * *n; - s = c__ + *n; - poles = s + *n; - givnum = poles + (nlvl << 1) * *n; - bx = givnum + (nlvl << 1) * *n; - nwork = bx + *n * *nrhs; - - sizei = *n + 1; - k = sizei + *n; - givptr = k + *n; - perm = givptr + *n; - givcol = perm + nlvl * *n; - iwk = givcol + (nlvl * *n << 1); - - st = 1; - sqre = 0; - icmpq1 = 1; - icmpq2 = 0; - nsub = 0; - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if ((d__1 = d__[i__], abs(d__1)) < eps) { - d__[i__] = d_sign(&eps, &d__[i__]); - } -/* L50: */ - } - - i__1 = nm1; - for (i__ = 1; i__ <= i__1; ++i__) { - if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) { - ++nsub; - iwork[nsub] = st; - -/* - Subproblem found. First determine its size and then - apply divide and conquer on it. -*/ - - if (i__ < nm1) { - -/* A subproblem with E(I) small for I < NM1. */ - - nsize = i__ - st + 1; - iwork[sizei + nsub - 1] = nsize; - } else if ((d__1 = e[i__], abs(d__1)) >= eps) { - -/* A subproblem with E(NM1) not too small but I = NM1. */ - - nsize = *n - st + 1; - iwork[sizei + nsub - 1] = nsize; - } else { - -/* - A subproblem with E(NM1) small. This implies an - 1-by-1 subproblem at D(N), which is not solved - explicitly. -*/ - - nsize = i__ - st + 1; - iwork[sizei + nsub - 1] = nsize; - ++nsub; - iwork[nsub] = *n; - iwork[sizei + nsub - 1] = 1; - dcopy_(nrhs, &b[*n + b_dim1], ldb, &work[bx + nm1], n); - } - st1 = st - 1; - if (nsize == 1) { - -/* - This is a 1-by-1 subproblem and is not solved - explicitly. -*/ - - dcopy_(nrhs, &b[st + b_dim1], ldb, &work[bx + st1], n); - } else if (nsize <= *smlsiz) { - -/* This is a small subproblem and is solved by DLASDQ. */ - - dlaset_("A", &nsize, &nsize, &c_b2467, &c_b2453, &work[vt + - st1], n); - dlasdq_("U", &c__0, &nsize, &nsize, &c__0, nrhs, &d__[st], &e[ - st], &work[vt + st1], n, &work[nwork], n, &b[st + - b_dim1], ldb, &work[nwork], info); - if (*info != 0) { - return 0; - } - dlacpy_("A", &nsize, nrhs, &b[st + b_dim1], ldb, &work[bx + - st1], n); - } else { - -/* A large problem. Solve it using divide and conquer. */ - - dlasda_(&icmpq1, smlsiz, &nsize, &sqre, &d__[st], &e[st], & - work[u + st1], n, &work[vt + st1], &iwork[k + st1], & - work[difl + st1], &work[difr + st1], &work[z__ + st1], - &work[poles + st1], &iwork[givptr + st1], &iwork[ - givcol + st1], n, &iwork[perm + st1], &work[givnum + - st1], &work[c__ + st1], &work[s + st1], &work[nwork], - &iwork[iwk], info); - if (*info != 0) { - return 0; - } - bxst = bx + st1; - dlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &b[st + b_dim1], ldb, & - work[bxst], n, &work[u + st1], n, &work[vt + st1], & - iwork[k + st1], &work[difl + st1], &work[difr + st1], - &work[z__ + st1], &work[poles + st1], &iwork[givptr + - st1], &iwork[givcol + st1], n, &iwork[perm + st1], & - work[givnum + st1], &work[c__ + st1], &work[s + st1], - &work[nwork], &iwork[iwk], info); - if (*info != 0) { - return 0; - } - } - st = i__ + 1; - } -/* L60: */ - } - -/* Apply the singular values and treat the tiny ones as zero. */ - - tol = *rcond * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* - Some of the elements in D can be negative because 1-by-1 - subproblems were not solved explicitly. -*/ - - if ((d__1 = d__[i__], abs(d__1)) <= tol) { - dlaset_("A", &c__1, nrhs, &c_b2467, &c_b2467, &work[bx + i__ - 1], - n); - } else { - ++(*rank); - dlascl_("G", &c__0, &c__0, &d__[i__], &c_b2453, &c__1, nrhs, & - work[bx + i__ - 1], n, info); - } - d__[i__] = (d__1 = d__[i__], abs(d__1)); -/* L70: */ - } - -/* Now apply back the right singular vectors. */ - - icmpq2 = 1; - i__1 = nsub; - for (i__ = 1; i__ <= i__1; ++i__) { - st = iwork[i__]; - st1 = st - 1; - nsize = iwork[sizei + i__ - 1]; - bxst = bx + st1; - if (nsize == 1) { - dcopy_(nrhs, &work[bxst], n, &b[st + b_dim1], ldb); - } else if (nsize <= *smlsiz) { - dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b2453, &work[vt + st1], - n, &work[bxst], n, &c_b2467, &b[st + b_dim1], ldb); - } else { - dlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &work[bxst], n, &b[st + - b_dim1], ldb, &work[u + st1], n, &work[vt + st1], &iwork[ - k + st1], &work[difl + st1], &work[difr + st1], &work[z__ - + st1], &work[poles + st1], &iwork[givptr + st1], &iwork[ - givcol + st1], n, &iwork[perm + st1], &work[givnum + st1], - &work[c__ + st1], &work[s + st1], &work[nwork], &iwork[ - iwk], info); - if (*info != 0) { - return 0; - } - } -/* L80: */ - } - -/* Unscale and sort the singular values. */ - - dlascl_("G", &c__0, &c__0, &c_b2453, &orgnrm, n, &c__1, &d__[1], n, info); - dlasrt_("D", n, &d__[1], info); - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2453, n, nrhs, &b[b_offset], ldb, - info); - - return 0; - -/* End of DLALSD */ - -} /* dlalsd_ */ - -/* Subroutine */ int dlamrg_(integer *n1, integer *n2, doublereal *a, integer - *dtrd1, integer *dtrd2, integer *index) -{ - /* System generated locals */ - integer i__1; - - /* Local variables */ - static integer i__, ind1, ind2, n1sv, n2sv; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - DLAMRG will create a permutation list which will merge the elements - of A (which is composed of two independently sorted sets) into a - single set which is sorted in ascending order. - - Arguments - ========= - - N1 (input) INTEGER - N2 (input) INTEGER - These arguements contain the respective lengths of the two - sorted lists to be merged. - - A (input) DOUBLE PRECISION array, dimension (N1+N2) - The first N1 elements of A contain a list of numbers which - are sorted in either ascending or descending order. Likewise - for the final N2 elements. - - DTRD1 (input) INTEGER - DTRD2 (input) INTEGER - These are the strides to be taken through the array A. - Allowable strides are 1 and -1. They indicate whether a - subset of A is sorted in ascending (DTRDx = 1) or descending - (DTRDx = -1) order. - - INDEX (output) INTEGER array, dimension (N1+N2) - On exit this array will contain a permutation such that - if B( I ) = A( INDEX( I ) ) for I=1,N1+N2, then B will be - sorted in ascending order. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --index; - --a; - - /* Function Body */ - n1sv = *n1; - n2sv = *n2; - if (*dtrd1 > 0) { - ind1 = 1; - } else { - ind1 = *n1; - } - if (*dtrd2 > 0) { - ind2 = *n1 + 1; - } else { - ind2 = *n1 + *n2; - } - i__ = 1; -/* while ( (N1SV > 0) & (N2SV > 0) ) */ -L10: - if (n1sv > 0 && n2sv > 0) { - if (a[ind1] <= a[ind2]) { - index[i__] = ind1; - ++i__; - ind1 += *dtrd1; - --n1sv; - } else { - index[i__] = ind2; - ++i__; - ind2 += *dtrd2; - --n2sv; - } - goto L10; - } -/* end while */ - if (n1sv == 0) { - i__1 = n2sv; - for (n1sv = 1; n1sv <= i__1; ++n1sv) { - index[i__] = ind2; - ++i__; - ind2 += *dtrd2; -/* L20: */ - } - } else { -/* N2SV .EQ. 0 */ - i__1 = n1sv; - for (n2sv = 1; n2sv <= i__1; ++n2sv) { - index[i__] = ind1; - ++i__; - ind1 += *dtrd1; -/* L30: */ - } - } - - return 0; - -/* End of DLAMRG */ - -} /* dlamrg_ */ - -doublereal dlange_(char *norm, integer *m, integer *n, doublereal *a, integer - *lda, doublereal *work) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - doublereal ret_val, d__1, d__2, d__3; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__, j; - static doublereal sum, scale; - extern logical lsame_(char *, char *); - static doublereal value; - extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, - doublereal *, doublereal *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLANGE returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - real matrix A. - - Description - =========== - - DLANGE returns the value - - DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in DLANGE as described - above. - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. When M = 0, - DLANGE is set to zero. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. When N = 0, - DLANGE is set to zero. - - A (input) DOUBLE PRECISION array, dimension (LDA,N) - The m by n matrix A. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(M,1). - - WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), - where LWORK >= M when NORM = 'I'; otherwise, WORK is not - referenced. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --work; - - /* Function Body */ - if (min(*m,*n) == 0) { - value = 0.; - } else if (lsame_(norm, "M")) { - -/* Find max(abs(A(i,j))). */ - - value = 0.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); - value = max(d__2,d__3); -/* L10: */ - } -/* L20: */ - } - } else if (lsame_(norm, "O") || *(unsigned char *) - norm == '1') { - -/* Find norm1(A). */ - - value = 0.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = 0.; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); -/* L30: */ - } - value = max(value,sum); -/* L40: */ - } - } else if (lsame_(norm, "I")) { - -/* Find normI(A). */ - - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.; -/* L50: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); -/* L60: */ - } -/* L70: */ - } - value = 0.; - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__1 = value, d__2 = work[i__]; - value = max(d__1,d__2); -/* L80: */ - } - } else if (lsame_(norm, "F") || lsame_(norm, "E")) { - -/* Find normF(A). */ - - scale = 0.; - sum = 1.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - dlassq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum); -/* L90: */ - } - value = scale * sqrt(sum); - } - - ret_val = value; - return ret_val; - -/* End of DLANGE */ - -} /* dlange_ */ - -doublereal dlanhs_(char *norm, integer *n, doublereal *a, integer *lda, - doublereal *work) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - doublereal ret_val, d__1, d__2, d__3; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__, j; - static doublereal sum, scale; - extern logical lsame_(char *, char *); - static doublereal value; - extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, - doublereal *, doublereal *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLANHS returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - Hessenberg matrix A. - - Description - =========== - - DLANHS returns the value - - DLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in DLANHS as described - above. - - N (input) INTEGER - The order of the matrix A. N >= 0. When N = 0, DLANHS is - set to zero. - - A (input) DOUBLE PRECISION array, dimension (LDA,N) - The n by n upper Hessenberg matrix A; the part of A below the - first sub-diagonal is not referenced. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(N,1). - - WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), - where LWORK >= N when NORM = 'I'; otherwise, WORK is not - referenced. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --work; - - /* Function Body */ - if (*n == 0) { - value = 0.; - } else if (lsame_(norm, "M")) { - -/* Find max(abs(A(i,j))). */ - - value = 0.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); - value = max(d__2,d__3); -/* L10: */ - } -/* L20: */ - } - } else if (lsame_(norm, "O") || *(unsigned char *) - norm == '1') { - -/* Find norm1(A). */ - - value = 0.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = 0.; -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); -/* L30: */ - } - value = max(value,sum); -/* L40: */ - } - } else if (lsame_(norm, "I")) { - -/* Find normI(A). */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.; -/* L50: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); -/* L60: */ - } -/* L70: */ - } - value = 0.; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__1 = value, d__2 = work[i__]; - value = max(d__1,d__2); -/* L80: */ - } - } else if (lsame_(norm, "F") || lsame_(norm, "E")) { - -/* Find normF(A). */ - - scale = 0.; - sum = 1.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - dlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); -/* L90: */ - } - value = scale * sqrt(sum); - } - - ret_val = value; - return ret_val; - -/* End of DLANHS */ - -} /* dlanhs_ */ - -doublereal dlanst_(char *norm, integer *n, doublereal *d__, doublereal *e) -{ - /* System generated locals */ - integer i__1; - doublereal ret_val, d__1, d__2, d__3, d__4, d__5; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__; - static doublereal sum, scale; - extern logical lsame_(char *, char *); - static doublereal anorm; - extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, - doublereal *, doublereal *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DLANST returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - real symmetric tridiagonal matrix A. - - Description - =========== - - DLANST returns the value - - DLANST = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in DLANST as described - above. - - N (input) INTEGER - The order of the matrix A. N >= 0. When N = 0, DLANST is - set to zero. - - D (input) DOUBLE PRECISION array, dimension (N) - The diagonal elements of A. - - E (input) DOUBLE PRECISION array, dimension (N-1) - The (n-1) sub-diagonal or super-diagonal elements of A. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --e; - --d__; - - /* Function Body */ - if (*n <= 0) { - anorm = 0.; - } else if (lsame_(norm, "M")) { - -/* Find max(abs(A(i,j))). */ - - anorm = (d__1 = d__[*n], abs(d__1)); - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__2 = anorm, d__3 = (d__1 = d__[i__], abs(d__1)); - anorm = max(d__2,d__3); -/* Computing MAX */ - d__2 = anorm, d__3 = (d__1 = e[i__], abs(d__1)); - anorm = max(d__2,d__3); -/* L10: */ - } - } else if (lsame_(norm, "O") || *(unsigned char *) - norm == '1' || lsame_(norm, "I")) { - -/* Find norm1(A). */ - - if (*n == 1) { - anorm = abs(d__[1]); - } else { -/* Computing MAX */ - d__3 = abs(d__[1]) + abs(e[1]), d__4 = (d__1 = e[*n - 1], abs( - d__1)) + (d__2 = d__[*n], abs(d__2)); - anorm = max(d__3,d__4); - i__1 = *n - 1; - for (i__ = 2; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__4 = anorm, d__5 = (d__1 = d__[i__], abs(d__1)) + (d__2 = e[ - i__], abs(d__2)) + (d__3 = e[i__ - 1], abs(d__3)); - anorm = max(d__4,d__5); -/* L20: */ - } - } - } else if (lsame_(norm, "F") || lsame_(norm, "E")) { - -/* Find normF(A). */ - - scale = 0.; - sum = 1.; - if (*n > 1) { - i__1 = *n - 1; - dlassq_(&i__1, &e[1], &c__1, &scale, &sum); - sum *= 2; - } - dlassq_(n, &d__[1], &c__1, &scale, &sum); - anorm = scale * sqrt(sum); - } - - ret_val = anorm; - return ret_val; - -/* End of DLANST */ - -} /* dlanst_ */ - -doublereal dlansy_(char *norm, char *uplo, integer *n, doublereal *a, integer - *lda, doublereal *work) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - doublereal ret_val, d__1, d__2, d__3; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__, j; - static doublereal sum, absa, scale; - extern logical lsame_(char *, char *); - static doublereal value; - extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, - doublereal *, doublereal *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLANSY returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - real symmetric matrix A. - - Description - =========== - - DLANSY returns the value - - DLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in DLANSY as described - above. - - UPLO (input) CHARACTER*1 - Specifies whether the upper or lower triangular part of the - symmetric matrix A is to be referenced. - = 'U': Upper triangular part of A is referenced - = 'L': Lower triangular part of A is referenced - - N (input) INTEGER - The order of the matrix A. N >= 0. When N = 0, DLANSY is - set to zero. - - A (input) DOUBLE PRECISION array, dimension (LDA,N) - The symmetric matrix A. If UPLO = 'U', the leading n by n - upper triangular part of A contains the upper triangular part - of the matrix A, and the strictly lower triangular part of A - is not referenced. If UPLO = 'L', the leading n by n lower - triangular part of A contains the lower triangular part of - the matrix A, and the strictly upper triangular part of A is - not referenced. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(N,1). - - WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), - where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, - WORK is not referenced. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --work; - - /* Function Body */ - if (*n == 0) { - value = 0.; - } else if (lsame_(norm, "M")) { - -/* Find max(abs(A(i,j))). */ - - value = 0.; - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs( - d__1)); - value = max(d__2,d__3); -/* L10: */ - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { -/* Computing MAX */ - d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs( - d__1)); - value = max(d__2,d__3); -/* L30: */ - } -/* L40: */ - } - } - } else if (lsame_(norm, "I") || lsame_(norm, "O") || *(unsigned char *)norm == '1') { - -/* Find normI(A) ( = norm1(A), since A is symmetric). */ - - value = 0.; - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = 0.; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - absa = (d__1 = a[i__ + j * a_dim1], abs(d__1)); - sum += absa; - work[i__] += absa; -/* L50: */ - } - work[j] = sum + (d__1 = a[j + j * a_dim1], abs(d__1)); -/* L60: */ - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__1 = value, d__2 = work[i__]; - value = max(d__1,d__2); -/* L70: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.; -/* L80: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = work[j] + (d__1 = a[j + j * a_dim1], abs(d__1)); - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - absa = (d__1 = a[i__ + j * a_dim1], abs(d__1)); - sum += absa; - work[i__] += absa; -/* L90: */ - } - value = max(value,sum); -/* L100: */ - } - } - } else if (lsame_(norm, "F") || lsame_(norm, "E")) { - -/* Find normF(A). */ - - scale = 0.; - sum = 1.; - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 2; j <= i__1; ++j) { - i__2 = j - 1; - dlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); -/* L110: */ - } - } else { - i__1 = *n - 1; - for (j = 1; j <= i__1; ++j) { - i__2 = *n - j; - dlassq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum); -/* L120: */ - } - } - sum *= 2; - i__1 = *lda + 1; - dlassq_(n, &a[a_offset], &i__1, &scale, &sum); - value = scale * sqrt(sum); - } - - ret_val = value; - return ret_val; - -/* End of DLANSY */ - -} /* dlansy_ */ - -/* Subroutine */ int dlanv2_(doublereal *a, doublereal *b, doublereal *c__, - doublereal *d__, doublereal *rt1r, doublereal *rt1i, doublereal *rt2r, - doublereal *rt2i, doublereal *cs, doublereal *sn) -{ - /* System generated locals */ - doublereal d__1, d__2; - - /* Builtin functions */ - double d_sign(doublereal *, doublereal *), sqrt(doublereal); - - /* Local variables */ - static doublereal p, z__, aa, bb, cc, dd, cs1, sn1, sab, sac, eps, tau, - temp, scale, bcmax, bcmis, sigma; - - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DLANV2 computes the Schur factorization of a real 2-by-2 nonsymmetric - matrix in standard form: - - [ A B ] = [ CS -SN ] [ AA BB ] [ CS SN ] - [ C D ] [ SN CS ] [ CC DD ] [-SN CS ] - - where either - 1) CC = 0 so that AA and DD are real eigenvalues of the matrix, or - 2) AA = DD and BB*CC < 0, so that AA + or - sqrt(BB*CC) are complex - conjugate eigenvalues. - - Arguments - ========= - - A (input/output) DOUBLE PRECISION - B (input/output) DOUBLE PRECISION - C (input/output) DOUBLE PRECISION - D (input/output) DOUBLE PRECISION - On entry, the elements of the input matrix. - On exit, they are overwritten by the elements of the - standardised Schur form. - - RT1R (output) DOUBLE PRECISION - RT1I (output) DOUBLE PRECISION - RT2R (output) DOUBLE PRECISION - RT2I (output) DOUBLE PRECISION - The real and imaginary parts of the eigenvalues. If the - eigenvalues are a complex conjugate pair, RT1I > 0. - - CS (output) DOUBLE PRECISION - SN (output) DOUBLE PRECISION - Parameters of the rotation matrix. - - Further Details - =============== - - Modified by V. Sima, Research Institute for Informatics, Bucharest, - Romania, to reduce the risk of cancellation errors, - when computing real eigenvalues, and to ensure, if possible, that - abs(RT1R) >= abs(RT2R). - - ===================================================================== -*/ - - - eps = PRECISION; - if (*c__ == 0.) { - *cs = 1.; - *sn = 0.; - goto L10; - - } else if (*b == 0.) { - -/* Swap rows and columns */ - - *cs = 0.; - *sn = 1.; - temp = *d__; - *d__ = *a; - *a = temp; - *b = -(*c__); - *c__ = 0.; - goto L10; - } else if (*a - *d__ == 0. && d_sign(&c_b2453, b) != d_sign(&c_b2453, c__) - ) { - *cs = 1.; - *sn = 0.; - goto L10; - } else { - - temp = *a - *d__; - p = temp * .5; -/* Computing MAX */ - d__1 = abs(*b), d__2 = abs(*c__); - bcmax = max(d__1,d__2); -/* Computing MIN */ - d__1 = abs(*b), d__2 = abs(*c__); - bcmis = min(d__1,d__2) * d_sign(&c_b2453, b) * d_sign(&c_b2453, c__); -/* Computing MAX */ - d__1 = abs(p); - scale = max(d__1,bcmax); - z__ = p / scale * p + bcmax / scale * bcmis; - -/* - If Z is of the order of the machine accuracy, postpone the - decision on the nature of eigenvalues -*/ - - if (z__ >= eps * 4.) { - -/* Real eigenvalues. Compute A and D. */ - - d__1 = sqrt(scale) * sqrt(z__); - z__ = p + d_sign(&d__1, &p); - *a = *d__ + z__; - *d__ -= bcmax / z__ * bcmis; - -/* Compute B and the rotation matrix */ - - tau = dlapy2_(c__, &z__); - *cs = z__ / tau; - *sn = *c__ / tau; - *b -= *c__; - *c__ = 0.; - } else { - -/* - Complex eigenvalues, or real (almost) equal eigenvalues. - Make diagonal elements equal. -*/ - - sigma = *b + *c__; - tau = dlapy2_(&sigma, &temp); - *cs = sqrt((abs(sigma) / tau + 1.) * .5); - *sn = -(p / (tau * *cs)) * d_sign(&c_b2453, &sigma); - -/* - Compute [ AA BB ] = [ A B ] [ CS -SN ] - [ CC DD ] [ C D ] [ SN CS ] -*/ - - aa = *a * *cs + *b * *sn; - bb = -(*a) * *sn + *b * *cs; - cc = *c__ * *cs + *d__ * *sn; - dd = -(*c__) * *sn + *d__ * *cs; - -/* - Compute [ A B ] = [ CS SN ] [ AA BB ] - [ C D ] [-SN CS ] [ CC DD ] -*/ - - *a = aa * *cs + cc * *sn; - *b = bb * *cs + dd * *sn; - *c__ = -aa * *sn + cc * *cs; - *d__ = -bb * *sn + dd * *cs; - - temp = (*a + *d__) * .5; - *a = temp; - *d__ = temp; - - if (*c__ != 0.) { - if (*b != 0.) { - if (d_sign(&c_b2453, b) == d_sign(&c_b2453, c__)) { - -/* Real eigenvalues: reduce to upper triangular form */ - - sab = sqrt((abs(*b))); - sac = sqrt((abs(*c__))); - d__1 = sab * sac; - p = d_sign(&d__1, c__); - tau = 1. / sqrt((d__1 = *b + *c__, abs(d__1))); - *a = temp + p; - *d__ = temp - p; - *b -= *c__; - *c__ = 0.; - cs1 = sab * tau; - sn1 = sac * tau; - temp = *cs * cs1 - *sn * sn1; - *sn = *cs * sn1 + *sn * cs1; - *cs = temp; - } - } else { - *b = -(*c__); - *c__ = 0.; - temp = *cs; - *cs = -(*sn); - *sn = temp; - } - } - } - - } - -L10: - -/* Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I). */ - - *rt1r = *a; - *rt2r = *d__; - if (*c__ == 0.) { - *rt1i = 0.; - *rt2i = 0.; - } else { - *rt1i = sqrt((abs(*b))) * sqrt((abs(*c__))); - *rt2i = -(*rt1i); - } - return 0; - -/* End of DLANV2 */ - -} /* dlanv2_ */ - -doublereal dlapy2_(doublereal *x, doublereal *y) -{ - /* System generated locals */ - doublereal ret_val, d__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal w, z__, xabs, yabs; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary - overflow. - - Arguments - ========= - - X (input) DOUBLE PRECISION - Y (input) DOUBLE PRECISION - X and Y specify the values x and y. - - ===================================================================== -*/ - - - xabs = abs(*x); - yabs = abs(*y); - w = max(xabs,yabs); - z__ = min(xabs,yabs); - if (z__ == 0.) { - ret_val = w; - } else { -/* Computing 2nd power */ - d__1 = z__ / w; - ret_val = w * sqrt(d__1 * d__1 + 1.); - } - return ret_val; - -/* End of DLAPY2 */ - -} /* dlapy2_ */ - -doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) -{ - /* System generated locals */ - doublereal ret_val, d__1, d__2, d__3; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal w, xabs, yabs, zabs; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause - unnecessary overflow. - - Arguments - ========= - - X (input) DOUBLE PRECISION - Y (input) DOUBLE PRECISION - Z (input) DOUBLE PRECISION - X, Y and Z specify the values x, y and z. - - ===================================================================== -*/ - - - xabs = abs(*x); - yabs = abs(*y); - zabs = abs(*z__); -/* Computing MAX */ - d__1 = max(xabs,yabs); - w = max(d__1,zabs); - if (w == 0.) { - ret_val = 0.; - } else { -/* Computing 2nd power */ - d__1 = xabs / w; -/* Computing 2nd power */ - d__2 = yabs / w; -/* Computing 2nd power */ - d__3 = zabs / w; - ret_val = w * sqrt(d__1 * d__1 + d__2 * d__2 + d__3 * d__3); - } - return ret_val; - -/* End of DLAPY3 */ - -} /* dlapy3_ */ - -/* Subroutine */ int dlarf_(char *side, integer *m, integer *n, doublereal *v, - integer *incv, doublereal *tau, doublereal *c__, integer *ldc, - doublereal *work) -{ - /* System generated locals */ - integer c_dim1, c_offset; - doublereal d__1; - - /* Local variables */ - extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DLARF applies a real elementary reflector H to a real m by n matrix - C, from either the left or the right. H is represented in the form - - H = I - tau * v * v' - - where tau is a real scalar and v is a real vector. - - If tau = 0, then H is taken to be the unit matrix. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': form H * C - = 'R': form C * H - - M (input) INTEGER - The number of rows of the matrix C. - - N (input) INTEGER - The number of columns of the matrix C. - - V (input) DOUBLE PRECISION array, dimension - (1 + (M-1)*abs(INCV)) if SIDE = 'L' - or (1 + (N-1)*abs(INCV)) if SIDE = 'R' - The vector v in the representation of H. V is not used if - TAU = 0. - - INCV (input) INTEGER - The increment between elements of v. INCV <> 0. - - TAU (input) DOUBLE PRECISION - The value tau in the representation of H. - - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) - On entry, the m by n matrix C. - On exit, C is overwritten by the matrix H * C if SIDE = 'L', - or C * H if SIDE = 'R'. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace) DOUBLE PRECISION array, dimension - (N) if SIDE = 'L' - or (M) if SIDE = 'R' - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --v; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - if (lsame_(side, "L")) { - -/* Form H * C */ - - if (*tau != 0.) { - -/* w := C' * v */ - - dgemv_("Transpose", m, n, &c_b2453, &c__[c_offset], ldc, &v[1], - incv, &c_b2467, &work[1], &c__1); - -/* C := C - v * w' */ - - d__1 = -(*tau); - dger_(m, n, &d__1, &v[1], incv, &work[1], &c__1, &c__[c_offset], - ldc); - } - } else { - -/* Form C * H */ - - if (*tau != 0.) { - -/* w := C * v */ - - dgemv_("No transpose", m, n, &c_b2453, &c__[c_offset], ldc, &v[1], - incv, &c_b2467, &work[1], &c__1); - -/* C := C - w * v' */ - - d__1 = -(*tau); - dger_(m, n, &d__1, &work[1], &c__1, &v[1], incv, &c__[c_offset], - ldc); - } - } - return 0; - -/* End of DLARF */ - -} /* dlarf_ */ - -/* Subroutine */ int dlarfb_(char *side, char *trans, char *direct, char * - storev, integer *m, integer *n, integer *k, doublereal *v, integer * - ldv, doublereal *t, integer *ldt, doublereal *c__, integer *ldc, - doublereal *work, integer *ldwork) -{ - /* System generated locals */ - integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, - work_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *), dtrmm_(char *, char *, char *, char *, - integer *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *); - static char transt[1]; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DLARFB applies a real block reflector H or its transpose H' to a - real m by n matrix C, from either the left or the right. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply H or H' from the Left - = 'R': apply H or H' from the Right - - TRANS (input) CHARACTER*1 - = 'N': apply H (No transpose) - = 'T': apply H' (Transpose) - - DIRECT (input) CHARACTER*1 - Indicates how H is formed from a product of elementary - reflectors - = 'F': H = H(1) H(2) . . . H(k) (Forward) - = 'B': H = H(k) . . . H(2) H(1) (Backward) - - STOREV (input) CHARACTER*1 - Indicates how the vectors which define the elementary - reflectors are stored: - = 'C': Columnwise - = 'R': Rowwise - - M (input) INTEGER - The number of rows of the matrix C. - - N (input) INTEGER - The number of columns of the matrix C. - - K (input) INTEGER - The order of the matrix T (= the number of elementary - reflectors whose product defines the block reflector). - - V (input) DOUBLE PRECISION array, dimension - (LDV,K) if STOREV = 'C' - (LDV,M) if STOREV = 'R' and SIDE = 'L' - (LDV,N) if STOREV = 'R' and SIDE = 'R' - The matrix V. See further details. - - LDV (input) INTEGER - The leading dimension of the array V. - If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); - if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); - if STOREV = 'R', LDV >= K. - - T (input) DOUBLE PRECISION array, dimension (LDT,K) - The triangular k by k matrix T in the representation of the - block reflector. - - LDT (input) INTEGER - The leading dimension of the array T. LDT >= K. - - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) - On entry, the m by n matrix C. - On exit, C is overwritten by H*C or H'*C or C*H or C*H'. - - LDC (input) INTEGER - The leading dimension of the array C. LDA >= max(1,M). - - WORK (workspace) DOUBLE PRECISION array, dimension (LDWORK,K) - - LDWORK (input) INTEGER - The leading dimension of the array WORK. - If SIDE = 'L', LDWORK >= max(1,N); - if SIDE = 'R', LDWORK >= max(1,M). - - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - v_dim1 = *ldv; - v_offset = 1 + v_dim1; - v -= v_offset; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - work_dim1 = *ldwork; - work_offset = 1 + work_dim1; - work -= work_offset; - - /* Function Body */ - if (*m <= 0 || *n <= 0) { - return 0; - } - - if (lsame_(trans, "N")) { - *(unsigned char *)transt = 'T'; - } else { - *(unsigned char *)transt = 'N'; - } - - if (lsame_(storev, "C")) { - - if (lsame_(direct, "F")) { - -/* - Let V = ( V1 ) (first K rows) - ( V2 ) - where V1 is unit lower triangular. -*/ - - if (lsame_(side, "L")) { - -/* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) - - W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) - - W := C1' -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], - &c__1); -/* L10: */ - } - -/* W := W * V1 */ - - dtrmm_("Right", "Lower", "No transpose", "Unit", n, k, & - c_b2453, &v[v_offset], ldv, &work[work_offset], - ldwork); - if (*m > *k) { - -/* W := W + C2'*V2 */ - - i__1 = *m - *k; - dgemm_("Transpose", "No transpose", n, k, &i__1, &c_b2453, - &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + v_dim1], - ldv, &c_b2453, &work[work_offset], ldwork); - } - -/* W := W * T' or W * T */ - - dtrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b2453, & - t[t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - V * W' */ - - if (*m > *k) { - -/* C2 := C2 - V2 * W' */ - - i__1 = *m - *k; - dgemm_("No transpose", "Transpose", &i__1, n, k, &c_b2589, - &v[*k + 1 + v_dim1], ldv, &work[work_offset], - ldwork, &c_b2453, &c__[*k + 1 + c_dim1], ldc); - } - -/* W := W * V1' */ - - dtrmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b2453, - &v[v_offset], ldv, &work[work_offset], ldwork); - -/* C1 := C1 - W' */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; -/* L20: */ - } -/* L30: */ - } - - } else if (lsame_(side, "R")) { - -/* - Form C * H or C * H' where C = ( C1 C2 ) - - W := C * V = (C1*V1 + C2*V2) (stored in WORK) - - W := C1 -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * - work_dim1 + 1], &c__1); -/* L40: */ - } - -/* W := W * V1 */ - - dtrmm_("Right", "Lower", "No transpose", "Unit", m, k, & - c_b2453, &v[v_offset], ldv, &work[work_offset], - ldwork); - if (*n > *k) { - -/* W := W + C2 * V2 */ - - i__1 = *n - *k; - dgemm_("No transpose", "No transpose", m, k, &i__1, & - c_b2453, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k - + 1 + v_dim1], ldv, &c_b2453, &work[work_offset], - ldwork); - } - -/* W := W * T or W * T' */ - - dtrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b2453, & - t[t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - W * V' */ - - if (*n > *k) { - -/* C2 := C2 - W * V2' */ - - i__1 = *n - *k; - dgemm_("No transpose", "Transpose", m, &i__1, k, &c_b2589, - &work[work_offset], ldwork, &v[*k + 1 + v_dim1], - ldv, &c_b2453, &c__[(*k + 1) * c_dim1 + 1], ldc); - } - -/* W := W * V1' */ - - dtrmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b2453, - &v[v_offset], ldv, &work[work_offset], ldwork); - -/* C1 := C1 - W */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; -/* L50: */ - } -/* L60: */ - } - } - - } else { - -/* - Let V = ( V1 ) - ( V2 ) (last K rows) - where V2 is unit upper triangular. -*/ - - if (lsame_(side, "L")) { - -/* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) - - W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) - - W := C2' -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * - work_dim1 + 1], &c__1); -/* L70: */ - } - -/* W := W * V2 */ - - dtrmm_("Right", "Upper", "No transpose", "Unit", n, k, & - c_b2453, &v[*m - *k + 1 + v_dim1], ldv, &work[ - work_offset], ldwork); - if (*m > *k) { - -/* W := W + C1'*V1 */ - - i__1 = *m - *k; - dgemm_("Transpose", "No transpose", n, k, &i__1, &c_b2453, - &c__[c_offset], ldc, &v[v_offset], ldv, &c_b2453, - &work[work_offset], ldwork); - } - -/* W := W * T' or W * T */ - - dtrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b2453, & - t[t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - V * W' */ - - if (*m > *k) { - -/* C1 := C1 - V1 * W' */ - - i__1 = *m - *k; - dgemm_("No transpose", "Transpose", &i__1, n, k, &c_b2589, - &v[v_offset], ldv, &work[work_offset], ldwork, & - c_b2453, &c__[c_offset], ldc); - } - -/* W := W * V2' */ - - dtrmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b2453, - &v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], - ldwork); - -/* C2 := C2 - W' */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[*m - *k + j + i__ * c_dim1] -= work[i__ + j * - work_dim1]; -/* L80: */ - } -/* L90: */ - } - - } else if (lsame_(side, "R")) { - -/* - Form C * H or C * H' where C = ( C1 C2 ) - - W := C * V = (C1*V1 + C2*V2) (stored in WORK) - - W := C2 -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ - j * work_dim1 + 1], &c__1); -/* L100: */ - } - -/* W := W * V2 */ - - dtrmm_("Right", "Upper", "No transpose", "Unit", m, k, & - c_b2453, &v[*n - *k + 1 + v_dim1], ldv, &work[ - work_offset], ldwork); - if (*n > *k) { - -/* W := W + C1 * V1 */ - - i__1 = *n - *k; - dgemm_("No transpose", "No transpose", m, k, &i__1, & - c_b2453, &c__[c_offset], ldc, &v[v_offset], ldv, & - c_b2453, &work[work_offset], ldwork); - } - -/* W := W * T or W * T' */ - - dtrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b2453, & - t[t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - W * V' */ - - if (*n > *k) { - -/* C1 := C1 - W * V1' */ - - i__1 = *n - *k; - dgemm_("No transpose", "Transpose", m, &i__1, k, &c_b2589, - &work[work_offset], ldwork, &v[v_offset], ldv, & - c_b2453, &c__[c_offset], ldc); - } - -/* W := W * V2' */ - - dtrmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b2453, - &v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], - ldwork); - -/* C2 := C2 - W */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + (*n - *k + j) * c_dim1] -= work[i__ + j * - work_dim1]; -/* L110: */ - } -/* L120: */ - } - } - } - - } else if (lsame_(storev, "R")) { - - if (lsame_(direct, "F")) { - -/* - Let V = ( V1 V2 ) (V1: first K columns) - where V1 is unit upper triangular. -*/ - - if (lsame_(side, "L")) { - -/* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) - - W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) - - W := C1' -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], - &c__1); -/* L130: */ - } - -/* W := W * V1' */ - - dtrmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b2453, - &v[v_offset], ldv, &work[work_offset], ldwork); - if (*m > *k) { - -/* W := W + C2'*V2' */ - - i__1 = *m - *k; - dgemm_("Transpose", "Transpose", n, k, &i__1, &c_b2453, & - c__[*k + 1 + c_dim1], ldc, &v[(*k + 1) * v_dim1 + - 1], ldv, &c_b2453, &work[work_offset], ldwork); - } - -/* W := W * T' or W * T */ - - dtrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b2453, & - t[t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - V' * W' */ - - if (*m > *k) { - -/* C2 := C2 - V2' * W' */ - - i__1 = *m - *k; - dgemm_("Transpose", "Transpose", &i__1, n, k, &c_b2589, & - v[(*k + 1) * v_dim1 + 1], ldv, &work[work_offset], - ldwork, &c_b2453, &c__[*k + 1 + c_dim1], ldc); - } - -/* W := W * V1 */ - - dtrmm_("Right", "Upper", "No transpose", "Unit", n, k, & - c_b2453, &v[v_offset], ldv, &work[work_offset], - ldwork); - -/* C1 := C1 - W' */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; -/* L140: */ - } -/* L150: */ - } - - } else if (lsame_(side, "R")) { - -/* - Form C * H or C * H' where C = ( C1 C2 ) - - W := C * V' = (C1*V1' + C2*V2') (stored in WORK) - - W := C1 -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * - work_dim1 + 1], &c__1); -/* L160: */ - } - -/* W := W * V1' */ - - dtrmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b2453, - &v[v_offset], ldv, &work[work_offset], ldwork); - if (*n > *k) { - -/* W := W + C2 * V2' */ - - i__1 = *n - *k; - dgemm_("No transpose", "Transpose", m, k, &i__1, &c_b2453, - &c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k + 1) * - v_dim1 + 1], ldv, &c_b2453, &work[work_offset], - ldwork); - } - -/* W := W * T or W * T' */ - - dtrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b2453, & - t[t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - W * V */ - - if (*n > *k) { - -/* C2 := C2 - W * V2 */ - - i__1 = *n - *k; - dgemm_("No transpose", "No transpose", m, &i__1, k, & - c_b2589, &work[work_offset], ldwork, &v[(*k + 1) * - v_dim1 + 1], ldv, &c_b2453, &c__[(*k + 1) * - c_dim1 + 1], ldc); - } - -/* W := W * V1 */ - - dtrmm_("Right", "Upper", "No transpose", "Unit", m, k, & - c_b2453, &v[v_offset], ldv, &work[work_offset], - ldwork); - -/* C1 := C1 - W */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; -/* L170: */ - } -/* L180: */ - } - - } - - } else { - -/* - Let V = ( V1 V2 ) (V2: last K columns) - where V2 is unit lower triangular. -*/ - - if (lsame_(side, "L")) { - -/* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) - - W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) - - W := C2' -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * - work_dim1 + 1], &c__1); -/* L190: */ - } - -/* W := W * V2' */ - - dtrmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b2453, - &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); - if (*m > *k) { - -/* W := W + C1'*V1' */ - - i__1 = *m - *k; - dgemm_("Transpose", "Transpose", n, k, &i__1, &c_b2453, & - c__[c_offset], ldc, &v[v_offset], ldv, &c_b2453, & - work[work_offset], ldwork); - } - -/* W := W * T' or W * T */ - - dtrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b2453, & - t[t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - V' * W' */ - - if (*m > *k) { - -/* C1 := C1 - V1' * W' */ - - i__1 = *m - *k; - dgemm_("Transpose", "Transpose", &i__1, n, k, &c_b2589, & - v[v_offset], ldv, &work[work_offset], ldwork, & - c_b2453, &c__[c_offset], ldc); - } - -/* W := W * V2 */ - - dtrmm_("Right", "Lower", "No transpose", "Unit", n, k, & - c_b2453, &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); - -/* C2 := C2 - W' */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[*m - *k + j + i__ * c_dim1] -= work[i__ + j * - work_dim1]; -/* L200: */ - } -/* L210: */ - } - - } else if (lsame_(side, "R")) { - -/* - Form C * H or C * H' where C = ( C1 C2 ) - - W := C * V' = (C1*V1' + C2*V2') (stored in WORK) - - W := C2 -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ - j * work_dim1 + 1], &c__1); -/* L220: */ - } - -/* W := W * V2' */ - - dtrmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b2453, - &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); - if (*n > *k) { - -/* W := W + C1 * V1' */ - - i__1 = *n - *k; - dgemm_("No transpose", "Transpose", m, k, &i__1, &c_b2453, - &c__[c_offset], ldc, &v[v_offset], ldv, &c_b2453, - &work[work_offset], ldwork); - } - -/* W := W * T or W * T' */ - - dtrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b2453, & - t[t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - W * V */ - - if (*n > *k) { - -/* C1 := C1 - W * V1 */ - - i__1 = *n - *k; - dgemm_("No transpose", "No transpose", m, &i__1, k, & - c_b2589, &work[work_offset], ldwork, &v[v_offset], - ldv, &c_b2453, &c__[c_offset], ldc); - } - -/* W := W * V2 */ - - dtrmm_("Right", "Lower", "No transpose", "Unit", m, k, & - c_b2453, &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); - -/* C1 := C1 - W */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + (*n - *k + j) * c_dim1] -= work[i__ + j * - work_dim1]; -/* L230: */ - } -/* L240: */ - } - - } - - } - } - - return 0; - -/* End of DLARFB */ - -} /* dlarfb_ */ - -/* Subroutine */ int dlarfg_(integer *n, doublereal *alpha, doublereal *x, - integer *incx, doublereal *tau) -{ - /* System generated locals */ - integer i__1; - doublereal d__1; - - /* Builtin functions */ - double d_sign(doublereal *, doublereal *); - - /* Local variables */ - static integer j, knt; - static doublereal beta; - extern doublereal dnrm2_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - static doublereal xnorm; - - static doublereal safmin, rsafmn; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - DLARFG generates a real elementary reflector H of order n, such - that - - H * ( alpha ) = ( beta ), H' * H = I. - ( x ) ( 0 ) - - where alpha and beta are scalars, and x is an (n-1)-element real - vector. H is represented in the form - - H = I - tau * ( 1 ) * ( 1 v' ) , - ( v ) - - where tau is a real scalar and v is a real (n-1)-element - vector. - - If the elements of x are all zero, then tau = 0 and H is taken to be - the unit matrix. - - Otherwise 1 <= tau <= 2. - - Arguments - ========= - - N (input) INTEGER - The order of the elementary reflector. - - ALPHA (input/output) DOUBLE PRECISION - On entry, the value alpha. - On exit, it is overwritten with the value beta. - - X (input/output) DOUBLE PRECISION array, dimension - (1+(N-2)*abs(INCX)) - On entry, the vector x. - On exit, it is overwritten with the vector v. - - INCX (input) INTEGER - The increment between elements of X. INCX > 0. - - TAU (output) DOUBLE PRECISION - The value tau. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --x; - - /* Function Body */ - if (*n <= 1) { - *tau = 0.; - return 0; - } - - i__1 = *n - 1; - xnorm = dnrm2_(&i__1, &x[1], incx); - - if (xnorm == 0.) { - -/* H = I */ - - *tau = 0.; - } else { - -/* general case */ - - d__1 = dlapy2_(alpha, &xnorm); - beta = -d_sign(&d__1, alpha); - safmin = SAFEMINIMUM / EPSILON; - if (abs(beta) < safmin) { - -/* XNORM, BETA may be inaccurate; scale X and recompute them */ - - rsafmn = 1. / safmin; - knt = 0; -L10: - ++knt; - i__1 = *n - 1; - dscal_(&i__1, &rsafmn, &x[1], incx); - beta *= rsafmn; - *alpha *= rsafmn; - if (abs(beta) < safmin) { - goto L10; - } - -/* New BETA is at most 1, at least SAFMIN */ - - i__1 = *n - 1; - xnorm = dnrm2_(&i__1, &x[1], incx); - d__1 = dlapy2_(alpha, &xnorm); - beta = -d_sign(&d__1, alpha); - *tau = (beta - *alpha) / beta; - i__1 = *n - 1; - d__1 = 1. / (*alpha - beta); - dscal_(&i__1, &d__1, &x[1], incx); - -/* If ALPHA is subnormal, it may lose relative accuracy */ - - *alpha = beta; - i__1 = knt; - for (j = 1; j <= i__1; ++j) { - *alpha *= safmin; -/* L20: */ - } - } else { - *tau = (beta - *alpha) / beta; - i__1 = *n - 1; - d__1 = 1. / (*alpha - beta); - dscal_(&i__1, &d__1, &x[1], incx); - *alpha = beta; - } - } - - return 0; - -/* End of DLARFG */ - -} /* dlarfg_ */ - -/* Subroutine */ int dlarft_(char *direct, char *storev, integer *n, integer * - k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t, - integer *ldt) -{ - /* System generated locals */ - integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3; - doublereal d__1; - - /* Local variables */ - static integer i__, j; - static doublereal vii; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *), dtrmv_(char *, - char *, char *, integer *, doublereal *, integer *, doublereal *, - integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DLARFT forms the triangular factor T of a real block reflector H - of order n, which is defined as a product of k elementary reflectors. - - If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; - - If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. - - If STOREV = 'C', the vector which defines the elementary reflector - H(i) is stored in the i-th column of the array V, and - - H = I - V * T * V' - - If STOREV = 'R', the vector which defines the elementary reflector - H(i) is stored in the i-th row of the array V, and - - H = I - V' * T * V - - Arguments - ========= - - DIRECT (input) CHARACTER*1 - Specifies the order in which the elementary reflectors are - multiplied to form the block reflector: - = 'F': H = H(1) H(2) . . . H(k) (Forward) - = 'B': H = H(k) . . . H(2) H(1) (Backward) - - STOREV (input) CHARACTER*1 - Specifies how the vectors which define the elementary - reflectors are stored (see also Further Details): - = 'C': columnwise - = 'R': rowwise - - N (input) INTEGER - The order of the block reflector H. N >= 0. - - K (input) INTEGER - The order of the triangular factor T (= the number of - elementary reflectors). K >= 1. - - V (input/output) DOUBLE PRECISION array, dimension - (LDV,K) if STOREV = 'C' - (LDV,N) if STOREV = 'R' - The matrix V. See further details. - - LDV (input) INTEGER - The leading dimension of the array V. - If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. - - TAU (input) DOUBLE PRECISION array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i). - - T (output) DOUBLE PRECISION array, dimension (LDT,K) - The k by k triangular factor T of the block reflector. - If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is - lower triangular. The rest of the array is not used. - - LDT (input) INTEGER - The leading dimension of the array T. LDT >= K. - - Further Details - =============== - - The shape of the matrix V and the storage of the vectors which define - the H(i) is best illustrated by the following example with n = 5 and - k = 3. The elements equal to 1 are not stored; the corresponding - array elements are modified but restored on exit. The rest of the - array is not used. - - DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': - - V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) - ( v1 1 ) ( 1 v2 v2 v2 ) - ( v1 v2 1 ) ( 1 v3 v3 ) - ( v1 v2 v3 ) - ( v1 v2 v3 ) - - DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': - - V = ( v1 v2 v3 ) V = ( v1 v1 1 ) - ( v1 v2 v3 ) ( v2 v2 v2 1 ) - ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) - ( 1 v3 ) - ( 1 ) - - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - v_dim1 = *ldv; - v_offset = 1 + v_dim1; - v -= v_offset; - --tau; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - - /* Function Body */ - if (*n == 0) { - return 0; - } - - if (lsame_(direct, "F")) { - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - if (tau[i__] == 0.) { - -/* H(i) = I */ - - i__2 = i__; - for (j = 1; j <= i__2; ++j) { - t[j + i__ * t_dim1] = 0.; -/* L10: */ - } - } else { - -/* general case */ - - vii = v[i__ + i__ * v_dim1]; - v[i__ + i__ * v_dim1] = 1.; - if (lsame_(storev, "C")) { - -/* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */ - - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - d__1 = -tau[i__]; - dgemv_("Transpose", &i__2, &i__3, &d__1, &v[i__ + v_dim1], - ldv, &v[i__ + i__ * v_dim1], &c__1, &c_b2467, &t[ - i__ * t_dim1 + 1], &c__1); - } else { - -/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */ - - i__2 = i__ - 1; - i__3 = *n - i__ + 1; - d__1 = -tau[i__]; - dgemv_("No transpose", &i__2, &i__3, &d__1, &v[i__ * - v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, & - c_b2467, &t[i__ * t_dim1 + 1], &c__1); - } - v[i__ + i__ * v_dim1] = vii; - -/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ - - i__2 = i__ - 1; - dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ - t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); - t[i__ + i__ * t_dim1] = tau[i__]; - } -/* L20: */ - } - } else { - for (i__ = *k; i__ >= 1; --i__) { - if (tau[i__] == 0.) { - -/* H(i) = I */ - - i__1 = *k; - for (j = i__; j <= i__1; ++j) { - t[j + i__ * t_dim1] = 0.; -/* L30: */ - } - } else { - -/* general case */ - - if (i__ < *k) { - if (lsame_(storev, "C")) { - vii = v[*n - *k + i__ + i__ * v_dim1]; - v[*n - *k + i__ + i__ * v_dim1] = 1.; - -/* - T(i+1:k,i) := - - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) -*/ - - i__1 = *n - *k + i__; - i__2 = *k - i__; - d__1 = -tau[i__]; - dgemv_("Transpose", &i__1, &i__2, &d__1, &v[(i__ + 1) - * v_dim1 + 1], ldv, &v[i__ * v_dim1 + 1], & - c__1, &c_b2467, &t[i__ + 1 + i__ * t_dim1], & - c__1); - v[*n - *k + i__ + i__ * v_dim1] = vii; - } else { - vii = v[i__ + (*n - *k + i__) * v_dim1]; - v[i__ + (*n - *k + i__) * v_dim1] = 1.; - -/* - T(i+1:k,i) := - - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' -*/ - - i__1 = *k - i__; - i__2 = *n - *k + i__; - d__1 = -tau[i__]; - dgemv_("No transpose", &i__1, &i__2, &d__1, &v[i__ + - 1 + v_dim1], ldv, &v[i__ + v_dim1], ldv, & - c_b2467, &t[i__ + 1 + i__ * t_dim1], &c__1); - v[i__ + (*n - *k + i__) * v_dim1] = vii; - } - -/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ - - i__1 = *k - i__; - dtrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ - + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * - t_dim1], &c__1) - ; - } - t[i__ + i__ * t_dim1] = tau[i__]; - } -/* L40: */ - } - } - return 0; - -/* End of DLARFT */ - -} /* dlarft_ */ - -/* Subroutine */ int dlarfx_(char *side, integer *m, integer *n, doublereal * - v, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work) -{ - /* System generated locals */ - integer c_dim1, c_offset, i__1; - doublereal d__1; - - /* Local variables */ - static integer j; - static doublereal t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, v5, - v6, v7, v8, v9, t10, v10, sum; - extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DLARFX applies a real elementary reflector H to a real m by n - matrix C, from either the left or the right. H is represented in the - form - - H = I - tau * v * v' - - where tau is a real scalar and v is a real vector. - - If tau = 0, then H is taken to be the unit matrix - - This version uses inline code if H has order < 11. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': form H * C - = 'R': form C * H - - M (input) INTEGER - The number of rows of the matrix C. - - N (input) INTEGER - The number of columns of the matrix C. - - V (input) DOUBLE PRECISION array, dimension (M) if SIDE = 'L' - or (N) if SIDE = 'R' - The vector v in the representation of H. - - TAU (input) DOUBLE PRECISION - The value tau in the representation of H. - - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) - On entry, the m by n matrix C. - On exit, C is overwritten by the matrix H * C if SIDE = 'L', - or C * H if SIDE = 'R'. - - LDC (input) INTEGER - The leading dimension of the array C. LDA >= (1,M). - - WORK (workspace) DOUBLE PRECISION array, dimension - (N) if SIDE = 'L' - or (M) if SIDE = 'R' - WORK is not referenced if H has order < 11. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --v; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - if (*tau == 0.) { - return 0; - } - if (lsame_(side, "L")) { - -/* Form H * C, where H has order m. */ - - switch (*m) { - case 1: goto L10; - case 2: goto L30; - case 3: goto L50; - case 4: goto L70; - case 5: goto L90; - case 6: goto L110; - case 7: goto L130; - case 8: goto L150; - case 9: goto L170; - case 10: goto L190; - } - -/* - Code for general M - - w := C'*v -*/ - - dgemv_("Transpose", m, n, &c_b2453, &c__[c_offset], ldc, &v[1], &c__1, - &c_b2467, &work[1], &c__1); - -/* C := C - tau * v * w' */ - - d__1 = -(*tau); - dger_(m, n, &d__1, &v[1], &c__1, &work[1], &c__1, &c__[c_offset], ldc) - ; - goto L410; -L10: - -/* Special code for 1 x 1 Householder */ - - t1 = 1. - *tau * v[1] * v[1]; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - c__[j * c_dim1 + 1] = t1 * c__[j * c_dim1 + 1]; -/* L20: */ - } - goto L410; -L30: - -/* Special code for 2 x 2 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; -/* L40: */ - } - goto L410; -L50: - -/* Special code for 3 x 3 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; -/* L60: */ - } - goto L410; -L70: - -/* Special code for 4 x 4 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; - c__[j * c_dim1 + 4] -= sum * t4; -/* L80: */ - } - goto L410; -L90: - -/* Special code for 5 x 5 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ - j * c_dim1 + 5]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; - c__[j * c_dim1 + 4] -= sum * t4; - c__[j * c_dim1 + 5] -= sum * t5; -/* L100: */ - } - goto L410; -L110: - -/* Special code for 6 x 6 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ - j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; - c__[j * c_dim1 + 4] -= sum * t4; - c__[j * c_dim1 + 5] -= sum * t5; - c__[j * c_dim1 + 6] -= sum * t6; -/* L120: */ - } - goto L410; -L130: - -/* Special code for 7 x 7 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - v7 = v[7]; - t7 = *tau * v7; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ - j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * - c_dim1 + 7]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; - c__[j * c_dim1 + 4] -= sum * t4; - c__[j * c_dim1 + 5] -= sum * t5; - c__[j * c_dim1 + 6] -= sum * t6; - c__[j * c_dim1 + 7] -= sum * t7; -/* L140: */ - } - goto L410; -L150: - -/* Special code for 8 x 8 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - v7 = v[7]; - t7 = *tau * v7; - v8 = v[8]; - t8 = *tau * v8; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ - j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * - c_dim1 + 7] + v8 * c__[j * c_dim1 + 8]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; - c__[j * c_dim1 + 4] -= sum * t4; - c__[j * c_dim1 + 5] -= sum * t5; - c__[j * c_dim1 + 6] -= sum * t6; - c__[j * c_dim1 + 7] -= sum * t7; - c__[j * c_dim1 + 8] -= sum * t8; -/* L160: */ - } - goto L410; -L170: - -/* Special code for 9 x 9 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - v7 = v[7]; - t7 = *tau * v7; - v8 = v[8]; - t8 = *tau * v8; - v9 = v[9]; - t9 = *tau * v9; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ - j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * - c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j * - c_dim1 + 9]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; - c__[j * c_dim1 + 4] -= sum * t4; - c__[j * c_dim1 + 5] -= sum * t5; - c__[j * c_dim1 + 6] -= sum * t6; - c__[j * c_dim1 + 7] -= sum * t7; - c__[j * c_dim1 + 8] -= sum * t8; - c__[j * c_dim1 + 9] -= sum * t9; -/* L180: */ - } - goto L410; -L190: - -/* Special code for 10 x 10 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - v7 = v[7]; - t7 = *tau * v7; - v8 = v[8]; - t8 = *tau * v8; - v9 = v[9]; - t9 = *tau * v9; - v10 = v[10]; - t10 = *tau * v10; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ - j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * - c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j * - c_dim1 + 9] + v10 * c__[j * c_dim1 + 10]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; - c__[j * c_dim1 + 4] -= sum * t4; - c__[j * c_dim1 + 5] -= sum * t5; - c__[j * c_dim1 + 6] -= sum * t6; - c__[j * c_dim1 + 7] -= sum * t7; - c__[j * c_dim1 + 8] -= sum * t8; - c__[j * c_dim1 + 9] -= sum * t9; - c__[j * c_dim1 + 10] -= sum * t10; -/* L200: */ - } - goto L410; - } else { - -/* Form C * H, where H has order n. */ - - switch (*n) { - case 1: goto L210; - case 2: goto L230; - case 3: goto L250; - case 4: goto L270; - case 5: goto L290; - case 6: goto L310; - case 7: goto L330; - case 8: goto L350; - case 9: goto L370; - case 10: goto L390; - } - -/* - Code for general N - - w := C * v -*/ - - dgemv_("No transpose", m, n, &c_b2453, &c__[c_offset], ldc, &v[1], & - c__1, &c_b2467, &work[1], &c__1); - -/* C := C - tau * w * v' */ - - d__1 = -(*tau); - dger_(m, n, &d__1, &work[1], &c__1, &v[1], &c__1, &c__[c_offset], ldc) - ; - goto L410; -L210: - -/* Special code for 1 x 1 Householder */ - - t1 = 1. - *tau * v[1] * v[1]; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - c__[j + c_dim1] = t1 * c__[j + c_dim1]; -/* L220: */ - } - goto L410; -L230: - -/* Special code for 2 x 2 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)]; - c__[j + c_dim1] -= sum * t1; - c__[j + (c_dim1 << 1)] -= sum * t2; -/* L240: */ - } - goto L410; -L250: - -/* Special code for 3 x 3 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * - c__[j + c_dim1 * 3]; - c__[j + c_dim1] -= sum * t1; - c__[j + (c_dim1 << 1)] -= sum * t2; - c__[j + c_dim1 * 3] -= sum * t3; -/* L260: */ - } - goto L410; -L270: - -/* Special code for 4 x 4 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * - c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)]; - c__[j + c_dim1] -= sum * t1; - c__[j + (c_dim1 << 1)] -= sum * t2; - c__[j + c_dim1 * 3] -= sum * t3; - c__[j + (c_dim1 << 2)] -= sum * t4; -/* L280: */ - } - goto L410; -L290: - -/* Special code for 5 x 5 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * - c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * - c__[j + c_dim1 * 5]; - c__[j + c_dim1] -= sum * t1; - c__[j + (c_dim1 << 1)] -= sum * t2; - c__[j + c_dim1 * 3] -= sum * t3; - c__[j + (c_dim1 << 2)] -= sum * t4; - c__[j + c_dim1 * 5] -= sum * t5; -/* L300: */ - } - goto L410; -L310: - -/* Special code for 6 x 6 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * - c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * - c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6]; - c__[j + c_dim1] -= sum * t1; - c__[j + (c_dim1 << 1)] -= sum * t2; - c__[j + c_dim1 * 3] -= sum * t3; - c__[j + (c_dim1 << 2)] -= sum * t4; - c__[j + c_dim1 * 5] -= sum * t5; - c__[j + c_dim1 * 6] -= sum * t6; -/* L320: */ - } - goto L410; -L330: - -/* Special code for 7 x 7 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - v7 = v[7]; - t7 = *tau * v7; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * - c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * - c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ - j + c_dim1 * 7]; - c__[j + c_dim1] -= sum * t1; - c__[j + (c_dim1 << 1)] -= sum * t2; - c__[j + c_dim1 * 3] -= sum * t3; - c__[j + (c_dim1 << 2)] -= sum * t4; - c__[j + c_dim1 * 5] -= sum * t5; - c__[j + c_dim1 * 6] -= sum * t6; - c__[j + c_dim1 * 7] -= sum * t7; -/* L340: */ - } - goto L410; -L350: - -/* Special code for 8 x 8 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - v7 = v[7]; - t7 = *tau * v7; - v8 = v[8]; - t8 = *tau * v8; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * - c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * - c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ - j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)]; - c__[j + c_dim1] -= sum * t1; - c__[j + (c_dim1 << 1)] -= sum * t2; - c__[j + c_dim1 * 3] -= sum * t3; - c__[j + (c_dim1 << 2)] -= sum * t4; - c__[j + c_dim1 * 5] -= sum * t5; - c__[j + c_dim1 * 6] -= sum * t6; - c__[j + c_dim1 * 7] -= sum * t7; - c__[j + (c_dim1 << 3)] -= sum * t8; -/* L360: */ - } - goto L410; -L370: - -/* Special code for 9 x 9 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - v7 = v[7]; - t7 = *tau * v7; - v8 = v[8]; - t8 = *tau * v8; - v9 = v[9]; - t9 = *tau * v9; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * - c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * - c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ - j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)] + v9 * c__[ - j + c_dim1 * 9]; - c__[j + c_dim1] -= sum * t1; - c__[j + (c_dim1 << 1)] -= sum * t2; - c__[j + c_dim1 * 3] -= sum * t3; - c__[j + (c_dim1 << 2)] -= sum * t4; - c__[j + c_dim1 * 5] -= sum * t5; - c__[j + c_dim1 * 6] -= sum * t6; - c__[j + c_dim1 * 7] -= sum * t7; - c__[j + (c_dim1 << 3)] -= sum * t8; - c__[j + c_dim1 * 9] -= sum * t9; -/* L380: */ - } - goto L410; -L390: - -/* Special code for 10 x 10 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - v7 = v[7]; - t7 = *tau * v7; - v8 = v[8]; - t8 = *tau * v8; - v9 = v[9]; - t9 = *tau * v9; - v10 = v[10]; - t10 = *tau * v10; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * - c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * - c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ - j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)] + v9 * c__[ - j + c_dim1 * 9] + v10 * c__[j + c_dim1 * 10]; - c__[j + c_dim1] -= sum * t1; - c__[j + (c_dim1 << 1)] -= sum * t2; - c__[j + c_dim1 * 3] -= sum * t3; - c__[j + (c_dim1 << 2)] -= sum * t4; - c__[j + c_dim1 * 5] -= sum * t5; - c__[j + c_dim1 * 6] -= sum * t6; - c__[j + c_dim1 * 7] -= sum * t7; - c__[j + (c_dim1 << 3)] -= sum * t8; - c__[j + c_dim1 * 9] -= sum * t9; - c__[j + c_dim1 * 10] -= sum * t10; -/* L400: */ - } - goto L410; - } -L410: - return 0; - -/* End of DLARFX */ - -} /* dlarfx_ */ - -/* Subroutine */ int dlartg_(doublereal *f, doublereal *g, doublereal *cs, - doublereal *sn, doublereal *r__) -{ - /* Initialized data */ - - static logical first = TRUE_; - - /* System generated locals */ - integer i__1; - doublereal d__1, d__2; - - /* Builtin functions */ - double log(doublereal), pow_di(doublereal *, integer *), sqrt(doublereal); - - /* Local variables */ - static integer i__; - static doublereal f1, g1, eps, scale; - static integer count; - static doublereal safmn2, safmx2; - - static doublereal safmin; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - DLARTG generate a plane rotation so that - - [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1. - [ -SN CS ] [ G ] [ 0 ] - - This is a slower, more accurate version of the BLAS1 routine DROTG, - with the following other differences: - F and G are unchanged on return. - If G=0, then CS=1 and SN=0. - If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any - floating point operations (saves work in DBDSQR when - there are zeros on the diagonal). - - If F exceeds G in magnitude, CS will be positive. - - Arguments - ========= - - F (input) DOUBLE PRECISION - The first component of vector to be rotated. - - G (input) DOUBLE PRECISION - The second component of vector to be rotated. - - CS (output) DOUBLE PRECISION - The cosine of the rotation. - - SN (output) DOUBLE PRECISION - The sine of the rotation. - - R (output) DOUBLE PRECISION - The nonzero component of the rotated vector. - - ===================================================================== -*/ - - - if (first) { - first = FALSE_; - safmin = SAFEMINIMUM; - eps = EPSILON; - d__1 = BASE; - i__1 = (integer) (log(safmin / eps) / log(BASE) / - 2.); - safmn2 = pow_di(&d__1, &i__1); - safmx2 = 1. / safmn2; - } - if (*g == 0.) { - *cs = 1.; - *sn = 0.; - *r__ = *f; - } else if (*f == 0.) { - *cs = 0.; - *sn = 1.; - *r__ = *g; - } else { - f1 = *f; - g1 = *g; -/* Computing MAX */ - d__1 = abs(f1), d__2 = abs(g1); - scale = max(d__1,d__2); - if (scale >= safmx2) { - count = 0; -L10: - ++count; - f1 *= safmn2; - g1 *= safmn2; -/* Computing MAX */ - d__1 = abs(f1), d__2 = abs(g1); - scale = max(d__1,d__2); - if (scale >= safmx2) { - goto L10; - } -/* Computing 2nd power */ - d__1 = f1; -/* Computing 2nd power */ - d__2 = g1; - *r__ = sqrt(d__1 * d__1 + d__2 * d__2); - *cs = f1 / *r__; - *sn = g1 / *r__; - i__1 = count; - for (i__ = 1; i__ <= i__1; ++i__) { - *r__ *= safmx2; -/* L20: */ - } - } else if (scale <= safmn2) { - count = 0; -L30: - ++count; - f1 *= safmx2; - g1 *= safmx2; -/* Computing MAX */ - d__1 = abs(f1), d__2 = abs(g1); - scale = max(d__1,d__2); - if (scale <= safmn2) { - goto L30; - } -/* Computing 2nd power */ - d__1 = f1; -/* Computing 2nd power */ - d__2 = g1; - *r__ = sqrt(d__1 * d__1 + d__2 * d__2); - *cs = f1 / *r__; - *sn = g1 / *r__; - i__1 = count; - for (i__ = 1; i__ <= i__1; ++i__) { - *r__ *= safmn2; -/* L40: */ - } - } else { -/* Computing 2nd power */ - d__1 = f1; -/* Computing 2nd power */ - d__2 = g1; - *r__ = sqrt(d__1 * d__1 + d__2 * d__2); - *cs = f1 / *r__; - *sn = g1 / *r__; - } - if (abs(*f) > abs(*g) && *cs < 0.) { - *cs = -(*cs); - *sn = -(*sn); - *r__ = -(*r__); - } - } - return 0; - -/* End of DLARTG */ - -} /* dlartg_ */ - -/* Subroutine */ int dlas2_(doublereal *f, doublereal *g, doublereal *h__, - doublereal *ssmin, doublereal *ssmax) -{ - /* System generated locals */ - doublereal d__1, d__2; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal c__, fa, ga, ha, as, at, au, fhmn, fhmx; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - DLAS2 computes the singular values of the 2-by-2 matrix - [ F G ] - [ 0 H ]. - On return, SSMIN is the smaller singular value and SSMAX is the - larger singular value. - - Arguments - ========= - - F (input) DOUBLE PRECISION - The (1,1) element of the 2-by-2 matrix. - - G (input) DOUBLE PRECISION - The (1,2) element of the 2-by-2 matrix. - - H (input) DOUBLE PRECISION - The (2,2) element of the 2-by-2 matrix. - - SSMIN (output) DOUBLE PRECISION - The smaller singular value. - - SSMAX (output) DOUBLE PRECISION - The larger singular value. - - Further Details - =============== - - Barring over/underflow, all output quantities are correct to within - a few units in the last place (ulps), even in the absence of a guard - digit in addition/subtraction. - - In IEEE arithmetic, the code works correctly if one matrix element is - infinite. - - Overflow will not occur unless the largest singular value itself - overflows, or is within a few ulps of overflow. (On machines with - partial overflow, like the Cray, overflow may occur if the largest - singular value is within a factor of 2 of overflow.) - - Underflow is harmless if underflow is gradual. Otherwise, results - may correspond to a matrix modified by perturbations of size near - the underflow threshold. - - ==================================================================== -*/ - - - fa = abs(*f); - ga = abs(*g); - ha = abs(*h__); - fhmn = min(fa,ha); - fhmx = max(fa,ha); - if (fhmn == 0.) { - *ssmin = 0.; - if (fhmx == 0.) { - *ssmax = ga; - } else { -/* Computing 2nd power */ - d__1 = min(fhmx,ga) / max(fhmx,ga); - *ssmax = max(fhmx,ga) * sqrt(d__1 * d__1 + 1.); - } - } else { - if (ga < fhmx) { - as = fhmn / fhmx + 1.; - at = (fhmx - fhmn) / fhmx; -/* Computing 2nd power */ - d__1 = ga / fhmx; - au = d__1 * d__1; - c__ = 2. / (sqrt(as * as + au) + sqrt(at * at + au)); - *ssmin = fhmn * c__; - *ssmax = fhmx / c__; - } else { - au = fhmx / ga; - if (au == 0.) { - -/* - Avoid possible harmful underflow if exponent range - asymmetric (true SSMIN may not underflow even if - AU underflows) -*/ - - *ssmin = fhmn * fhmx / ga; - *ssmax = ga; - } else { - as = fhmn / fhmx + 1.; - at = (fhmx - fhmn) / fhmx; -/* Computing 2nd power */ - d__1 = as * au; -/* Computing 2nd power */ - d__2 = at * au; - c__ = 1. / (sqrt(d__1 * d__1 + 1.) + sqrt(d__2 * d__2 + 1.)); - *ssmin = fhmn * c__ * au; - *ssmin += *ssmin; - *ssmax = ga / (c__ + c__); - } - } - } - return 0; - -/* End of DLAS2 */ - -} /* dlas2_ */ - -/* Subroutine */ int dlascl_(char *type__, integer *kl, integer *ku, - doublereal *cfrom, doublereal *cto, integer *m, integer *n, - doublereal *a, integer *lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - - /* Local variables */ - static integer i__, j, k1, k2, k3, k4; - static doublereal mul, cto1; - static logical done; - static doublereal ctoc; - extern logical lsame_(char *, char *); - static integer itype; - static doublereal cfrom1; - - static doublereal cfromc; - extern /* Subroutine */ int xerbla_(char *, integer *); - static doublereal bignum, smlnum; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DLASCL multiplies the M by N real matrix A by the real scalar - CTO/CFROM. This is done without over/underflow as long as the final - result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that - A may be full, upper triangular, lower triangular, upper Hessenberg, - or banded. - - Arguments - ========= - - TYPE (input) CHARACTER*1 - TYPE indices the storage type of the input matrix. - = 'G': A is a full matrix. - = 'L': A is a lower triangular matrix. - = 'U': A is an upper triangular matrix. - = 'H': A is an upper Hessenberg matrix. - = 'B': A is a symmetric band matrix with lower bandwidth KL - and upper bandwidth KU and with the only the lower - half stored. - = 'Q': A is a symmetric band matrix with lower bandwidth KL - and upper bandwidth KU and with the only the upper - half stored. - = 'Z': A is a band matrix with lower bandwidth KL and upper - bandwidth KU. - - KL (input) INTEGER - The lower bandwidth of A. Referenced only if TYPE = 'B', - 'Q' or 'Z'. - - KU (input) INTEGER - The upper bandwidth of A. Referenced only if TYPE = 'B', - 'Q' or 'Z'. - - CFROM (input) DOUBLE PRECISION - CTO (input) DOUBLE PRECISION - The matrix A is multiplied by CTO/CFROM. A(I,J) is computed - without over/underflow if the final result CTO*A(I,J)/CFROM - can be represented without over/underflow. CFROM must be - nonzero. - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,M) - The matrix to be multiplied by CTO/CFROM. See TYPE for the - storage type. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - INFO (output) INTEGER - 0 - successful exit - <0 - if INFO = -i, the i-th argument had an illegal value. - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - - if (lsame_(type__, "G")) { - itype = 0; - } else if (lsame_(type__, "L")) { - itype = 1; - } else if (lsame_(type__, "U")) { - itype = 2; - } else if (lsame_(type__, "H")) { - itype = 3; - } else if (lsame_(type__, "B")) { - itype = 4; - } else if (lsame_(type__, "Q")) { - itype = 5; - } else if (lsame_(type__, "Z")) { - itype = 6; - } else { - itype = -1; - } - - if (itype == -1) { - *info = -1; - } else if (*cfrom == 0.) { - *info = -4; - } else if (*m < 0) { - *info = -6; - } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { - *info = -7; - } else if (itype <= 3 && *lda < max(1,*m)) { - *info = -9; - } else if (itype >= 4) { -/* Computing MAX */ - i__1 = *m - 1; - if (*kl < 0 || *kl > max(i__1,0)) { - *info = -2; - } else /* if(complicated condition) */ { -/* Computing MAX */ - i__1 = *n - 1; - if (*ku < 0 || *ku > max(i__1,0) || (itype == 4 || itype == 5) && - *kl != *ku) { - *info = -3; - } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < * - ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) { - *info = -9; - } - } - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLASCL", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0 || *m == 0) { - return 0; - } - -/* Get machine parameters */ - - smlnum = SAFEMINIMUM; - bignum = 1. / smlnum; - - cfromc = *cfrom; - ctoc = *cto; - -L10: - cfrom1 = cfromc * smlnum; - cto1 = ctoc / bignum; - if (abs(cfrom1) > abs(ctoc) && ctoc != 0.) { - mul = smlnum; - done = FALSE_; - cfromc = cfrom1; - } else if (abs(cto1) > abs(cfromc)) { - mul = bignum; - done = FALSE_; - ctoc = cto1; - } else { - mul = ctoc / cfromc; - done = TRUE_; - } - - if (itype == 0) { - -/* Full matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] *= mul; -/* L20: */ - } -/* L30: */ - } - - } else if (itype == 1) { - -/* Lower triangular matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = j; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] *= mul; -/* L40: */ - } -/* L50: */ - } - - } else if (itype == 2) { - -/* Upper triangular matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = min(j,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] *= mul; -/* L60: */ - } -/* L70: */ - } - - } else if (itype == 3) { - -/* Upper Hessenberg matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = j + 1; - i__2 = min(i__3,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] *= mul; -/* L80: */ - } -/* L90: */ - } - - } else if (itype == 4) { - -/* Lower half of a symmetric band matrix */ - - k3 = *kl + 1; - k4 = *n + 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = k3, i__4 = k4 - j; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] *= mul; -/* L100: */ - } -/* L110: */ - } - - } else if (itype == 5) { - -/* Upper half of a symmetric band matrix */ - - k1 = *ku + 2; - k3 = *ku + 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MAX */ - i__2 = k1 - j; - i__3 = k3; - for (i__ = max(i__2,1); i__ <= i__3; ++i__) { - a[i__ + j * a_dim1] *= mul; -/* L120: */ - } -/* L130: */ - } - - } else if (itype == 6) { - -/* Band matrix */ - - k1 = *kl + *ku + 2; - k2 = *kl + 1; - k3 = (*kl << 1) + *ku + 1; - k4 = *kl + *ku + 1 + *m; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MAX */ - i__3 = k1 - j; -/* Computing MIN */ - i__4 = k3, i__5 = k4 - j; - i__2 = min(i__4,i__5); - for (i__ = max(i__3,k2); i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] *= mul; -/* L140: */ - } -/* L150: */ - } - - } - - if (! done) { - goto L10; - } - - return 0; - -/* End of DLASCL */ - -} /* dlascl_ */ - -/* Subroutine */ int dlasd0_(integer *n, integer *sqre, doublereal *d__, - doublereal *e, doublereal *u, integer *ldu, doublereal *vt, integer * - ldvt, integer *smlsiz, integer *iwork, doublereal *work, integer * - info) -{ - /* System generated locals */ - integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; - - /* Builtin functions */ - integer pow_ii(integer *, integer *); - - /* Local variables */ - static integer i__, j, m, i1, ic, lf, nd, ll, nl, nr, im1, ncc, nlf, nrf, - iwk, lvl, ndb1, nlp1, nrp1; - static doublereal beta; - static integer idxq, nlvl; - static doublereal alpha; - static integer inode, ndiml, idxqc, ndimr, itemp, sqrei; - extern /* Subroutine */ int dlasd1_(integer *, integer *, integer *, - doublereal *, doublereal *, doublereal *, doublereal *, integer *, - doublereal *, integer *, integer *, integer *, doublereal *, - integer *), dlasdq_(char *, integer *, integer *, integer *, - integer *, integer *, doublereal *, doublereal *, doublereal *, - integer *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *), dlasdt_(integer *, integer *, - integer *, integer *, integer *, integer *, integer *), xerbla_( - char *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - Using a divide and conquer approach, DLASD0 computes the singular - value decomposition (SVD) of a real upper bidiagonal N-by-M - matrix B with diagonal D and offdiagonal E, where M = N + SQRE. - The algorithm computes orthogonal matrices U and VT such that - B = U * S * VT. The singular values S are overwritten on D. - - A related subroutine, DLASDA, computes only the singular values, - and optionally, the singular vectors in compact form. - - Arguments - ========= - - N (input) INTEGER - On entry, the row dimension of the upper bidiagonal matrix. - This is also the dimension of the main diagonal array D. - - SQRE (input) INTEGER - Specifies the column dimension of the bidiagonal matrix. - = 0: The bidiagonal matrix has column dimension M = N; - = 1: The bidiagonal matrix has column dimension M = N+1; - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry D contains the main diagonal of the bidiagonal - matrix. - On exit D, if INFO = 0, contains its singular values. - - E (input) DOUBLE PRECISION array, dimension (M-1) - Contains the subdiagonal entries of the bidiagonal matrix. - On exit, E has been destroyed. - - U (output) DOUBLE PRECISION array, dimension at least (LDQ, N) - On exit, U contains the left singular vectors. - - LDU (input) INTEGER - On entry, leading dimension of U. - - VT (output) DOUBLE PRECISION array, dimension at least (LDVT, M) - On exit, VT' contains the right singular vectors. - - LDVT (input) INTEGER - On entry, leading dimension of VT. - - SMLSIZ (input) INTEGER - On entry, maximum size of the subproblems at the - bottom of the computation tree. - - IWORK INTEGER work array. - Dimension must be at least (8 * N) - - WORK DOUBLE PRECISION work array. - Dimension must be at least (3 * M**2 + 2 * M) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an singular value did not converge - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - --iwork; - --work; - - /* Function Body */ - *info = 0; - - if (*n < 0) { - *info = -1; - } else if (*sqre < 0 || *sqre > 1) { - *info = -2; - } - - m = *n + *sqre; - - if (*ldu < *n) { - *info = -6; - } else if (*ldvt < m) { - *info = -8; - } else if (*smlsiz < 3) { - *info = -9; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLASD0", &i__1); - return 0; - } - -/* If the input matrix is too small, call DLASDQ to find the SVD. */ - - if (*n <= *smlsiz) { - dlasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset], - ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], info); - return 0; - } - -/* Set up the computation tree. */ - - inode = 1; - ndiml = inode + *n; - ndimr = ndiml + *n; - idxq = ndimr + *n; - iwk = idxq + *n; - dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], - smlsiz); - -/* - For the nodes on bottom level of the tree, solve - their subproblems by DLASDQ. -*/ - - ndb1 = (nd + 1) / 2; - ncc = 0; - i__1 = nd; - for (i__ = ndb1; i__ <= i__1; ++i__) { - -/* - IC : center row of each node - NL : number of rows of left subproblem - NR : number of rows of right subproblem - NLF: starting row of the left subproblem - NRF: starting row of the right subproblem -*/ - - i1 = i__ - 1; - ic = iwork[inode + i1]; - nl = iwork[ndiml + i1]; - nlp1 = nl + 1; - nr = iwork[ndimr + i1]; - nrp1 = nr + 1; - nlf = ic - nl; - nrf = ic + 1; - sqrei = 1; - dlasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], &vt[ - nlf + nlf * vt_dim1], ldvt, &u[nlf + nlf * u_dim1], ldu, &u[ - nlf + nlf * u_dim1], ldu, &work[1], info); - if (*info != 0) { - return 0; - } - itemp = idxq + nlf - 2; - i__2 = nl; - for (j = 1; j <= i__2; ++j) { - iwork[itemp + j] = j; -/* L10: */ - } - if (i__ == nd) { - sqrei = *sqre; - } else { - sqrei = 1; - } - nrp1 = nr + sqrei; - dlasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], &vt[ - nrf + nrf * vt_dim1], ldvt, &u[nrf + nrf * u_dim1], ldu, &u[ - nrf + nrf * u_dim1], ldu, &work[1], info); - if (*info != 0) { - return 0; - } - itemp = idxq + ic; - i__2 = nr; - for (j = 1; j <= i__2; ++j) { - iwork[itemp + j - 1] = j; -/* L20: */ - } -/* L30: */ - } - -/* Now conquer each subproblem bottom-up. */ - - for (lvl = nlvl; lvl >= 1; --lvl) { - -/* - Find the first node LF and last node LL on the - current level LVL. -*/ - - if (lvl == 1) { - lf = 1; - ll = 1; - } else { - i__1 = lvl - 1; - lf = pow_ii(&c__2, &i__1); - ll = (lf << 1) - 1; - } - i__1 = ll; - for (i__ = lf; i__ <= i__1; ++i__) { - im1 = i__ - 1; - ic = iwork[inode + im1]; - nl = iwork[ndiml + im1]; - nr = iwork[ndimr + im1]; - nlf = ic - nl; - if (*sqre == 0 && i__ == ll) { - sqrei = *sqre; - } else { - sqrei = 1; - } - idxqc = idxq + nlf - 1; - alpha = d__[ic]; - beta = e[ic]; - dlasd1_(&nl, &nr, &sqrei, &d__[nlf], &alpha, &beta, &u[nlf + nlf * - u_dim1], ldu, &vt[nlf + nlf * vt_dim1], ldvt, &iwork[ - idxqc], &iwork[iwk], &work[1], info); - if (*info != 0) { - return 0; - } -/* L40: */ - } -/* L50: */ - } - - return 0; - -/* End of DLASD0 */ - -} /* dlasd0_ */ - -/* Subroutine */ int dlasd1_(integer *nl, integer *nr, integer *sqre, - doublereal *d__, doublereal *alpha, doublereal *beta, doublereal *u, - integer *ldu, doublereal *vt, integer *ldvt, integer *idxq, integer * - iwork, doublereal *work, integer *info) -{ - /* System generated locals */ - integer u_dim1, u_offset, vt_dim1, vt_offset, i__1; - doublereal d__1, d__2; - - /* Local variables */ - static integer i__, k, m, n, n1, n2, iq, iz, iu2, ldq, idx, ldu2, ivt2, - idxc, idxp, ldvt2; - extern /* Subroutine */ int dlasd2_(integer *, integer *, integer *, - integer *, doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, integer *, - integer *, integer *, integer *, integer *, integer *), dlasd3_( - integer *, integer *, integer *, integer *, doublereal *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *, integer *, integer *, doublereal *, integer *), - dlascl_(char *, integer *, integer *, doublereal *, doublereal *, - integer *, integer *, doublereal *, integer *, integer *), - dlamrg_(integer *, integer *, doublereal *, integer *, integer *, - integer *); - static integer isigma; - extern /* Subroutine */ int xerbla_(char *, integer *); - static doublereal orgnrm; - static integer coltyp; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B, - where N = NL + NR + 1 and M = N + SQRE. DLASD1 is called from DLASD0. - - A related subroutine DLASD7 handles the case in which the singular - values (and the singular vectors in factored form) are desired. - - DLASD1 computes the SVD as follows: - - ( D1(in) 0 0 0 ) - B = U(in) * ( Z1' a Z2' b ) * VT(in) - ( 0 0 D2(in) 0 ) - - = U(out) * ( D(out) 0) * VT(out) - - where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M - with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros - elsewhere; and the entry b is empty if SQRE = 0. - - The left singular vectors of the original matrix are stored in U, and - the transpose of the right singular vectors are stored in VT, and the - singular values are in D. The algorithm consists of three stages: - - The first stage consists of deflating the size of the problem - when there are multiple singular values or when there are zeros in - the Z vector. For each such occurence the dimension of the - secular equation problem is reduced by one. This stage is - performed by the routine DLASD2. - - The second stage consists of calculating the updated - singular values. This is done by finding the square roots of the - roots of the secular equation via the routine DLASD4 (as called - by DLASD3). This routine also calculates the singular vectors of - the current problem. - - The final stage consists of computing the updated singular vectors - directly using the updated singular values. The singular vectors - for the current problem are multiplied with the singular vectors - from the overall problem. - - Arguments - ========= - - NL (input) INTEGER - The row dimension of the upper block. NL >= 1. - - NR (input) INTEGER - The row dimension of the lower block. NR >= 1. - - SQRE (input) INTEGER - = 0: the lower block is an NR-by-NR square matrix. - = 1: the lower block is an NR-by-(NR+1) rectangular matrix. - - The bidiagonal matrix has row dimension N = NL + NR + 1, - and column dimension M = N + SQRE. - - D (input/output) DOUBLE PRECISION array, - dimension (N = NL+NR+1). - On entry D(1:NL,1:NL) contains the singular values of the - upper block; and D(NL+2:N) contains the singular values of - the lower block. On exit D(1:N) contains the singular values - of the modified matrix. - - ALPHA (input) DOUBLE PRECISION - Contains the diagonal element associated with the added row. - - BETA (input) DOUBLE PRECISION - Contains the off-diagonal element associated with the added - row. - - U (input/output) DOUBLE PRECISION array, dimension(LDU,N) - On entry U(1:NL, 1:NL) contains the left singular vectors of - the upper block; U(NL+2:N, NL+2:N) contains the left singular - vectors of the lower block. On exit U contains the left - singular vectors of the bidiagonal matrix. - - LDU (input) INTEGER - The leading dimension of the array U. LDU >= max( 1, N ). - - VT (input/output) DOUBLE PRECISION array, dimension(LDVT,M) - where M = N + SQRE. - On entry VT(1:NL+1, 1:NL+1)' contains the right singular - vectors of the upper block; VT(NL+2:M, NL+2:M)' contains - the right singular vectors of the lower block. On exit - VT' contains the right singular vectors of the - bidiagonal matrix. - - LDVT (input) INTEGER - The leading dimension of the array VT. LDVT >= max( 1, M ). - - IDXQ (output) INTEGER array, dimension(N) - This contains the permutation which will reintegrate the - subproblem just solved back into sorted order, i.e. - D( IDXQ( I = 1, N ) ) will be in ascending order. - - IWORK (workspace) INTEGER array, dimension( 4 * N ) - - WORK (workspace) DOUBLE PRECISION array, dimension( 3*M**2 + 2*M ) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an singular value did not converge - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - --idxq; - --iwork; - --work; - - /* Function Body */ - *info = 0; - - if (*nl < 1) { - *info = -1; - } else if (*nr < 1) { - *info = -2; - } else if (*sqre < 0 || *sqre > 1) { - *info = -3; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLASD1", &i__1); - return 0; - } - - n = *nl + *nr + 1; - m = n + *sqre; - -/* - The following values are for bookkeeping purposes only. They are - integer pointers which indicate the portion of the workspace - used by a particular array in DLASD2 and DLASD3. -*/ - - ldu2 = n; - ldvt2 = m; - - iz = 1; - isigma = iz + m; - iu2 = isigma + n; - ivt2 = iu2 + ldu2 * n; - iq = ivt2 + ldvt2 * m; - - idx = 1; - idxc = idx + n; - coltyp = idxc + n; - idxp = coltyp + n; - -/* - Scale. - - Computing MAX -*/ - d__1 = abs(*alpha), d__2 = abs(*beta); - orgnrm = max(d__1,d__2); - d__[*nl + 1] = 0.; - i__1 = n; - for (i__ = 1; i__ <= i__1; ++i__) { - if ((d__1 = d__[i__], abs(d__1)) > orgnrm) { - orgnrm = (d__1 = d__[i__], abs(d__1)); - } -/* L10: */ - } - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2453, &n, &c__1, &d__[1], &n, - info); - *alpha /= orgnrm; - *beta /= orgnrm; - -/* Deflate singular values. */ - - dlasd2_(nl, nr, sqre, &k, &d__[1], &work[iz], alpha, beta, &u[u_offset], - ldu, &vt[vt_offset], ldvt, &work[isigma], &work[iu2], &ldu2, & - work[ivt2], &ldvt2, &iwork[idxp], &iwork[idx], &iwork[idxc], & - idxq[1], &iwork[coltyp], info); - -/* Solve Secular Equation and update singular vectors. */ - - ldq = k; - dlasd3_(nl, nr, sqre, &k, &d__[1], &work[iq], &ldq, &work[isigma], &u[ - u_offset], ldu, &work[iu2], &ldu2, &vt[vt_offset], ldvt, &work[ - ivt2], &ldvt2, &iwork[idxc], &iwork[coltyp], &work[iz], info); - if (*info != 0) { - return 0; - } - -/* Unscale. */ - - dlascl_("G", &c__0, &c__0, &c_b2453, &orgnrm, &n, &c__1, &d__[1], &n, - info); - -/* Prepare the IDXQ sorting permutation. */ - - n1 = k; - n2 = n - k; - dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); - - return 0; - -/* End of DLASD1 */ - -} /* dlasd1_ */ - -/* Subroutine */ int dlasd2_(integer *nl, integer *nr, integer *sqre, integer - *k, doublereal *d__, doublereal *z__, doublereal *alpha, doublereal * - beta, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, - doublereal *dsigma, doublereal *u2, integer *ldu2, doublereal *vt2, - integer *ldvt2, integer *idxp, integer *idx, integer *idxc, integer * - idxq, integer *coltyp, integer *info) -{ - /* System generated locals */ - integer u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, - vt2_dim1, vt2_offset, i__1; - doublereal d__1, d__2; - - /* Local variables */ - static doublereal c__; - static integer i__, j, m, n; - static doublereal s; - static integer k2; - static doublereal z1; - static integer ct, jp; - static doublereal eps, tau, tol; - static integer psm[4], nlp1, nlp2, idxi, idxj; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *); - static integer ctot[4], idxjp; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static integer jprev; - - extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, - integer *, integer *, integer *), dlacpy_(char *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *), dlaset_(char *, integer *, integer *, doublereal *, - doublereal *, doublereal *, integer *), xerbla_(char *, - integer *); - static doublereal hlftol; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - October 31, 1999 - - - Purpose - ======= - - DLASD2 merges the two sets of singular values together into a single - sorted set. Then it tries to deflate the size of the problem. - There are two ways in which deflation can occur: when two or more - singular values are close together or if there is a tiny entry in the - Z vector. For each such occurrence the order of the related secular - equation problem is reduced by one. - - DLASD2 is called from DLASD1. - - Arguments - ========= - - NL (input) INTEGER - The row dimension of the upper block. NL >= 1. - - NR (input) INTEGER - The row dimension of the lower block. NR >= 1. - - SQRE (input) INTEGER - = 0: the lower block is an NR-by-NR square matrix. - = 1: the lower block is an NR-by-(NR+1) rectangular matrix. - - The bidiagonal matrix has N = NL + NR + 1 rows and - M = N + SQRE >= N columns. - - K (output) INTEGER - Contains the dimension of the non-deflated matrix, - This is the order of the related secular equation. 1 <= K <=N. - - D (input/output) DOUBLE PRECISION array, dimension(N) - On entry D contains the singular values of the two submatrices - to be combined. On exit D contains the trailing (N-K) updated - singular values (those which were deflated) sorted into - increasing order. - - ALPHA (input) DOUBLE PRECISION - Contains the diagonal element associated with the added row. - - BETA (input) DOUBLE PRECISION - Contains the off-diagonal element associated with the added - row. - - U (input/output) DOUBLE PRECISION array, dimension(LDU,N) - On entry U contains the left singular vectors of two - submatrices in the two square blocks with corners at (1,1), - (NL, NL), and (NL+2, NL+2), (N,N). - On exit U contains the trailing (N-K) updated left singular - vectors (those which were deflated) in its last N-K columns. - - LDU (input) INTEGER - The leading dimension of the array U. LDU >= N. - - Z (output) DOUBLE PRECISION array, dimension(N) - On exit Z contains the updating row vector in the secular - equation. - - DSIGMA (output) DOUBLE PRECISION array, dimension (N) - Contains a copy of the diagonal elements (K-1 singular values - and one zero) in the secular equation. - - U2 (output) DOUBLE PRECISION array, dimension(LDU2,N) - Contains a copy of the first K-1 left singular vectors which - will be used by DLASD3 in a matrix multiply (DGEMM) to solve - for the new left singular vectors. U2 is arranged into four - blocks. The first block contains a column with 1 at NL+1 and - zero everywhere else; the second block contains non-zero - entries only at and above NL; the third contains non-zero - entries only below NL+1; and the fourth is dense. - - LDU2 (input) INTEGER - The leading dimension of the array U2. LDU2 >= N. - - VT (input/output) DOUBLE PRECISION array, dimension(LDVT,M) - On entry VT' contains the right singular vectors of two - submatrices in the two square blocks with corners at (1,1), - (NL+1, NL+1), and (NL+2, NL+2), (M,M). - On exit VT' contains the trailing (N-K) updated right singular - vectors (those which were deflated) in its last N-K columns. - In case SQRE =1, the last row of VT spans the right null - space. - - LDVT (input) INTEGER - The leading dimension of the array VT. LDVT >= M. - - VT2 (output) DOUBLE PRECISION array, dimension(LDVT2,N) - VT2' contains a copy of the first K right singular vectors - which will be used by DLASD3 in a matrix multiply (DGEMM) to - solve for the new right singular vectors. VT2 is arranged into - three blocks. The first block contains a row that corresponds - to the special 0 diagonal element in SIGMA; the second block - contains non-zeros only at and before NL +1; the third block - contains non-zeros only at and after NL +2. - - LDVT2 (input) INTEGER - The leading dimension of the array VT2. LDVT2 >= M. - - IDXP (workspace) INTEGER array, dimension(N) - This will contain the permutation used to place deflated - values of D at the end of the array. On output IDXP(2:K) - points to the nondeflated D-values and IDXP(K+1:N) - points to the deflated singular values. - - IDX (workspace) INTEGER array, dimension(N) - This will contain the permutation used to sort the contents of - D into ascending order. - - IDXC (output) INTEGER array, dimension(N) - This will contain the permutation used to arrange the columns - of the deflated U matrix into three groups: the first group - contains non-zero entries only at and above NL, the second - contains non-zero entries only below NL+2, and the third is - dense. - - COLTYP (workspace/output) INTEGER array, dimension(N) - As workspace, this will contain a label which will indicate - which of the following types a column in the U2 matrix or a - row in the VT2 matrix is: - 1 : non-zero in the upper half only - 2 : non-zero in the lower half only - 3 : dense - 4 : deflated - - On exit, it is an array of dimension 4, with COLTYP(I) being - the dimension of the I-th type columns. - - IDXQ (input) INTEGER array, dimension(N) - This contains the permutation which separately sorts the two - sub-problems in D into ascending order. Note that entries in - the first hlaf of this permutation must first be moved one - position backward; and entries in the second half - must first have NL+1 added to their values. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --z__; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - --dsigma; - u2_dim1 = *ldu2; - u2_offset = 1 + u2_dim1; - u2 -= u2_offset; - vt2_dim1 = *ldvt2; - vt2_offset = 1 + vt2_dim1; - vt2 -= vt2_offset; - --idxp; - --idx; - --idxc; - --idxq; - --coltyp; - - /* Function Body */ - *info = 0; - - if (*nl < 1) { - *info = -1; - } else if (*nr < 1) { - *info = -2; - } else if (*sqre != 1 && *sqre != 0) { - *info = -3; - } - - n = *nl + *nr + 1; - m = n + *sqre; - - if (*ldu < n) { - *info = -10; - } else if (*ldvt < m) { - *info = -12; - } else if (*ldu2 < n) { - *info = -15; - } else if (*ldvt2 < m) { - *info = -17; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLASD2", &i__1); - return 0; - } - - nlp1 = *nl + 1; - nlp2 = *nl + 2; - -/* - Generate the first part of the vector Z; and move the singular - values in the first part of D one position backward. -*/ - - z1 = *alpha * vt[nlp1 + nlp1 * vt_dim1]; - z__[1] = z1; - for (i__ = *nl; i__ >= 1; --i__) { - z__[i__ + 1] = *alpha * vt[i__ + nlp1 * vt_dim1]; - d__[i__ + 1] = d__[i__]; - idxq[i__ + 1] = idxq[i__] + 1; -/* L10: */ - } - -/* Generate the second part of the vector Z. */ - - i__1 = m; - for (i__ = nlp2; i__ <= i__1; ++i__) { - z__[i__] = *beta * vt[i__ + nlp2 * vt_dim1]; -/* L20: */ - } - -/* Initialize some reference arrays. */ - - i__1 = nlp1; - for (i__ = 2; i__ <= i__1; ++i__) { - coltyp[i__] = 1; -/* L30: */ - } - i__1 = n; - for (i__ = nlp2; i__ <= i__1; ++i__) { - coltyp[i__] = 2; -/* L40: */ - } - -/* Sort the singular values into increasing order */ - - i__1 = n; - for (i__ = nlp2; i__ <= i__1; ++i__) { - idxq[i__] += nlp1; -/* L50: */ - } - -/* - DSIGMA, IDXC, IDXC, and the first column of U2 - are used as storage space. -*/ - - i__1 = n; - for (i__ = 2; i__ <= i__1; ++i__) { - dsigma[i__] = d__[idxq[i__]]; - u2[i__ + u2_dim1] = z__[idxq[i__]]; - idxc[i__] = coltyp[idxq[i__]]; -/* L60: */ - } - - dlamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); - - i__1 = n; - for (i__ = 2; i__ <= i__1; ++i__) { - idxi = idx[i__] + 1; - d__[i__] = dsigma[idxi]; - z__[i__] = u2[idxi + u2_dim1]; - coltyp[i__] = idxc[idxi]; -/* L70: */ - } - -/* Calculate the allowable deflation tolerance */ - - eps = EPSILON; -/* Computing MAX */ - d__1 = abs(*alpha), d__2 = abs(*beta); - tol = max(d__1,d__2); -/* Computing MAX */ - d__2 = (d__1 = d__[n], abs(d__1)); - tol = eps * 8. * max(d__2,tol); - -/* - There are 2 kinds of deflation -- first a value in the z-vector - is small, second two (or more) singular values are very close - together (their difference is small). - - If the value in the z-vector is small, we simply permute the - array so that the corresponding singular value is moved to the - end. - - If two values in the D-vector are close, we perform a two-sided - rotation designed to make one of the corresponding z-vector - entries zero, and then permute the array so that the deflated - singular value is moved to the end. - - If there are multiple singular values then the problem deflates. - Here the number of equal singular values are found. As each equal - singular value is found, an elementary reflector is computed to - rotate the corresponding singular subspace so that the - corresponding components of Z are zero in this new basis. -*/ - - *k = 1; - k2 = n + 1; - i__1 = n; - for (j = 2; j <= i__1; ++j) { - if ((d__1 = z__[j], abs(d__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - idxp[k2] = j; - coltyp[j] = 4; - if (j == n) { - goto L120; - } - } else { - jprev = j; - goto L90; - } -/* L80: */ - } -L90: - j = jprev; -L100: - ++j; - if (j > n) { - goto L110; - } - if ((d__1 = z__[j], abs(d__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - idxp[k2] = j; - coltyp[j] = 4; - } else { - -/* Check if singular values are close enough to allow deflation. */ - - if ((d__1 = d__[j] - d__[jprev], abs(d__1)) <= tol) { - -/* Deflation is possible. */ - - s = z__[jprev]; - c__ = z__[j]; - -/* - Find sqrt(a**2+b**2) without overflow or - destructive underflow. -*/ - - tau = dlapy2_(&c__, &s); - c__ /= tau; - s = -s / tau; - z__[j] = tau; - z__[jprev] = 0.; - -/* - Apply back the Givens rotation to the left and right - singular vector matrices. -*/ - - idxjp = idxq[idx[jprev] + 1]; - idxj = idxq[idx[j] + 1]; - if (idxjp <= nlp1) { - --idxjp; - } - if (idxj <= nlp1) { - --idxj; - } - drot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], & - c__1, &c__, &s); - drot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, & - c__, &s); - if (coltyp[j] != coltyp[jprev]) { - coltyp[j] = 3; - } - coltyp[jprev] = 4; - --k2; - idxp[k2] = jprev; - jprev = j; - } else { - ++(*k); - u2[*k + u2_dim1] = z__[jprev]; - dsigma[*k] = d__[jprev]; - idxp[*k] = jprev; - jprev = j; - } - } - goto L100; -L110: - -/* Record the last singular value. */ - - ++(*k); - u2[*k + u2_dim1] = z__[jprev]; - dsigma[*k] = d__[jprev]; - idxp[*k] = jprev; - -L120: - -/* - Count up the total number of the various types of columns, then - form a permutation which positions the four column types into - four groups of uniform structure (although one or more of these - groups may be empty). -*/ - - for (j = 1; j <= 4; ++j) { - ctot[j - 1] = 0; -/* L130: */ - } - i__1 = n; - for (j = 2; j <= i__1; ++j) { - ct = coltyp[j]; - ++ctot[ct - 1]; -/* L140: */ - } - -/* PSM(*) = Position in SubMatrix (of types 1 through 4) */ - - psm[0] = 2; - psm[1] = ctot[0] + 2; - psm[2] = psm[1] + ctot[1]; - psm[3] = psm[2] + ctot[2]; - -/* - Fill out the IDXC array so that the permutation which it induces - will place all type-1 columns first, all type-2 columns next, - then all type-3's, and finally all type-4's, starting from the - second column. This applies similarly to the rows of VT. -*/ - - i__1 = n; - for (j = 2; j <= i__1; ++j) { - jp = idxp[j]; - ct = coltyp[jp]; - idxc[psm[ct - 1]] = j; - ++psm[ct - 1]; -/* L150: */ - } - -/* - Sort the singular values and corresponding singular vectors into - DSIGMA, U2, and VT2 respectively. The singular values/vectors - which were not deflated go into the first K slots of DSIGMA, U2, - and VT2 respectively, while those which were deflated go into the - last N - K slots, except that the first column/row will be treated - separately. -*/ - - i__1 = n; - for (j = 2; j <= i__1; ++j) { - jp = idxp[j]; - dsigma[j] = d__[jp]; - idxj = idxq[idx[idxp[idxc[j]]] + 1]; - if (idxj <= nlp1) { - --idxj; - } - dcopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1); - dcopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2); -/* L160: */ - } - -/* Determine DSIGMA(1), DSIGMA(2) and Z(1) */ - - dsigma[1] = 0.; - hlftol = tol / 2.; - if (abs(dsigma[2]) <= hlftol) { - dsigma[2] = hlftol; - } - if (m > n) { - z__[1] = dlapy2_(&z1, &z__[m]); - if (z__[1] <= tol) { - c__ = 1.; - s = 0.; - z__[1] = tol; - } else { - c__ = z1 / z__[1]; - s = z__[m] / z__[1]; - } - } else { - if (abs(z1) <= tol) { - z__[1] = tol; - } else { - z__[1] = z1; - } - } - -/* Move the rest of the updating row to Z. */ - - i__1 = *k - 1; - dcopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1); - -/* - Determine the first column of U2, the first row of VT2 and the - last row of VT. -*/ - - dlaset_("A", &n, &c__1, &c_b2467, &c_b2467, &u2[u2_offset], ldu2); - u2[nlp1 + u2_dim1] = 1.; - if (m > n) { - i__1 = nlp1; - for (i__ = 1; i__ <= i__1; ++i__) { - vt[m + i__ * vt_dim1] = -s * vt[nlp1 + i__ * vt_dim1]; - vt2[i__ * vt2_dim1 + 1] = c__ * vt[nlp1 + i__ * vt_dim1]; -/* L170: */ - } - i__1 = m; - for (i__ = nlp2; i__ <= i__1; ++i__) { - vt2[i__ * vt2_dim1 + 1] = s * vt[m + i__ * vt_dim1]; - vt[m + i__ * vt_dim1] = c__ * vt[m + i__ * vt_dim1]; -/* L180: */ - } - } else { - dcopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2); - } - if (m > n) { - dcopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2); - } - -/* - The deflated singular values and their corresponding vectors go - into the back of D, U, and V respectively. -*/ - - if (n > *k) { - i__1 = n - *k; - dcopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); - i__1 = n - *k; - dlacpy_("A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1) - * u_dim1 + 1], ldu); - i__1 = n - *k; - dlacpy_("A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 + - vt_dim1], ldvt); - } - -/* Copy CTOT into COLTYP for referencing in DLASD3. */ - - for (j = 1; j <= 4; ++j) { - coltyp[j] = ctot[j - 1]; -/* L190: */ - } - - return 0; - -/* End of DLASD2 */ - -} /* dlasd2_ */ - -/* Subroutine */ int dlasd3_(integer *nl, integer *nr, integer *sqre, integer - *k, doublereal *d__, doublereal *q, integer *ldq, doublereal *dsigma, - doublereal *u, integer *ldu, doublereal *u2, integer *ldu2, - doublereal *vt, integer *ldvt, doublereal *vt2, integer *ldvt2, - integer *idxc, integer *ctot, doublereal *z__, integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, - vt_offset, vt2_dim1, vt2_offset, i__1, i__2; - doublereal d__1, d__2; - - /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); - - /* Local variables */ - static integer i__, j, m, n, jc; - static doublereal rho; - static integer nlp1, nlp2, nrp1; - static doublereal temp; - extern doublereal dnrm2_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - static integer ctemp; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static integer ktemp; - extern doublereal dlamc3_(doublereal *, doublereal *); - extern /* Subroutine */ int dlasd4_(integer *, integer *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *), dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *), dlacpy_(char *, integer *, integer - *, doublereal *, integer *, doublereal *, integer *), - xerbla_(char *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - October 31, 1999 - - - Purpose - ======= - - DLASD3 finds all the square roots of the roots of the secular - equation, as defined by the values in D and Z. It makes the - appropriate calls to DLASD4 and then updates the singular - vectors by matrix multiplication. - - This code makes very mild assumptions about floating point - arithmetic. It will work on machines with a guard digit in - add/subtract, or on those binary machines without guard digits - which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. - It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. - - DLASD3 is called from DLASD1. - - Arguments - ========= - - NL (input) INTEGER - The row dimension of the upper block. NL >= 1. - - NR (input) INTEGER - The row dimension of the lower block. NR >= 1. - - SQRE (input) INTEGER - = 0: the lower block is an NR-by-NR square matrix. - = 1: the lower block is an NR-by-(NR+1) rectangular matrix. - - The bidiagonal matrix has N = NL + NR + 1 rows and - M = N + SQRE >= N columns. - - K (input) INTEGER - The size of the secular equation, 1 =< K = < N. - - D (output) DOUBLE PRECISION array, dimension(K) - On exit the square roots of the roots of the secular equation, - in ascending order. - - Q (workspace) DOUBLE PRECISION array, - dimension at least (LDQ,K). - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= K. - - DSIGMA (input) DOUBLE PRECISION array, dimension(K) - The first K elements of this array contain the old roots - of the deflated updating problem. These are the poles - of the secular equation. - - U (input) DOUBLE PRECISION array, dimension (LDU, N) - The last N - K columns of this matrix contain the deflated - left singular vectors. - - LDU (input) INTEGER - The leading dimension of the array U. LDU >= N. - - U2 (input) DOUBLE PRECISION array, dimension (LDU2, N) - The first K columns of this matrix contain the non-deflated - left singular vectors for the split problem. - - LDU2 (input) INTEGER - The leading dimension of the array U2. LDU2 >= N. - - VT (input) DOUBLE PRECISION array, dimension (LDVT, M) - The last M - K columns of VT' contain the deflated - right singular vectors. - - LDVT (input) INTEGER - The leading dimension of the array VT. LDVT >= N. - - VT2 (input) DOUBLE PRECISION array, dimension (LDVT2, N) - The first K columns of VT2' contain the non-deflated - right singular vectors for the split problem. - - LDVT2 (input) INTEGER - The leading dimension of the array VT2. LDVT2 >= N. - - IDXC (input) INTEGER array, dimension ( N ) - The permutation used to arrange the columns of U (and rows of - VT) into three groups: the first group contains non-zero - entries only at and above (or before) NL +1; the second - contains non-zero entries only at and below (or after) NL+2; - and the third is dense. The first column of U and the row of - VT are treated separately, however. - - The rows of the singular vectors found by DLASD4 - must be likewise permuted before the matrix multiplies can - take place. - - CTOT (input) INTEGER array, dimension ( 4 ) - A count of the total number of the various types of columns - in U (or rows in VT), as described in IDXC. The fourth column - type is any column which has been deflated. - - Z (input) DOUBLE PRECISION array, dimension (K) - The first K elements of this array contain the components - of the deflation-adjusted updating row vector. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an singular value did not converge - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --dsigma; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - u2_dim1 = *ldu2; - u2_offset = 1 + u2_dim1; - u2 -= u2_offset; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - vt2_dim1 = *ldvt2; - vt2_offset = 1 + vt2_dim1; - vt2 -= vt2_offset; - --idxc; - --ctot; - --z__; - - /* Function Body */ - *info = 0; - - if (*nl < 1) { - *info = -1; - } else if (*nr < 1) { - *info = -2; - } else if (*sqre != 1 && *sqre != 0) { - *info = -3; - } - - n = *nl + *nr + 1; - m = n + *sqre; - nlp1 = *nl + 1; - nlp2 = *nl + 2; - - if (*k < 1 || *k > n) { - *info = -4; - } else if (*ldq < *k) { - *info = -7; - } else if (*ldu < n) { - *info = -10; - } else if (*ldu2 < n) { - *info = -12; - } else if (*ldvt < m) { - *info = -14; - } else if (*ldvt2 < m) { - *info = -16; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLASD3", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*k == 1) { - d__[1] = abs(z__[1]); - dcopy_(&m, &vt2[vt2_dim1 + 1], ldvt2, &vt[vt_dim1 + 1], ldvt); - if (z__[1] > 0.) { - dcopy_(&n, &u2[u2_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1); - } else { - i__1 = n; - for (i__ = 1; i__ <= i__1; ++i__) { - u[i__ + u_dim1] = -u2[i__ + u2_dim1]; -/* L10: */ - } - } - return 0; - } - -/* - Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can - be computed with high relative accuracy (barring over/underflow). - This is a problem on machines without a guard digit in - add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). - The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), - which on any of these machines zeros out the bottommost - bit of DSIGMA(I) if it is 1; this makes the subsequent - subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation - occurs. On binary machines with a guard digit (almost all - machines) it does not change DSIGMA(I) at all. On hexadecimal - and decimal machines with a guard digit, it slightly - changes the bottommost bits of DSIGMA(I). It does not account - for hexadecimal or decimal machines without guard digits - (we know of none). We use a subroutine call to compute - 2*DLAMBDA(I) to prevent optimizing compilers from eliminating - this code. -*/ - - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - dsigma[i__] = dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; -/* L20: */ - } - -/* Keep a copy of Z. */ - - dcopy_(k, &z__[1], &c__1, &q[q_offset], &c__1); - -/* Normalize Z. */ - - rho = dnrm2_(k, &z__[1], &c__1); - dlascl_("G", &c__0, &c__0, &rho, &c_b2453, k, &c__1, &z__[1], k, info); - rho *= rho; - -/* Find the new singular values. */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dlasd4_(k, &j, &dsigma[1], &z__[1], &u[j * u_dim1 + 1], &rho, &d__[j], - &vt[j * vt_dim1 + 1], info); - -/* If the zero finder fails, the computation is terminated. */ - - if (*info != 0) { - return 0; - } -/* L30: */ - } - -/* Compute updated Z. */ - - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - z__[i__] = u[i__ + *k * u_dim1] * vt[i__ + *k * vt_dim1]; - i__2 = i__ - 1; - for (j = 1; j <= i__2; ++j) { - z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[ - i__] - dsigma[j]) / (dsigma[i__] + dsigma[j]); -/* L40: */ - } - i__2 = *k - 1; - for (j = i__; j <= i__2; ++j) { - z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[ - i__] - dsigma[j + 1]) / (dsigma[i__] + dsigma[j + 1]); -/* L50: */ - } - d__2 = sqrt((d__1 = z__[i__], abs(d__1))); - z__[i__] = d_sign(&d__2, &q[i__ + q_dim1]); -/* L60: */ - } - -/* - Compute left singular vectors of the modified diagonal matrix, - and store related information for the right singular vectors. -*/ - - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - vt[i__ * vt_dim1 + 1] = z__[1] / u[i__ * u_dim1 + 1] / vt[i__ * - vt_dim1 + 1]; - u[i__ * u_dim1 + 1] = -1.; - i__2 = *k; - for (j = 2; j <= i__2; ++j) { - vt[j + i__ * vt_dim1] = z__[j] / u[j + i__ * u_dim1] / vt[j + i__ - * vt_dim1]; - u[j + i__ * u_dim1] = dsigma[j] * vt[j + i__ * vt_dim1]; -/* L70: */ - } - temp = dnrm2_(k, &u[i__ * u_dim1 + 1], &c__1); - q[i__ * q_dim1 + 1] = u[i__ * u_dim1 + 1] / temp; - i__2 = *k; - for (j = 2; j <= i__2; ++j) { - jc = idxc[j]; - q[j + i__ * q_dim1] = u[jc + i__ * u_dim1] / temp; -/* L80: */ - } -/* L90: */ - } - -/* Update the left singular vector matrix. */ - - if (*k == 2) { - dgemm_("N", "N", &n, k, k, &c_b2453, &u2[u2_offset], ldu2, &q[ - q_offset], ldq, &c_b2467, &u[u_offset], ldu); - goto L100; - } - if (ctot[1] > 0) { - dgemm_("N", "N", nl, k, &ctot[1], &c_b2453, &u2[(u2_dim1 << 1) + 1], - ldu2, &q[q_dim1 + 2], ldq, &c_b2467, &u[u_dim1 + 1], ldu); - if (ctot[3] > 0) { - ktemp = ctot[1] + 2 + ctot[2]; - dgemm_("N", "N", nl, k, &ctot[3], &c_b2453, &u2[ktemp * u2_dim1 + - 1], ldu2, &q[ktemp + q_dim1], ldq, &c_b2453, &u[u_dim1 + - 1], ldu); - } - } else if (ctot[3] > 0) { - ktemp = ctot[1] + 2 + ctot[2]; - dgemm_("N", "N", nl, k, &ctot[3], &c_b2453, &u2[ktemp * u2_dim1 + 1], - ldu2, &q[ktemp + q_dim1], ldq, &c_b2467, &u[u_dim1 + 1], ldu); - } else { - dlacpy_("F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu); - } - dcopy_(k, &q[q_dim1 + 1], ldq, &u[nlp1 + u_dim1], ldu); - ktemp = ctot[1] + 2; - ctemp = ctot[2] + ctot[3]; - dgemm_("N", "N", nr, k, &ctemp, &c_b2453, &u2[nlp2 + ktemp * u2_dim1], - ldu2, &q[ktemp + q_dim1], ldq, &c_b2467, &u[nlp2 + u_dim1], ldu); - -/* Generate the right singular vectors. */ - -L100: - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - temp = dnrm2_(k, &vt[i__ * vt_dim1 + 1], &c__1); - q[i__ + q_dim1] = vt[i__ * vt_dim1 + 1] / temp; - i__2 = *k; - for (j = 2; j <= i__2; ++j) { - jc = idxc[j]; - q[i__ + j * q_dim1] = vt[jc + i__ * vt_dim1] / temp; -/* L110: */ - } -/* L120: */ - } - -/* Update the right singular vector matrix. */ - - if (*k == 2) { - dgemm_("N", "N", k, &m, k, &c_b2453, &q[q_offset], ldq, &vt2[ - vt2_offset], ldvt2, &c_b2467, &vt[vt_offset], ldvt); - return 0; - } - ktemp = ctot[1] + 1; - dgemm_("N", "N", k, &nlp1, &ktemp, &c_b2453, &q[q_dim1 + 1], ldq, &vt2[ - vt2_dim1 + 1], ldvt2, &c_b2467, &vt[vt_dim1 + 1], ldvt); - ktemp = ctot[1] + 2 + ctot[2]; - if (ktemp <= *ldvt2) { - dgemm_("N", "N", k, &nlp1, &ctot[3], &c_b2453, &q[ktemp * q_dim1 + 1], - ldq, &vt2[ktemp + vt2_dim1], ldvt2, &c_b2453, &vt[vt_dim1 + - 1], ldvt); - } - - ktemp = ctot[1] + 1; - nrp1 = *nr + *sqre; - if (ktemp > 1) { - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - q[i__ + ktemp * q_dim1] = q[i__ + q_dim1]; -/* L130: */ - } - i__1 = m; - for (i__ = nlp2; i__ <= i__1; ++i__) { - vt2[ktemp + i__ * vt2_dim1] = vt2[i__ * vt2_dim1 + 1]; -/* L140: */ - } - } - ctemp = ctot[2] + 1 + ctot[3]; - dgemm_("N", "N", k, &nrp1, &ctemp, &c_b2453, &q[ktemp * q_dim1 + 1], ldq, - &vt2[ktemp + nlp2 * vt2_dim1], ldvt2, &c_b2467, &vt[nlp2 * - vt_dim1 + 1], ldvt); - - return 0; - -/* End of DLASD3 */ - -} /* dlasd3_ */ - -/* Subroutine */ int dlasd4_(integer *n, integer *i__, doublereal *d__, - doublereal *z__, doublereal *delta, doublereal *rho, doublereal * - sigma, doublereal *work, integer *info) -{ - /* System generated locals */ - integer i__1; - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal a, b, c__; - static integer j; - static doublereal w, dd[3]; - static integer ii; - static doublereal dw, zz[3]; - static integer ip1; - static doublereal eta, phi, eps, tau, psi; - static integer iim1, iip1; - static doublereal dphi, dpsi; - static integer iter; - static doublereal temp, prew, sg2lb, sg2ub, temp1, temp2, dtiim, delsq, - dtiip; - static integer niter; - static doublereal dtisq; - static logical swtch; - static doublereal dtnsq; - extern /* Subroutine */ int dlaed6_(integer *, logical *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, integer *) - , dlasd5_(integer *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *); - static doublereal delsq2, dtnsq1; - static logical swtch3; - - static logical orgati; - static doublereal erretm, dtipsq, rhoinv; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - October 31, 1999 - - - Purpose - ======= - - This subroutine computes the square root of the I-th updated - eigenvalue of a positive symmetric rank-one modification to - a positive diagonal matrix whose entries are given as the squares - of the corresponding entries in the array d, and that - - 0 <= D(i) < D(j) for i < j - - and that RHO > 0. This is arranged by the calling routine, and is - no loss in generality. The rank-one modified system is thus - - diag( D ) * diag( D ) + RHO * Z * Z_transpose. - - where we assume the Euclidean norm of Z is 1. - - The method consists of approximating the rational functions in the - secular equation by simpler interpolating rational functions. - - Arguments - ========= - - N (input) INTEGER - The length of all arrays. - - I (input) INTEGER - The index of the eigenvalue to be computed. 1 <= I <= N. - - D (input) DOUBLE PRECISION array, dimension ( N ) - The original eigenvalues. It is assumed that they are in - order, 0 <= D(I) < D(J) for I < J. - - Z (input) DOUBLE PRECISION array, dimension ( N ) - The components of the updating vector. - - DELTA (output) DOUBLE PRECISION array, dimension ( N ) - If N .ne. 1, DELTA contains (D(j) - sigma_I) in its j-th - component. If N = 1, then DELTA(1) = 1. The vector DELTA - contains the information necessary to construct the - (singular) eigenvectors. - - RHO (input) DOUBLE PRECISION - The scalar in the symmetric updating formula. - - SIGMA (output) DOUBLE PRECISION - The computed lambda_I, the I-th updated eigenvalue. - - WORK (workspace) DOUBLE PRECISION array, dimension ( N ) - If N .ne. 1, WORK contains (D(j) + sigma_I) in its j-th - component. If N = 1, then WORK( 1 ) = 1. - - INFO (output) INTEGER - = 0: successful exit - > 0: if INFO = 1, the updating process failed. - - Internal Parameters - =================== - - Logical variable ORGATI (origin-at-i?) is used for distinguishing - whether D(i) or D(i+1) is treated as the origin. - - ORGATI = .true. origin at i - ORGATI = .false. origin at i+1 - - Logical variable SWTCH3 (switch-for-3-poles?) is for noting - if we are working with THREE poles! - - MAXIT is the maximum number of iterations allowed for each - eigenvalue. - - Further Details - =============== - - Based on contributions by - Ren-Cang Li, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Since this routine is called in an inner loop, we do no argument - checking. - - Quick return for N=1 and 2. -*/ - - /* Parameter adjustments */ - --work; - --delta; - --z__; - --d__; - - /* Function Body */ - *info = 0; - if (*n == 1) { - -/* Presumably, I=1 upon entry */ - - *sigma = sqrt(d__[1] * d__[1] + *rho * z__[1] * z__[1]); - delta[1] = 1.; - work[1] = 1.; - return 0; - } - if (*n == 2) { - dlasd5_(i__, &d__[1], &z__[1], &delta[1], rho, sigma, &work[1]); - return 0; - } - -/* Compute machine epsilon */ - - eps = EPSILON; - rhoinv = 1. / *rho; - -/* The case I = N */ - - if (*i__ == *n) { - -/* Initialize some basic variables */ - - ii = *n - 1; - niter = 1; - -/* Calculate initial guess */ - - temp = *rho / 2.; - -/* - If ||Z||_2 is not one, then TEMP should be set to - RHO * ||Z||_2^2 / TWO -*/ - - temp1 = temp / (d__[*n] + sqrt(d__[*n] * d__[*n] + temp)); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - work[j] = d__[j] + d__[*n] + temp1; - delta[j] = d__[j] - d__[*n] - temp1; -/* L10: */ - } - - psi = 0.; - i__1 = *n - 2; - for (j = 1; j <= i__1; ++j) { - psi += z__[j] * z__[j] / (delta[j] * work[j]); -/* L20: */ - } - - c__ = rhoinv + psi; - w = c__ + z__[ii] * z__[ii] / (delta[ii] * work[ii]) + z__[*n] * z__[* - n] / (delta[*n] * work[*n]); - - if (w <= 0.) { - temp1 = sqrt(d__[*n] * d__[*n] + *rho); - temp = z__[*n - 1] * z__[*n - 1] / ((d__[*n - 1] + temp1) * (d__[* - n] - d__[*n - 1] + *rho / (d__[*n] + temp1))) + z__[*n] * - z__[*n] / *rho; - -/* - The following TAU is to approximate - SIGMA_n^2 - D( N )*D( N ) -*/ - - if (c__ <= temp) { - tau = *rho; - } else { - delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]); - a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[* - n]; - b = z__[*n] * z__[*n] * delsq; - if (a < 0.) { - tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); - } else { - tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); - } - } - -/* - It can be proved that - D(N)^2+RHO/2 <= SIGMA_n^2 < D(N)^2+TAU <= D(N)^2+RHO -*/ - - } else { - delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]); - a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]; - b = z__[*n] * z__[*n] * delsq; - -/* - The following TAU is to approximate - SIGMA_n^2 - D( N )*D( N ) -*/ - - if (a < 0.) { - tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); - } else { - tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); - } - -/* - It can be proved that - D(N)^2 < D(N)^2+TAU < SIGMA(N)^2 < D(N)^2+RHO/2 -*/ - - } - -/* The following ETA is to approximate SIGMA_n - D( N ) */ - - eta = tau / (d__[*n] + sqrt(d__[*n] * d__[*n] + tau)); - - *sigma = d__[*n] + eta; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] = d__[j] - d__[*i__] - eta; - work[j] = d__[j] + d__[*i__] + eta; -/* L30: */ - } - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = ii; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / (delta[j] * work[j]); - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L40: */ - } - erretm = abs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - temp = z__[*n] / (delta[*n] * work[*n]); - phi = z__[*n] * temp; - dphi = temp * temp; - erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi - + dphi); - - w = rhoinv + phi + psi; - -/* Test for convergence */ - - if (abs(w) <= eps * erretm) { - goto L240; - } - -/* Calculate the new step */ - - ++niter; - dtnsq1 = work[*n - 1] * delta[*n - 1]; - dtnsq = work[*n] * delta[*n]; - c__ = w - dtnsq1 * dpsi - dtnsq * dphi; - a = (dtnsq + dtnsq1) * w - dtnsq * dtnsq1 * (dpsi + dphi); - b = dtnsq * dtnsq1 * w; - if (c__ < 0.) { - c__ = abs(c__); - } - if (c__ == 0.) { - eta = *rho - *sigma * *sigma; - } else if (a >= 0.) { - eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ - * 2.); - } else { - eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) - ); - } - -/* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. -*/ - - if (w * eta > 0.) { - eta = -w / (dpsi + dphi); - } - temp = eta - dtnsq; - if (temp > *rho) { - eta = *rho + dtnsq; - } - - tau += eta; - eta /= *sigma + sqrt(eta + *sigma * *sigma); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] -= eta; - work[j] += eta; -/* L50: */ - } - - *sigma += eta; - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = ii; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / (work[j] * delta[j]); - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L60: */ - } - erretm = abs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - temp = z__[*n] / (work[*n] * delta[*n]); - phi = z__[*n] * temp; - dphi = temp * temp; - erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi - + dphi); - - w = rhoinv + phi + psi; - -/* Main loop to update the values of the array DELTA */ - - iter = niter + 1; - - for (niter = iter; niter <= 20; ++niter) { - -/* Test for convergence */ - - if (abs(w) <= eps * erretm) { - goto L240; - } - -/* Calculate the new step */ - - dtnsq1 = work[*n - 1] * delta[*n - 1]; - dtnsq = work[*n] * delta[*n]; - c__ = w - dtnsq1 * dpsi - dtnsq * dphi; - a = (dtnsq + dtnsq1) * w - dtnsq1 * dtnsq * (dpsi + dphi); - b = dtnsq1 * dtnsq * w; - if (a >= 0.) { - eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( - c__ * 2.); - } else { - eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs( - d__1)))); - } - -/* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. -*/ - - if (w * eta > 0.) { - eta = -w / (dpsi + dphi); - } - temp = eta - dtnsq; - if (temp <= 0.) { - eta /= 2.; - } - - tau += eta; - eta /= *sigma + sqrt(eta + *sigma * *sigma); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] -= eta; - work[j] += eta; -/* L70: */ - } - - *sigma += eta; - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = ii; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / (work[j] * delta[j]); - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L80: */ - } - erretm = abs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - temp = z__[*n] / (work[*n] * delta[*n]); - phi = z__[*n] * temp; - dphi = temp * temp; - erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * ( - dpsi + dphi); - - w = rhoinv + phi + psi; -/* L90: */ - } - -/* Return with INFO = 1, NITER = MAXIT and not converged */ - - *info = 1; - goto L240; - -/* End for the case I = N */ - - } else { - -/* The case for I < N */ - - niter = 1; - ip1 = *i__ + 1; - -/* Calculate initial guess */ - - delsq = (d__[ip1] - d__[*i__]) * (d__[ip1] + d__[*i__]); - delsq2 = delsq / 2.; - temp = delsq2 / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + delsq2)); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - work[j] = d__[j] + d__[*i__] + temp; - delta[j] = d__[j] - d__[*i__] - temp; -/* L100: */ - } - - psi = 0.; - i__1 = *i__ - 1; - for (j = 1; j <= i__1; ++j) { - psi += z__[j] * z__[j] / (work[j] * delta[j]); -/* L110: */ - } - - phi = 0.; - i__1 = *i__ + 2; - for (j = *n; j >= i__1; --j) { - phi += z__[j] * z__[j] / (work[j] * delta[j]); -/* L120: */ - } - c__ = rhoinv + psi + phi; - w = c__ + z__[*i__] * z__[*i__] / (work[*i__] * delta[*i__]) + z__[ - ip1] * z__[ip1] / (work[ip1] * delta[ip1]); - - if (w > 0.) { - -/* - d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2 - - We choose d(i) as origin. -*/ - - orgati = TRUE_; - sg2lb = 0.; - sg2ub = delsq2; - a = c__ * delsq + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; - b = z__[*i__] * z__[*i__] * delsq; - if (a > 0.) { - tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( - d__1)))); - } else { - tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( - c__ * 2.); - } - -/* - TAU now is an estimation of SIGMA^2 - D( I )^2. The - following, however, is the corresponding estimation of - SIGMA - D( I ). -*/ - - eta = tau / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + tau)); - } else { - -/* - (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2 - - We choose d(i+1) as origin. -*/ - - orgati = FALSE_; - sg2lb = -delsq2; - sg2ub = 0.; - a = c__ * delsq - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; - b = z__[ip1] * z__[ip1] * delsq; - if (a < 0.) { - tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs( - d__1)))); - } else { - tau = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) / - (c__ * 2.); - } - -/* - TAU now is an estimation of SIGMA^2 - D( IP1 )^2. The - following, however, is the corresponding estimation of - SIGMA - D( IP1 ). -*/ - - eta = tau / (d__[ip1] + sqrt((d__1 = d__[ip1] * d__[ip1] + tau, - abs(d__1)))); - } - - if (orgati) { - ii = *i__; - *sigma = d__[*i__] + eta; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - work[j] = d__[j] + d__[*i__] + eta; - delta[j] = d__[j] - d__[*i__] - eta; -/* L130: */ - } - } else { - ii = *i__ + 1; - *sigma = d__[ip1] + eta; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - work[j] = d__[j] + d__[ip1] + eta; - delta[j] = d__[j] - d__[ip1] - eta; -/* L140: */ - } - } - iim1 = ii - 1; - iip1 = ii + 1; - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = iim1; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / (work[j] * delta[j]); - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L150: */ - } - erretm = abs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - dphi = 0.; - phi = 0.; - i__1 = iip1; - for (j = *n; j >= i__1; --j) { - temp = z__[j] / (work[j] * delta[j]); - phi += z__[j] * temp; - dphi += temp * temp; - erretm += phi; -/* L160: */ - } - - w = rhoinv + phi + psi; - -/* - W is the value of the secular function with - its ii-th element removed. -*/ - - swtch3 = FALSE_; - if (orgati) { - if (w < 0.) { - swtch3 = TRUE_; - } - } else { - if (w > 0.) { - swtch3 = TRUE_; - } - } - if (ii == 1 || ii == *n) { - swtch3 = FALSE_; - } - - temp = z__[ii] / (work[ii] * delta[ii]); - dw = dpsi + dphi + temp * temp; - temp = z__[ii] * temp; - w += temp; - erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + - abs(tau) * dw; - -/* Test for convergence */ - - if (abs(w) <= eps * erretm) { - goto L240; - } - - if (w <= 0.) { - sg2lb = max(sg2lb,tau); - } else { - sg2ub = min(sg2ub,tau); - } - -/* Calculate the new step */ - - ++niter; - if (! swtch3) { - dtipsq = work[ip1] * delta[ip1]; - dtisq = work[*i__] * delta[*i__]; - if (orgati) { -/* Computing 2nd power */ - d__1 = z__[*i__] / dtisq; - c__ = w - dtipsq * dw + delsq * (d__1 * d__1); - } else { -/* Computing 2nd power */ - d__1 = z__[ip1] / dtipsq; - c__ = w - dtisq * dw - delsq * (d__1 * d__1); - } - a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; - b = dtipsq * dtisq * w; - if (c__ == 0.) { - if (a == 0.) { - if (orgati) { - a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * (dpsi + - dphi); - } else { - a = z__[ip1] * z__[ip1] + dtisq * dtisq * (dpsi + - dphi); - } - } - eta = b / a; - } else if (a <= 0.) { - eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( - c__ * 2.); - } else { - eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( - d__1)))); - } - } else { - -/* Interpolation using THREE most relevant poles */ - - dtiim = work[iim1] * delta[iim1]; - dtiip = work[iip1] * delta[iip1]; - temp = rhoinv + psi + phi; - if (orgati) { - temp1 = z__[iim1] / dtiim; - temp1 *= temp1; - c__ = temp - dtiip * (dpsi + dphi) - (d__[iim1] - d__[iip1]) * - (d__[iim1] + d__[iip1]) * temp1; - zz[0] = z__[iim1] * z__[iim1]; - if (dpsi < temp1) { - zz[2] = dtiip * dtiip * dphi; - } else { - zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi); - } - } else { - temp1 = z__[iip1] / dtiip; - temp1 *= temp1; - c__ = temp - dtiim * (dpsi + dphi) - (d__[iip1] - d__[iim1]) * - (d__[iim1] + d__[iip1]) * temp1; - if (dphi < temp1) { - zz[0] = dtiim * dtiim * dpsi; - } else { - zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1)); - } - zz[2] = z__[iip1] * z__[iip1]; - } - zz[1] = z__[ii] * z__[ii]; - dd[0] = dtiim; - dd[1] = delta[ii] * work[ii]; - dd[2] = dtiip; - dlaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); - if (*info != 0) { - goto L240; - } - } - -/* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. -*/ - - if (w * eta >= 0.) { - eta = -w / dw; - } - if (orgati) { - temp1 = work[*i__] * delta[*i__]; - temp = eta - temp1; - } else { - temp1 = work[ip1] * delta[ip1]; - temp = eta - temp1; - } - if (temp > sg2ub || temp < sg2lb) { - if (w < 0.) { - eta = (sg2ub - tau) / 2.; - } else { - eta = (sg2lb - tau) / 2.; - } - } - - tau += eta; - eta /= *sigma + sqrt(*sigma * *sigma + eta); - - prew = w; - - *sigma += eta; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - work[j] += eta; - delta[j] -= eta; -/* L170: */ - } - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = iim1; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / (work[j] * delta[j]); - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L180: */ - } - erretm = abs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - dphi = 0.; - phi = 0.; - i__1 = iip1; - for (j = *n; j >= i__1; --j) { - temp = z__[j] / (work[j] * delta[j]); - phi += z__[j] * temp; - dphi += temp * temp; - erretm += phi; -/* L190: */ - } - - temp = z__[ii] / (work[ii] * delta[ii]); - dw = dpsi + dphi + temp * temp; - temp = z__[ii] * temp; - w = rhoinv + phi + psi + temp; - erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + - abs(tau) * dw; - - if (w <= 0.) { - sg2lb = max(sg2lb,tau); - } else { - sg2ub = min(sg2ub,tau); - } - - swtch = FALSE_; - if (orgati) { - if (-w > abs(prew) / 10.) { - swtch = TRUE_; - } - } else { - if (w > abs(prew) / 10.) { - swtch = TRUE_; - } - } - -/* Main loop to update the values of the array DELTA and WORK */ - - iter = niter + 1; - - for (niter = iter; niter <= 20; ++niter) { - -/* Test for convergence */ - - if (abs(w) <= eps * erretm) { - goto L240; - } - -/* Calculate the new step */ - - if (! swtch3) { - dtipsq = work[ip1] * delta[ip1]; - dtisq = work[*i__] * delta[*i__]; - if (! swtch) { - if (orgati) { -/* Computing 2nd power */ - d__1 = z__[*i__] / dtisq; - c__ = w - dtipsq * dw + delsq * (d__1 * d__1); - } else { -/* Computing 2nd power */ - d__1 = z__[ip1] / dtipsq; - c__ = w - dtisq * dw - delsq * (d__1 * d__1); - } - } else { - temp = z__[ii] / (work[ii] * delta[ii]); - if (orgati) { - dpsi += temp * temp; - } else { - dphi += temp * temp; - } - c__ = w - dtisq * dpsi - dtipsq * dphi; - } - a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; - b = dtipsq * dtisq * w; - if (c__ == 0.) { - if (a == 0.) { - if (! swtch) { - if (orgati) { - a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * - (dpsi + dphi); - } else { - a = z__[ip1] * z__[ip1] + dtisq * dtisq * ( - dpsi + dphi); - } - } else { - a = dtisq * dtisq * dpsi + dtipsq * dtipsq * dphi; - } - } - eta = b / a; - } else if (a <= 0.) { - eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) - / (c__ * 2.); - } else { - eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, - abs(d__1)))); - } - } else { - -/* Interpolation using THREE most relevant poles */ - - dtiim = work[iim1] * delta[iim1]; - dtiip = work[iip1] * delta[iip1]; - temp = rhoinv + psi + phi; - if (swtch) { - c__ = temp - dtiim * dpsi - dtiip * dphi; - zz[0] = dtiim * dtiim * dpsi; - zz[2] = dtiip * dtiip * dphi; - } else { - if (orgati) { - temp1 = z__[iim1] / dtiim; - temp1 *= temp1; - temp2 = (d__[iim1] - d__[iip1]) * (d__[iim1] + d__[ - iip1]) * temp1; - c__ = temp - dtiip * (dpsi + dphi) - temp2; - zz[0] = z__[iim1] * z__[iim1]; - if (dpsi < temp1) { - zz[2] = dtiip * dtiip * dphi; - } else { - zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi); - } - } else { - temp1 = z__[iip1] / dtiip; - temp1 *= temp1; - temp2 = (d__[iip1] - d__[iim1]) * (d__[iim1] + d__[ - iip1]) * temp1; - c__ = temp - dtiim * (dpsi + dphi) - temp2; - if (dphi < temp1) { - zz[0] = dtiim * dtiim * dpsi; - } else { - zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1)); - } - zz[2] = z__[iip1] * z__[iip1]; - } - } - dd[0] = dtiim; - dd[1] = delta[ii] * work[ii]; - dd[2] = dtiip; - dlaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); - if (*info != 0) { - goto L240; - } - } - -/* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. -*/ - - if (w * eta >= 0.) { - eta = -w / dw; - } - if (orgati) { - temp1 = work[*i__] * delta[*i__]; - temp = eta - temp1; - } else { - temp1 = work[ip1] * delta[ip1]; - temp = eta - temp1; - } - if (temp > sg2ub || temp < sg2lb) { - if (w < 0.) { - eta = (sg2ub - tau) / 2.; - } else { - eta = (sg2lb - tau) / 2.; - } - } - - tau += eta; - eta /= *sigma + sqrt(*sigma * *sigma + eta); - - *sigma += eta; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - work[j] += eta; - delta[j] -= eta; -/* L200: */ - } - - prew = w; - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = iim1; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / (work[j] * delta[j]); - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L210: */ - } - erretm = abs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - dphi = 0.; - phi = 0.; - i__1 = iip1; - for (j = *n; j >= i__1; --j) { - temp = z__[j] / (work[j] * delta[j]); - phi += z__[j] * temp; - dphi += temp * temp; - erretm += phi; -/* L220: */ - } - - temp = z__[ii] / (work[ii] * delta[ii]); - dw = dpsi + dphi + temp * temp; - temp = z__[ii] * temp; - w = rhoinv + phi + psi + temp; - erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. - + abs(tau) * dw; - if (w * prew > 0. && abs(w) > abs(prew) / 10.) { - swtch = ! swtch; - } - - if (w <= 0.) { - sg2lb = max(sg2lb,tau); - } else { - sg2ub = min(sg2ub,tau); - } - -/* L230: */ - } - -/* Return with INFO = 1, NITER = MAXIT and not converged */ - - *info = 1; - - } - -L240: - return 0; - -/* End of DLASD4 */ - -} /* dlasd4_ */ - -/* Subroutine */ int dlasd5_(integer *i__, doublereal *d__, doublereal *z__, - doublereal *delta, doublereal *rho, doublereal *dsigma, doublereal * - work) -{ - /* System generated locals */ - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal b, c__, w, del, tau, delsq; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 - - - Purpose - ======= - - This subroutine computes the square root of the I-th eigenvalue - of a positive symmetric rank-one modification of a 2-by-2 diagonal - matrix - - diag( D ) * diag( D ) + RHO * Z * transpose(Z) . - - The diagonal entries in the array D are assumed to satisfy - - 0 <= D(i) < D(j) for i < j . - - We also assume RHO > 0 and that the Euclidean norm of the vector - Z is one. - - Arguments - ========= - - I (input) INTEGER - The index of the eigenvalue to be computed. I = 1 or I = 2. - - D (input) DOUBLE PRECISION array, dimension ( 2 ) - The original eigenvalues. We assume 0 <= D(1) < D(2). - - Z (input) DOUBLE PRECISION array, dimension ( 2 ) - The components of the updating vector. - - DELTA (output) DOUBLE PRECISION array, dimension ( 2 ) - Contains (D(j) - lambda_I) in its j-th component. - The vector DELTA contains the information necessary - to construct the eigenvectors. - - RHO (input) DOUBLE PRECISION - The scalar in the symmetric updating formula. - - DSIGMA (output) DOUBLE PRECISION - The computed lambda_I, the I-th updated eigenvalue. - - WORK (workspace) DOUBLE PRECISION array, dimension ( 2 ) - WORK contains (D(j) + sigma_I) in its j-th component. - - Further Details - =============== - - Based on contributions by - Ren-Cang Li, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --work; - --delta; - --z__; - --d__; - - /* Function Body */ - del = d__[2] - d__[1]; - delsq = del * (d__[2] + d__[1]); - if (*i__ == 1) { - w = *rho * 4. * (z__[2] * z__[2] / (d__[1] + d__[2] * 3.) - z__[1] * - z__[1] / (d__[1] * 3. + d__[2])) / del + 1.; - if (w > 0.) { - b = delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); - c__ = *rho * z__[1] * z__[1] * delsq; - -/* - B > ZERO, always - - The following TAU is DSIGMA * DSIGMA - D( 1 ) * D( 1 ) -*/ - - tau = c__ * 2. / (b + sqrt((d__1 = b * b - c__ * 4., abs(d__1)))); - -/* The following TAU is DSIGMA - D( 1 ) */ - - tau /= d__[1] + sqrt(d__[1] * d__[1] + tau); - *dsigma = d__[1] + tau; - delta[1] = -tau; - delta[2] = del - tau; - work[1] = d__[1] * 2. + tau; - work[2] = d__[1] + tau + d__[2]; -/* - DELTA( 1 ) = -Z( 1 ) / TAU - DELTA( 2 ) = Z( 2 ) / ( DEL-TAU ) -*/ - } else { - b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); - c__ = *rho * z__[2] * z__[2] * delsq; - -/* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */ - - if (b > 0.) { - tau = c__ * -2. / (b + sqrt(b * b + c__ * 4.)); - } else { - tau = (b - sqrt(b * b + c__ * 4.)) / 2.; - } - -/* The following TAU is DSIGMA - D( 2 ) */ - - tau /= d__[2] + sqrt((d__1 = d__[2] * d__[2] + tau, abs(d__1))); - *dsigma = d__[2] + tau; - delta[1] = -(del + tau); - delta[2] = -tau; - work[1] = d__[1] + tau + d__[2]; - work[2] = d__[2] * 2. + tau; -/* - DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) - DELTA( 2 ) = -Z( 2 ) / TAU -*/ - } -/* - TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) - DELTA( 1 ) = DELTA( 1 ) / TEMP - DELTA( 2 ) = DELTA( 2 ) / TEMP -*/ - } else { - -/* Now I=2 */ - - b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); - c__ = *rho * z__[2] * z__[2] * delsq; - -/* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */ - - if (b > 0.) { - tau = (b + sqrt(b * b + c__ * 4.)) / 2.; - } else { - tau = c__ * 2. / (-b + sqrt(b * b + c__ * 4.)); - } - -/* The following TAU is DSIGMA - D( 2 ) */ - - tau /= d__[2] + sqrt(d__[2] * d__[2] + tau); - *dsigma = d__[2] + tau; - delta[1] = -(del + tau); - delta[2] = -tau; - work[1] = d__[1] + tau + d__[2]; - work[2] = d__[2] * 2. + tau; -/* - DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) - DELTA( 2 ) = -Z( 2 ) / TAU - TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) - DELTA( 1 ) = DELTA( 1 ) / TEMP - DELTA( 2 ) = DELTA( 2 ) / TEMP -*/ - } - return 0; - -/* End of DLASD5 */ - -} /* dlasd5_ */ - -/* Subroutine */ int dlasd6_(integer *icompq, integer *nl, integer *nr, - integer *sqre, doublereal *d__, doublereal *vf, doublereal *vl, - doublereal *alpha, doublereal *beta, integer *idxq, integer *perm, - integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, - integer *ldgnum, doublereal *poles, doublereal *difl, doublereal * - difr, doublereal *z__, integer *k, doublereal *c__, doublereal *s, - doublereal *work, integer *iwork, integer *info) -{ - /* System generated locals */ - integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, - poles_dim1, poles_offset, i__1; - doublereal d__1, d__2; - - /* Local variables */ - static integer i__, m, n, n1, n2, iw, idx, idxc, idxp, ivfw, ivlw; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *), dlasd7_(integer *, integer *, integer *, - integer *, integer *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, integer *, integer *, - integer *, integer *, integer *, integer *, integer *, doublereal - *, integer *, doublereal *, doublereal *, integer *), dlasd8_( - integer *, integer *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, integer *, doublereal *, - doublereal *, integer *), dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *), dlamrg_(integer *, integer *, - doublereal *, integer *, integer *, integer *); - static integer isigma; - extern /* Subroutine */ int xerbla_(char *, integer *); - static doublereal orgnrm; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DLASD6 computes the SVD of an updated upper bidiagonal matrix B - obtained by merging two smaller ones by appending a row. This - routine is used only for the problem which requires all singular - values and optionally singular vector matrices in factored form. - B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE. - A related subroutine, DLASD1, handles the case in which all singular - values and singular vectors of the bidiagonal matrix are desired. - - DLASD6 computes the SVD as follows: - - ( D1(in) 0 0 0 ) - B = U(in) * ( Z1' a Z2' b ) * VT(in) - ( 0 0 D2(in) 0 ) - - = U(out) * ( D(out) 0) * VT(out) - - where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M - with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros - elsewhere; and the entry b is empty if SQRE = 0. - - The singular values of B can be computed using D1, D2, the first - components of all the right singular vectors of the lower block, and - the last components of all the right singular vectors of the upper - block. These components are stored and updated in VF and VL, - respectively, in DLASD6. Hence U and VT are not explicitly - referenced. - - The singular values are stored in D. The algorithm consists of two - stages: - - The first stage consists of deflating the size of the problem - when there are multiple singular values or if there is a zero - in the Z vector. For each such occurence the dimension of the - secular equation problem is reduced by one. This stage is - performed by the routine DLASD7. - - The second stage consists of calculating the updated - singular values. This is done by finding the roots of the - secular equation via the routine DLASD4 (as called by DLASD8). - This routine also updates VF and VL and computes the distances - between the updated singular values and the old singular - values. - - DLASD6 is called from DLASDA. - - Arguments - ========= - - ICOMPQ (input) INTEGER - Specifies whether singular vectors are to be computed in - factored form: - = 0: Compute singular values only. - = 1: Compute singular vectors in factored form as well. - - NL (input) INTEGER - The row dimension of the upper block. NL >= 1. - - NR (input) INTEGER - The row dimension of the lower block. NR >= 1. - - SQRE (input) INTEGER - = 0: the lower block is an NR-by-NR square matrix. - = 1: the lower block is an NR-by-(NR+1) rectangular matrix. - - The bidiagonal matrix has row dimension N = NL + NR + 1, - and column dimension M = N + SQRE. - - D (input/output) DOUBLE PRECISION array, dimension ( NL+NR+1 ). - On entry D(1:NL,1:NL) contains the singular values of the - upper block, and D(NL+2:N) contains the singular values - of the lower block. On exit D(1:N) contains the singular - values of the modified matrix. - - VF (input/output) DOUBLE PRECISION array, dimension ( M ) - On entry, VF(1:NL+1) contains the first components of all - right singular vectors of the upper block; and VF(NL+2:M) - contains the first components of all right singular vectors - of the lower block. On exit, VF contains the first components - of all right singular vectors of the bidiagonal matrix. - - VL (input/output) DOUBLE PRECISION array, dimension ( M ) - On entry, VL(1:NL+1) contains the last components of all - right singular vectors of the upper block; and VL(NL+2:M) - contains the last components of all right singular vectors of - the lower block. On exit, VL contains the last components of - all right singular vectors of the bidiagonal matrix. - - ALPHA (input) DOUBLE PRECISION - Contains the diagonal element associated with the added row. - - BETA (input) DOUBLE PRECISION - Contains the off-diagonal element associated with the added - row. - - IDXQ (output) INTEGER array, dimension ( N ) - This contains the permutation which will reintegrate the - subproblem just solved back into sorted order, i.e. - D( IDXQ( I = 1, N ) ) will be in ascending order. - - PERM (output) INTEGER array, dimension ( N ) - The permutations (from deflation and sorting) to be applied - to each block. Not referenced if ICOMPQ = 0. - - GIVPTR (output) INTEGER - The number of Givens rotations which took place in this - subproblem. Not referenced if ICOMPQ = 0. - - GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) - Each pair of numbers indicates a pair of columns to take place - in a Givens rotation. Not referenced if ICOMPQ = 0. - - LDGCOL (input) INTEGER - leading dimension of GIVCOL, must be at least N. - - GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) - Each number indicates the C or S value to be used in the - corresponding Givens rotation. Not referenced if ICOMPQ = 0. - - LDGNUM (input) INTEGER - The leading dimension of GIVNUM and POLES, must be at least N. - - POLES (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) - On exit, POLES(1,*) is an array containing the new singular - values obtained from solving the secular equation, and - POLES(2,*) is an array containing the poles in the secular - equation. Not referenced if ICOMPQ = 0. - - DIFL (output) DOUBLE PRECISION array, dimension ( N ) - On exit, DIFL(I) is the distance between I-th updated - (undeflated) singular value and the I-th (undeflated) old - singular value. - - DIFR (output) DOUBLE PRECISION array, - dimension ( LDGNUM, 2 ) if ICOMPQ = 1 and - dimension ( N ) if ICOMPQ = 0. - On exit, DIFR(I, 1) is the distance between I-th updated - (undeflated) singular value and the I+1-th (undeflated) old - singular value. - - If ICOMPQ = 1, DIFR(1:K,2) is an array containing the - normalizing factors for the right singular vector matrix. - - See DLASD8 for details on DIFL and DIFR. - - Z (output) DOUBLE PRECISION array, dimension ( M ) - The first elements of this array contain the components - of the deflation-adjusted updating row vector. - - K (output) INTEGER - Contains the dimension of the non-deflated matrix, - This is the order of the related secular equation. 1 <= K <=N. - - C (output) DOUBLE PRECISION - C contains garbage if SQRE =0 and the C-value of a Givens - rotation related to the right null space if SQRE = 1. - - S (output) DOUBLE PRECISION - S contains garbage if SQRE =0 and the S-value of a Givens - rotation related to the right null space if SQRE = 1. - - WORK (workspace) DOUBLE PRECISION array, dimension ( 4 * M ) - - IWORK (workspace) INTEGER array, dimension ( 3 * N ) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an singular value did not converge - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --vf; - --vl; - --idxq; - --perm; - givcol_dim1 = *ldgcol; - givcol_offset = 1 + givcol_dim1; - givcol -= givcol_offset; - poles_dim1 = *ldgnum; - poles_offset = 1 + poles_dim1; - poles -= poles_offset; - givnum_dim1 = *ldgnum; - givnum_offset = 1 + givnum_dim1; - givnum -= givnum_offset; - --difl; - --difr; - --z__; - --work; - --iwork; - - /* Function Body */ - *info = 0; - n = *nl + *nr + 1; - m = n + *sqre; - - if (*icompq < 0 || *icompq > 1) { - *info = -1; - } else if (*nl < 1) { - *info = -2; - } else if (*nr < 1) { - *info = -3; - } else if (*sqre < 0 || *sqre > 1) { - *info = -4; - } else if (*ldgcol < n) { - *info = -14; - } else if (*ldgnum < n) { - *info = -16; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLASD6", &i__1); - return 0; - } - -/* - The following values are for bookkeeping purposes only. They are - integer pointers which indicate the portion of the workspace - used by a particular array in DLASD7 and DLASD8. -*/ - - isigma = 1; - iw = isigma + n; - ivfw = iw + m; - ivlw = ivfw + m; - - idx = 1; - idxc = idx + n; - idxp = idxc + n; - -/* - Scale. - - Computing MAX -*/ - d__1 = abs(*alpha), d__2 = abs(*beta); - orgnrm = max(d__1,d__2); - d__[*nl + 1] = 0.; - i__1 = n; - for (i__ = 1; i__ <= i__1; ++i__) { - if ((d__1 = d__[i__], abs(d__1)) > orgnrm) { - orgnrm = (d__1 = d__[i__], abs(d__1)); - } -/* L10: */ - } - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2453, &n, &c__1, &d__[1], &n, - info); - *alpha /= orgnrm; - *beta /= orgnrm; - -/* Sort and Deflate singular values. */ - - dlasd7_(icompq, nl, nr, sqre, k, &d__[1], &z__[1], &work[iw], &vf[1], & - work[ivfw], &vl[1], &work[ivlw], alpha, beta, &work[isigma], & - iwork[idx], &iwork[idxp], &idxq[1], &perm[1], givptr, &givcol[ - givcol_offset], ldgcol, &givnum[givnum_offset], ldgnum, c__, s, - info); - -/* Solve Secular Equation, compute DIFL, DIFR, and update VF, VL. */ - - dlasd8_(icompq, k, &d__[1], &z__[1], &vf[1], &vl[1], &difl[1], &difr[1], - ldgnum, &work[isigma], &work[iw], info); - -/* Save the poles if ICOMPQ = 1. */ - - if (*icompq == 1) { - dcopy_(k, &d__[1], &c__1, &poles[poles_dim1 + 1], &c__1); - dcopy_(k, &work[isigma], &c__1, &poles[(poles_dim1 << 1) + 1], &c__1); - } - -/* Unscale. */ - - dlascl_("G", &c__0, &c__0, &c_b2453, &orgnrm, &n, &c__1, &d__[1], &n, - info); - -/* Prepare the IDXQ sorting permutation. */ - - n1 = *k; - n2 = n - *k; - dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); - - return 0; - -/* End of DLASD6 */ - -} /* dlasd6_ */ - -/* Subroutine */ int dlasd7_(integer *icompq, integer *nl, integer *nr, - integer *sqre, integer *k, doublereal *d__, doublereal *z__, - doublereal *zw, doublereal *vf, doublereal *vfw, doublereal *vl, - doublereal *vlw, doublereal *alpha, doublereal *beta, doublereal * - dsigma, integer *idx, integer *idxp, integer *idxq, integer *perm, - integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, - integer *ldgnum, doublereal *c__, doublereal *s, integer *info) -{ - /* System generated locals */ - integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, i__1; - doublereal d__1, d__2; - - /* Local variables */ - static integer i__, j, m, n, k2; - static doublereal z1; - static integer jp; - static doublereal eps, tau, tol; - static integer nlp1, nlp2, idxi, idxj; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *); - static integer idxjp; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static integer jprev; - - extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, - integer *, integer *, integer *), xerbla_(char *, integer *); - static doublereal hlftol; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 - - - Purpose - ======= - - DLASD7 merges the two sets of singular values together into a single - sorted set. Then it tries to deflate the size of the problem. There - are two ways in which deflation can occur: when two or more singular - values are close together or if there is a tiny entry in the Z - vector. For each such occurrence the order of the related - secular equation problem is reduced by one. - - DLASD7 is called from DLASD6. - - Arguments - ========= - - ICOMPQ (input) INTEGER - Specifies whether singular vectors are to be computed - in compact form, as follows: - = 0: Compute singular values only. - = 1: Compute singular vectors of upper - bidiagonal matrix in compact form. - - NL (input) INTEGER - The row dimension of the upper block. NL >= 1. - - NR (input) INTEGER - The row dimension of the lower block. NR >= 1. - - SQRE (input) INTEGER - = 0: the lower block is an NR-by-NR square matrix. - = 1: the lower block is an NR-by-(NR+1) rectangular matrix. - - The bidiagonal matrix has - N = NL + NR + 1 rows and - M = N + SQRE >= N columns. - - K (output) INTEGER - Contains the dimension of the non-deflated matrix, this is - the order of the related secular equation. 1 <= K <=N. - - D (input/output) DOUBLE PRECISION array, dimension ( N ) - On entry D contains the singular values of the two submatrices - to be combined. On exit D contains the trailing (N-K) updated - singular values (those which were deflated) sorted into - increasing order. - - Z (output) DOUBLE PRECISION array, dimension ( M ) - On exit Z contains the updating row vector in the secular - equation. - - ZW (workspace) DOUBLE PRECISION array, dimension ( M ) - Workspace for Z. - - VF (input/output) DOUBLE PRECISION array, dimension ( M ) - On entry, VF(1:NL+1) contains the first components of all - right singular vectors of the upper block; and VF(NL+2:M) - contains the first components of all right singular vectors - of the lower block. On exit, VF contains the first components - of all right singular vectors of the bidiagonal matrix. - - VFW (workspace) DOUBLE PRECISION array, dimension ( M ) - Workspace for VF. - - VL (input/output) DOUBLE PRECISION array, dimension ( M ) - On entry, VL(1:NL+1) contains the last components of all - right singular vectors of the upper block; and VL(NL+2:M) - contains the last components of all right singular vectors - of the lower block. On exit, VL contains the last components - of all right singular vectors of the bidiagonal matrix. - - VLW (workspace) DOUBLE PRECISION array, dimension ( M ) - Workspace for VL. - - ALPHA (input) DOUBLE PRECISION - Contains the diagonal element associated with the added row. - - BETA (input) DOUBLE PRECISION - Contains the off-diagonal element associated with the added - row. - - DSIGMA (output) DOUBLE PRECISION array, dimension ( N ) - Contains a copy of the diagonal elements (K-1 singular values - and one zero) in the secular equation. - - IDX (workspace) INTEGER array, dimension ( N ) - This will contain the permutation used to sort the contents of - D into ascending order. - - IDXP (workspace) INTEGER array, dimension ( N ) - This will contain the permutation used to place deflated - values of D at the end of the array. On output IDXP(2:K) - points to the nondeflated D-values and IDXP(K+1:N) - points to the deflated singular values. - - IDXQ (input) INTEGER array, dimension ( N ) - This contains the permutation which separately sorts the two - sub-problems in D into ascending order. Note that entries in - the first half of this permutation must first be moved one - position backward; and entries in the second half - must first have NL+1 added to their values. - - PERM (output) INTEGER array, dimension ( N ) - The permutations (from deflation and sorting) to be applied - to each singular block. Not referenced if ICOMPQ = 0. - - GIVPTR (output) INTEGER - The number of Givens rotations which took place in this - subproblem. Not referenced if ICOMPQ = 0. - - GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) - Each pair of numbers indicates a pair of columns to take place - in a Givens rotation. Not referenced if ICOMPQ = 0. - - LDGCOL (input) INTEGER - The leading dimension of GIVCOL, must be at least N. - - GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) - Each number indicates the C or S value to be used in the - corresponding Givens rotation. Not referenced if ICOMPQ = 0. - - LDGNUM (input) INTEGER - The leading dimension of GIVNUM, must be at least N. - - C (output) DOUBLE PRECISION - C contains garbage if SQRE =0 and the C-value of a Givens - rotation related to the right null space if SQRE = 1. - - S (output) DOUBLE PRECISION - S contains garbage if SQRE =0 and the S-value of a Givens - rotation related to the right null space if SQRE = 1. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --z__; - --zw; - --vf; - --vfw; - --vl; - --vlw; - --dsigma; - --idx; - --idxp; - --idxq; - --perm; - givcol_dim1 = *ldgcol; - givcol_offset = 1 + givcol_dim1; - givcol -= givcol_offset; - givnum_dim1 = *ldgnum; - givnum_offset = 1 + givnum_dim1; - givnum -= givnum_offset; - - /* Function Body */ - *info = 0; - n = *nl + *nr + 1; - m = n + *sqre; - - if (*icompq < 0 || *icompq > 1) { - *info = -1; - } else if (*nl < 1) { - *info = -2; - } else if (*nr < 1) { - *info = -3; - } else if (*sqre < 0 || *sqre > 1) { - *info = -4; - } else if (*ldgcol < n) { - *info = -22; - } else if (*ldgnum < n) { - *info = -24; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLASD7", &i__1); - return 0; - } - - nlp1 = *nl + 1; - nlp2 = *nl + 2; - if (*icompq == 1) { - *givptr = 0; - } - -/* - Generate the first part of the vector Z and move the singular - values in the first part of D one position backward. -*/ - - z1 = *alpha * vl[nlp1]; - vl[nlp1] = 0.; - tau = vf[nlp1]; - for (i__ = *nl; i__ >= 1; --i__) { - z__[i__ + 1] = *alpha * vl[i__]; - vl[i__] = 0.; - vf[i__ + 1] = vf[i__]; - d__[i__ + 1] = d__[i__]; - idxq[i__ + 1] = idxq[i__] + 1; -/* L10: */ - } - vf[1] = tau; - -/* Generate the second part of the vector Z. */ - - i__1 = m; - for (i__ = nlp2; i__ <= i__1; ++i__) { - z__[i__] = *beta * vf[i__]; - vf[i__] = 0.; -/* L20: */ - } - -/* Sort the singular values into increasing order */ - - i__1 = n; - for (i__ = nlp2; i__ <= i__1; ++i__) { - idxq[i__] += nlp1; -/* L30: */ - } - -/* DSIGMA, IDXC, IDXC, and ZW are used as storage space. */ - - i__1 = n; - for (i__ = 2; i__ <= i__1; ++i__) { - dsigma[i__] = d__[idxq[i__]]; - zw[i__] = z__[idxq[i__]]; - vfw[i__] = vf[idxq[i__]]; - vlw[i__] = vl[idxq[i__]]; -/* L40: */ - } - - dlamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); - - i__1 = n; - for (i__ = 2; i__ <= i__1; ++i__) { - idxi = idx[i__] + 1; - d__[i__] = dsigma[idxi]; - z__[i__] = zw[idxi]; - vf[i__] = vfw[idxi]; - vl[i__] = vlw[idxi]; -/* L50: */ - } - -/* Calculate the allowable deflation tolerence */ - - eps = EPSILON; -/* Computing MAX */ - d__1 = abs(*alpha), d__2 = abs(*beta); - tol = max(d__1,d__2); -/* Computing MAX */ - d__2 = (d__1 = d__[n], abs(d__1)); - tol = eps * 64. * max(d__2,tol); - -/* - There are 2 kinds of deflation -- first a value in the z-vector - is small, second two (or more) singular values are very close - together (their difference is small). - - If the value in the z-vector is small, we simply permute the - array so that the corresponding singular value is moved to the - end. - - If two values in the D-vector are close, we perform a two-sided - rotation designed to make one of the corresponding z-vector - entries zero, and then permute the array so that the deflated - singular value is moved to the end. - - If there are multiple singular values then the problem deflates. - Here the number of equal singular values are found. As each equal - singular value is found, an elementary reflector is computed to - rotate the corresponding singular subspace so that the - corresponding components of Z are zero in this new basis. -*/ - - *k = 1; - k2 = n + 1; - i__1 = n; - for (j = 2; j <= i__1; ++j) { - if ((d__1 = z__[j], abs(d__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - idxp[k2] = j; - if (j == n) { - goto L100; - } - } else { - jprev = j; - goto L70; - } -/* L60: */ - } -L70: - j = jprev; -L80: - ++j; - if (j > n) { - goto L90; - } - if ((d__1 = z__[j], abs(d__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - idxp[k2] = j; - } else { - -/* Check if singular values are close enough to allow deflation. */ - - if ((d__1 = d__[j] - d__[jprev], abs(d__1)) <= tol) { - -/* Deflation is possible. */ - - *s = z__[jprev]; - *c__ = z__[j]; - -/* - Find sqrt(a**2+b**2) without overflow or - destructive underflow. -*/ - - tau = dlapy2_(c__, s); - z__[j] = tau; - z__[jprev] = 0.; - *c__ /= tau; - *s = -(*s) / tau; - -/* Record the appropriate Givens rotation */ - - if (*icompq == 1) { - ++(*givptr); - idxjp = idxq[idx[jprev] + 1]; - idxj = idxq[idx[j] + 1]; - if (idxjp <= nlp1) { - --idxjp; - } - if (idxj <= nlp1) { - --idxj; - } - givcol[*givptr + (givcol_dim1 << 1)] = idxjp; - givcol[*givptr + givcol_dim1] = idxj; - givnum[*givptr + (givnum_dim1 << 1)] = *c__; - givnum[*givptr + givnum_dim1] = *s; - } - drot_(&c__1, &vf[jprev], &c__1, &vf[j], &c__1, c__, s); - drot_(&c__1, &vl[jprev], &c__1, &vl[j], &c__1, c__, s); - --k2; - idxp[k2] = jprev; - jprev = j; - } else { - ++(*k); - zw[*k] = z__[jprev]; - dsigma[*k] = d__[jprev]; - idxp[*k] = jprev; - jprev = j; - } - } - goto L80; -L90: - -/* Record the last singular value. */ - - ++(*k); - zw[*k] = z__[jprev]; - dsigma[*k] = d__[jprev]; - idxp[*k] = jprev; - -L100: - -/* - Sort the singular values into DSIGMA. The singular values which - were not deflated go into the first K slots of DSIGMA, except - that DSIGMA(1) is treated separately. -*/ - - i__1 = n; - for (j = 2; j <= i__1; ++j) { - jp = idxp[j]; - dsigma[j] = d__[jp]; - vfw[j] = vf[jp]; - vlw[j] = vl[jp]; -/* L110: */ - } - if (*icompq == 1) { - i__1 = n; - for (j = 2; j <= i__1; ++j) { - jp = idxp[j]; - perm[j] = idxq[idx[jp] + 1]; - if (perm[j] <= nlp1) { - --perm[j]; - } -/* L120: */ - } - } - -/* - The deflated singular values go back into the last N - K slots of - D. -*/ - - i__1 = n - *k; - dcopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); - -/* - Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and - VL(M). -*/ - - dsigma[1] = 0.; - hlftol = tol / 2.; - if (abs(dsigma[2]) <= hlftol) { - dsigma[2] = hlftol; - } - if (m > n) { - z__[1] = dlapy2_(&z1, &z__[m]); - if (z__[1] <= tol) { - *c__ = 1.; - *s = 0.; - z__[1] = tol; - } else { - *c__ = z1 / z__[1]; - *s = -z__[m] / z__[1]; - } - drot_(&c__1, &vf[m], &c__1, &vf[1], &c__1, c__, s); - drot_(&c__1, &vl[m], &c__1, &vl[1], &c__1, c__, s); - } else { - if (abs(z1) <= tol) { - z__[1] = tol; - } else { - z__[1] = z1; - } - } - -/* Restore Z, VF, and VL. */ - - i__1 = *k - 1; - dcopy_(&i__1, &zw[2], &c__1, &z__[2], &c__1); - i__1 = n - 1; - dcopy_(&i__1, &vfw[2], &c__1, &vf[2], &c__1); - i__1 = n - 1; - dcopy_(&i__1, &vlw[2], &c__1, &vl[2], &c__1); - - return 0; - -/* End of DLASD7 */ - -} /* dlasd7_ */ - -/* Subroutine */ int dlasd8_(integer *icompq, integer *k, doublereal *d__, - doublereal *z__, doublereal *vf, doublereal *vl, doublereal *difl, - doublereal *difr, integer *lddifr, doublereal *dsigma, doublereal * - work, integer *info) -{ - /* System generated locals */ - integer difr_dim1, difr_offset, i__1, i__2; - doublereal d__1, d__2; - - /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); - - /* Local variables */ - static integer i__, j; - static doublereal dj, rho; - static integer iwk1, iwk2, iwk3; - extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, - integer *); - static doublereal temp; - extern doublereal dnrm2_(integer *, doublereal *, integer *); - static integer iwk2i, iwk3i; - static doublereal diflj, difrj, dsigj; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - extern doublereal dlamc3_(doublereal *, doublereal *); - extern /* Subroutine */ int dlasd4_(integer *, integer *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *), dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *), dlaset_(char *, integer *, integer - *, doublereal *, doublereal *, doublereal *, integer *), - xerbla_(char *, integer *); - static doublereal dsigjp; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 - - - Purpose - ======= - - DLASD8 finds the square roots of the roots of the secular equation, - as defined by the values in DSIGMA and Z. It makes the appropriate - calls to DLASD4, and stores, for each element in D, the distance - to its two nearest poles (elements in DSIGMA). It also updates - the arrays VF and VL, the first and last components of all the - right singular vectors of the original bidiagonal matrix. - - DLASD8 is called from DLASD6. - - Arguments - ========= - - ICOMPQ (input) INTEGER - Specifies whether singular vectors are to be computed in - factored form in the calling routine: - = 0: Compute singular values only. - = 1: Compute singular vectors in factored form as well. - - K (input) INTEGER - The number of terms in the rational function to be solved - by DLASD4. K >= 1. - - D (output) DOUBLE PRECISION array, dimension ( K ) - On output, D contains the updated singular values. - - Z (input) DOUBLE PRECISION array, dimension ( K ) - The first K elements of this array contain the components - of the deflation-adjusted updating row vector. - - VF (input/output) DOUBLE PRECISION array, dimension ( K ) - On entry, VF contains information passed through DBEDE8. - On exit, VF contains the first K components of the first - components of all right singular vectors of the bidiagonal - matrix. - - VL (input/output) DOUBLE PRECISION array, dimension ( K ) - On entry, VL contains information passed through DBEDE8. - On exit, VL contains the first K components of the last - components of all right singular vectors of the bidiagonal - matrix. - - DIFL (output) DOUBLE PRECISION array, dimension ( K ) - On exit, DIFL(I) = D(I) - DSIGMA(I). - - DIFR (output) DOUBLE PRECISION array, - dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and - dimension ( K ) if ICOMPQ = 0. - On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not - defined and will not be referenced. - - If ICOMPQ = 1, DIFR(1:K,2) is an array containing the - normalizing factors for the right singular vector matrix. - - LDDIFR (input) INTEGER - The leading dimension of DIFR, must be at least K. - - DSIGMA (input) DOUBLE PRECISION array, dimension ( K ) - The first K elements of this array contain the old roots - of the deflated updating problem. These are the poles - of the secular equation. - - WORK (workspace) DOUBLE PRECISION array, dimension at least 3 * K - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an singular value did not converge - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --z__; - --vf; - --vl; - --difl; - difr_dim1 = *lddifr; - difr_offset = 1 + difr_dim1; - difr -= difr_offset; - --dsigma; - --work; - - /* Function Body */ - *info = 0; - - if (*icompq < 0 || *icompq > 1) { - *info = -1; - } else if (*k < 1) { - *info = -2; - } else if (*lddifr < *k) { - *info = -9; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLASD8", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*k == 1) { - d__[1] = abs(z__[1]); - difl[1] = d__[1]; - if (*icompq == 1) { - difl[2] = 1.; - difr[(difr_dim1 << 1) + 1] = 1.; - } - return 0; - } - -/* - Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can - be computed with high relative accuracy (barring over/underflow). - This is a problem on machines without a guard digit in - add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). - The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), - which on any of these machines zeros out the bottommost - bit of DSIGMA(I) if it is 1; this makes the subsequent - subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation - occurs. On binary machines with a guard digit (almost all - machines) it does not change DSIGMA(I) at all. On hexadecimal - and decimal machines with a guard digit, it slightly - changes the bottommost bits of DSIGMA(I). It does not account - for hexadecimal or decimal machines without guard digits - (we know of none). We use a subroutine call to compute - 2*DLAMBDA(I) to prevent optimizing compilers from eliminating - this code. -*/ - - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - dsigma[i__] = dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; -/* L10: */ - } - -/* Book keeping. */ - - iwk1 = 1; - iwk2 = iwk1 + *k; - iwk3 = iwk2 + *k; - iwk2i = iwk2 - 1; - iwk3i = iwk3 - 1; - -/* Normalize Z. */ - - rho = dnrm2_(k, &z__[1], &c__1); - dlascl_("G", &c__0, &c__0, &rho, &c_b2453, k, &c__1, &z__[1], k, info); - rho *= rho; - -/* Initialize WORK(IWK3). */ - - dlaset_("A", k, &c__1, &c_b2453, &c_b2453, &work[iwk3], k); - -/* - Compute the updated singular values, the arrays DIFL, DIFR, - and the updated Z. -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dlasd4_(k, &j, &dsigma[1], &z__[1], &work[iwk1], &rho, &d__[j], &work[ - iwk2], info); - -/* If the root finder fails, the computation is terminated. */ - - if (*info != 0) { - return 0; - } - work[iwk3i + j] = work[iwk3i + j] * work[j] * work[iwk2i + j]; - difl[j] = -work[j]; - difr[j + difr_dim1] = -work[j + 1]; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + - i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ - j]); -/* L20: */ - } - i__2 = *k; - for (i__ = j + 1; i__ <= i__2; ++i__) { - work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + - i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ - j]); -/* L30: */ - } -/* L40: */ - } - -/* Compute updated Z. */ - - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - d__2 = sqrt((d__1 = work[iwk3i + i__], abs(d__1))); - z__[i__] = d_sign(&d__2, &z__[i__]); -/* L50: */ - } - -/* Update VF and VL. */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - diflj = difl[j]; - dj = d__[j]; - dsigj = -dsigma[j]; - if (j < *k) { - difrj = -difr[j + difr_dim1]; - dsigjp = -dsigma[j + 1]; - } - work[j] = -z__[j] / diflj / (dsigma[j] + dj); - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - work[i__] = z__[i__] / (dlamc3_(&dsigma[i__], &dsigj) - diflj) / ( - dsigma[i__] + dj); -/* L60: */ - } - i__2 = *k; - for (i__ = j + 1; i__ <= i__2; ++i__) { - work[i__] = z__[i__] / (dlamc3_(&dsigma[i__], &dsigjp) + difrj) / - (dsigma[i__] + dj); -/* L70: */ - } - temp = dnrm2_(k, &work[1], &c__1); - work[iwk2i + j] = ddot_(k, &work[1], &c__1, &vf[1], &c__1) / temp; - work[iwk3i + j] = ddot_(k, &work[1], &c__1, &vl[1], &c__1) / temp; - if (*icompq == 1) { - difr[j + (difr_dim1 << 1)] = temp; - } -/* L80: */ - } - - dcopy_(k, &work[iwk2], &c__1, &vf[1], &c__1); - dcopy_(k, &work[iwk3], &c__1, &vl[1], &c__1); - - return 0; - -/* End of DLASD8 */ - -} /* dlasd8_ */ - -/* Subroutine */ int dlasda_(integer *icompq, integer *smlsiz, integer *n, - integer *sqre, doublereal *d__, doublereal *e, doublereal *u, integer - *ldu, doublereal *vt, integer *k, doublereal *difl, doublereal *difr, - doublereal *z__, doublereal *poles, integer *givptr, integer *givcol, - integer *ldgcol, integer *perm, doublereal *givnum, doublereal *c__, - doublereal *s, doublereal *work, integer *iwork, integer *info) -{ - /* System generated locals */ - integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1, - difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, - poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, - z_dim1, z_offset, i__1, i__2; - - /* Builtin functions */ - integer pow_ii(integer *, integer *); - - /* Local variables */ - static integer i__, j, m, i1, ic, lf, nd, ll, nl, vf, nr, vl, im1, ncc, - nlf, nrf, vfi, iwk, vli, lvl, nru, ndb1, nlp1, lvl2, nrp1; - static doublereal beta; - static integer idxq, nlvl; - static doublereal alpha; - static integer inode, ndiml, ndimr, idxqi, itemp; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static integer sqrei; - extern /* Subroutine */ int dlasd6_(integer *, integer *, integer *, - integer *, doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *, integer *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - doublereal *, doublereal *, integer *, doublereal *, doublereal *, - doublereal *, integer *, integer *); - static integer nwork1, nwork2; - extern /* Subroutine */ int dlasdq_(char *, integer *, integer *, integer - *, integer *, integer *, doublereal *, doublereal *, doublereal *, - integer *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *), dlasdt_(integer *, integer *, - integer *, integer *, integer *, integer *, integer *), dlaset_( - char *, integer *, integer *, doublereal *, doublereal *, - doublereal *, integer *), xerbla_(char *, integer *); - static integer smlszp; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - Using a divide and conquer approach, DLASDA computes the singular - value decomposition (SVD) of a real upper bidiagonal N-by-M matrix - B with diagonal D and offdiagonal E, where M = N + SQRE. The - algorithm computes the singular values in the SVD B = U * S * VT. - The orthogonal matrices U and VT are optionally computed in - compact form. - - A related subroutine, DLASD0, computes the singular values and - the singular vectors in explicit form. - - Arguments - ========= - - ICOMPQ (input) INTEGER - Specifies whether singular vectors are to be computed - in compact form, as follows - = 0: Compute singular values only. - = 1: Compute singular vectors of upper bidiagonal - matrix in compact form. - - SMLSIZ (input) INTEGER - The maximum size of the subproblems at the bottom of the - computation tree. - - N (input) INTEGER - The row dimension of the upper bidiagonal matrix. This is - also the dimension of the main diagonal array D. - - SQRE (input) INTEGER - Specifies the column dimension of the bidiagonal matrix. - = 0: The bidiagonal matrix has column dimension M = N; - = 1: The bidiagonal matrix has column dimension M = N + 1. - - D (input/output) DOUBLE PRECISION array, dimension ( N ) - On entry D contains the main diagonal of the bidiagonal - matrix. On exit D, if INFO = 0, contains its singular values. - - E (input) DOUBLE PRECISION array, dimension ( M-1 ) - Contains the subdiagonal entries of the bidiagonal matrix. - On exit, E has been destroyed. - - U (output) DOUBLE PRECISION array, - dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced - if ICOMPQ = 0. If ICOMPQ = 1, on exit, U contains the left - singular vector matrices of all subproblems at the bottom - level. - - LDU (input) INTEGER, LDU = > N. - The leading dimension of arrays U, VT, DIFL, DIFR, POLES, - GIVNUM, and Z. - - VT (output) DOUBLE PRECISION array, - dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced - if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT' contains the right - singular vector matrices of all subproblems at the bottom - level. - - K (output) INTEGER array, - dimension ( N ) if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0. - If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th - secular equation on the computation tree. - - DIFL (output) DOUBLE PRECISION array, dimension ( LDU, NLVL ), - where NLVL = floor(log_2 (N/SMLSIZ))). - - DIFR (output) DOUBLE PRECISION array, - dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and - dimension ( N ) if ICOMPQ = 0. - If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1) - record distances between singular values on the I-th - level and singular values on the (I -1)-th level, and - DIFR(1:N, 2 * I ) contains the normalizing factors for - the right singular vector matrix. See DLASD8 for details. - - Z (output) DOUBLE PRECISION array, - dimension ( LDU, NLVL ) if ICOMPQ = 1 and - dimension ( N ) if ICOMPQ = 0. - The first K elements of Z(1, I) contain the components of - the deflation-adjusted updating row vector for subproblems - on the I-th level. - - POLES (output) DOUBLE PRECISION array, - dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced - if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and - POLES(1, 2*I) contain the new and old singular values - involved in the secular equations on the I-th level. - - GIVPTR (output) INTEGER array, - dimension ( N ) if ICOMPQ = 1, and not referenced if - ICOMPQ = 0. If ICOMPQ = 1, on exit, GIVPTR( I ) records - the number of Givens rotations performed on the I-th - problem on the computation tree. - - GIVCOL (output) INTEGER array, - dimension ( LDGCOL, 2 * NLVL ) if ICOMPQ = 1, and not - referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, - GIVCOL(1, 2 *I - 1) and GIVCOL(1, 2 *I) record the locations - of Givens rotations performed on the I-th level on the - computation tree. - - LDGCOL (input) INTEGER, LDGCOL = > N. - The leading dimension of arrays GIVCOL and PERM. - - PERM (output) INTEGER array, - dimension ( LDGCOL, NLVL ) if ICOMPQ = 1, and not referenced - if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records - permutations done on the I-th level of the computation tree. - - GIVNUM (output) DOUBLE PRECISION array, - dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not - referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, - GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S- - values of Givens rotations performed on the I-th level on - the computation tree. - - C (output) DOUBLE PRECISION array, - dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. - If ICOMPQ = 1 and the I-th subproblem is not square, on exit, - C( I ) contains the C-value of a Givens rotation related to - the right null space of the I-th subproblem. - - S (output) DOUBLE PRECISION array, dimension ( N ) if - ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1 - and the I-th subproblem is not square, on exit, S( I ) - contains the S-value of a Givens rotation related to - the right null space of the I-th subproblem. - - WORK (workspace) DOUBLE PRECISION array, dimension - (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)). - - IWORK (workspace) INTEGER array. - Dimension must be at least (7 * N). - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an singular value did not converge - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - givnum_dim1 = *ldu; - givnum_offset = 1 + givnum_dim1; - givnum -= givnum_offset; - poles_dim1 = *ldu; - poles_offset = 1 + poles_dim1; - poles -= poles_offset; - z_dim1 = *ldu; - z_offset = 1 + z_dim1; - z__ -= z_offset; - difr_dim1 = *ldu; - difr_offset = 1 + difr_dim1; - difr -= difr_offset; - difl_dim1 = *ldu; - difl_offset = 1 + difl_dim1; - difl -= difl_offset; - vt_dim1 = *ldu; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - --k; - --givptr; - perm_dim1 = *ldgcol; - perm_offset = 1 + perm_dim1; - perm -= perm_offset; - givcol_dim1 = *ldgcol; - givcol_offset = 1 + givcol_dim1; - givcol -= givcol_offset; - --c__; - --s; - --work; - --iwork; - - /* Function Body */ - *info = 0; - - if (*icompq < 0 || *icompq > 1) { - *info = -1; - } else if (*smlsiz < 3) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*sqre < 0 || *sqre > 1) { - *info = -4; - } else if (*ldu < *n + *sqre) { - *info = -8; - } else if (*ldgcol < *n) { - *info = -17; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLASDA", &i__1); - return 0; - } - - m = *n + *sqre; - -/* If the input matrix is too small, call DLASDQ to find the SVD. */ - - if (*n <= *smlsiz) { - if (*icompq == 0) { - dlasdq_("U", sqre, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ - vt_offset], ldu, &u[u_offset], ldu, &u[u_offset], ldu, & - work[1], info); - } else { - dlasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset] - , ldu, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], - info); - } - return 0; - } - -/* Book-keeping and set up the computation tree. */ - - inode = 1; - ndiml = inode + *n; - ndimr = ndiml + *n; - idxq = ndimr + *n; - iwk = idxq + *n; - - ncc = 0; - nru = 0; - - smlszp = *smlsiz + 1; - vf = 1; - vl = vf + m; - nwork1 = vl + m; - nwork2 = nwork1 + smlszp * smlszp; - - dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], - smlsiz); - -/* - for the nodes on bottom level of the tree, solve - their subproblems by DLASDQ. -*/ - - ndb1 = (nd + 1) / 2; - i__1 = nd; - for (i__ = ndb1; i__ <= i__1; ++i__) { - -/* - IC : center row of each node - NL : number of rows of left subproblem - NR : number of rows of right subproblem - NLF: starting row of the left subproblem - NRF: starting row of the right subproblem -*/ - - i1 = i__ - 1; - ic = iwork[inode + i1]; - nl = iwork[ndiml + i1]; - nlp1 = nl + 1; - nr = iwork[ndimr + i1]; - nlf = ic - nl; - nrf = ic + 1; - idxqi = idxq + nlf - 2; - vfi = vf + nlf - 1; - vli = vl + nlf - 1; - sqrei = 1; - if (*icompq == 0) { - dlaset_("A", &nlp1, &nlp1, &c_b2467, &c_b2453, &work[nwork1], & - smlszp); - dlasdq_("U", &sqrei, &nl, &nlp1, &nru, &ncc, &d__[nlf], &e[nlf], & - work[nwork1], &smlszp, &work[nwork2], &nl, &work[nwork2], - &nl, &work[nwork2], info); - itemp = nwork1 + nl * smlszp; - dcopy_(&nlp1, &work[nwork1], &c__1, &work[vfi], &c__1); - dcopy_(&nlp1, &work[itemp], &c__1, &work[vli], &c__1); - } else { - dlaset_("A", &nl, &nl, &c_b2467, &c_b2453, &u[nlf + u_dim1], ldu); - dlaset_("A", &nlp1, &nlp1, &c_b2467, &c_b2453, &vt[nlf + vt_dim1], - ldu); - dlasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], & - vt[nlf + vt_dim1], ldu, &u[nlf + u_dim1], ldu, &u[nlf + - u_dim1], ldu, &work[nwork1], info); - dcopy_(&nlp1, &vt[nlf + vt_dim1], &c__1, &work[vfi], &c__1); - dcopy_(&nlp1, &vt[nlf + nlp1 * vt_dim1], &c__1, &work[vli], &c__1) - ; - } - if (*info != 0) { - return 0; - } - i__2 = nl; - for (j = 1; j <= i__2; ++j) { - iwork[idxqi + j] = j; -/* L10: */ - } - if (i__ == nd && *sqre == 0) { - sqrei = 0; - } else { - sqrei = 1; - } - idxqi += nlp1; - vfi += nlp1; - vli += nlp1; - nrp1 = nr + sqrei; - if (*icompq == 0) { - dlaset_("A", &nrp1, &nrp1, &c_b2467, &c_b2453, &work[nwork1], & - smlszp); - dlasdq_("U", &sqrei, &nr, &nrp1, &nru, &ncc, &d__[nrf], &e[nrf], & - work[nwork1], &smlszp, &work[nwork2], &nr, &work[nwork2], - &nr, &work[nwork2], info); - itemp = nwork1 + (nrp1 - 1) * smlszp; - dcopy_(&nrp1, &work[nwork1], &c__1, &work[vfi], &c__1); - dcopy_(&nrp1, &work[itemp], &c__1, &work[vli], &c__1); - } else { - dlaset_("A", &nr, &nr, &c_b2467, &c_b2453, &u[nrf + u_dim1], ldu); - dlaset_("A", &nrp1, &nrp1, &c_b2467, &c_b2453, &vt[nrf + vt_dim1], - ldu); - dlasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], & - vt[nrf + vt_dim1], ldu, &u[nrf + u_dim1], ldu, &u[nrf + - u_dim1], ldu, &work[nwork1], info); - dcopy_(&nrp1, &vt[nrf + vt_dim1], &c__1, &work[vfi], &c__1); - dcopy_(&nrp1, &vt[nrf + nrp1 * vt_dim1], &c__1, &work[vli], &c__1) - ; - } - if (*info != 0) { - return 0; - } - i__2 = nr; - for (j = 1; j <= i__2; ++j) { - iwork[idxqi + j] = j; -/* L20: */ - } -/* L30: */ - } - -/* Now conquer each subproblem bottom-up. */ - - j = pow_ii(&c__2, &nlvl); - for (lvl = nlvl; lvl >= 1; --lvl) { - lvl2 = (lvl << 1) - 1; - -/* - Find the first node LF and last node LL on - the current level LVL. -*/ - - if (lvl == 1) { - lf = 1; - ll = 1; - } else { - i__1 = lvl - 1; - lf = pow_ii(&c__2, &i__1); - ll = (lf << 1) - 1; - } - i__1 = ll; - for (i__ = lf; i__ <= i__1; ++i__) { - im1 = i__ - 1; - ic = iwork[inode + im1]; - nl = iwork[ndiml + im1]; - nr = iwork[ndimr + im1]; - nlf = ic - nl; - nrf = ic + 1; - if (i__ == ll) { - sqrei = *sqre; - } else { - sqrei = 1; - } - vfi = vf + nlf - 1; - vli = vl + nlf - 1; - idxqi = idxq + nlf - 1; - alpha = d__[ic]; - beta = e[ic]; - if (*icompq == 0) { - dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & - work[vli], &alpha, &beta, &iwork[idxqi], &perm[ - perm_offset], &givptr[1], &givcol[givcol_offset], - ldgcol, &givnum[givnum_offset], ldu, &poles[ - poles_offset], &difl[difl_offset], &difr[difr_offset], - &z__[z_offset], &k[1], &c__[1], &s[1], &work[nwork1], - &iwork[iwk], info); - } else { - --j; - dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & - work[vli], &alpha, &beta, &iwork[idxqi], &perm[nlf + - lvl * perm_dim1], &givptr[j], &givcol[nlf + lvl2 * - givcol_dim1], ldgcol, &givnum[nlf + lvl2 * - givnum_dim1], ldu, &poles[nlf + lvl2 * poles_dim1], & - difl[nlf + lvl * difl_dim1], &difr[nlf + lvl2 * - difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[j], - &s[j], &work[nwork1], &iwork[iwk], info); - } - if (*info != 0) { - return 0; - } -/* L40: */ - } -/* L50: */ - } - - return 0; - -/* End of DLASDA */ - -} /* dlasda_ */ - -/* Subroutine */ int dlasdq_(char *uplo, integer *sqre, integer *n, integer * - ncvt, integer *nru, integer *ncc, doublereal *d__, doublereal *e, - doublereal *vt, integer *ldvt, doublereal *u, integer *ldu, - doublereal *c__, integer *ldc, doublereal *work, integer *info) -{ - /* System generated locals */ - integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, - i__2; - - /* Local variables */ - static integer i__, j; - static doublereal r__, cs, sn; - static integer np1, isub; - static doublereal smin; - static integer sqre1; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, - integer *, doublereal *, doublereal *, doublereal *, integer *), dswap_(integer *, doublereal *, integer * - , doublereal *, integer *); - static integer iuplo; - extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *), xerbla_(char *, - integer *), dbdsqr_(char *, integer *, integer *, integer - *, integer *, doublereal *, doublereal *, doublereal *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *); - static logical rotate; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - DLASDQ computes the singular value decomposition (SVD) of a real - (upper or lower) bidiagonal matrix with diagonal D and offdiagonal - E, accumulating the transformations if desired. Letting B denote - the input bidiagonal matrix, the algorithm computes orthogonal - matrices Q and P such that B = Q * S * P' (P' denotes the transpose - of P). The singular values S are overwritten on D. - - The input matrix U is changed to U * Q if desired. - The input matrix VT is changed to P' * VT if desired. - The input matrix C is changed to Q' * C if desired. - - See "Computing Small Singular Values of Bidiagonal Matrices With - Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, - LAPACK Working Note #3, for a detailed description of the algorithm. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - On entry, UPLO specifies whether the input bidiagonal matrix - is upper or lower bidiagonal, and wether it is square are - not. - UPLO = 'U' or 'u' B is upper bidiagonal. - UPLO = 'L' or 'l' B is lower bidiagonal. - - SQRE (input) INTEGER - = 0: then the input matrix is N-by-N. - = 1: then the input matrix is N-by-(N+1) if UPLU = 'U' and - (N+1)-by-N if UPLU = 'L'. - - The bidiagonal matrix has - N = NL + NR + 1 rows and - M = N + SQRE >= N columns. - - N (input) INTEGER - On entry, N specifies the number of rows and columns - in the matrix. N must be at least 0. - - NCVT (input) INTEGER - On entry, NCVT specifies the number of columns of - the matrix VT. NCVT must be at least 0. - - NRU (input) INTEGER - On entry, NRU specifies the number of rows of - the matrix U. NRU must be at least 0. - - NCC (input) INTEGER - On entry, NCC specifies the number of columns of - the matrix C. NCC must be at least 0. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, D contains the diagonal entries of the - bidiagonal matrix whose SVD is desired. On normal exit, - D contains the singular values in ascending order. - - E (input/output) DOUBLE PRECISION array. - dimension is (N-1) if SQRE = 0 and N if SQRE = 1. - On entry, the entries of E contain the offdiagonal entries - of the bidiagonal matrix whose SVD is desired. On normal - exit, E will contain 0. If the algorithm does not converge, - D and E will contain the diagonal and superdiagonal entries - of a bidiagonal matrix orthogonally equivalent to the one - given as input. - - VT (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT) - On entry, contains a matrix which on exit has been - premultiplied by P', dimension N-by-NCVT if SQRE = 0 - and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0). - - LDVT (input) INTEGER - On entry, LDVT specifies the leading dimension of VT as - declared in the calling (sub) program. LDVT must be at - least 1. If NCVT is nonzero LDVT must also be at least N. - - U (input/output) DOUBLE PRECISION array, dimension (LDU, N) - On entry, contains a matrix which on exit has been - postmultiplied by Q, dimension NRU-by-N if SQRE = 0 - and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0). - - LDU (input) INTEGER - On entry, LDU specifies the leading dimension of U as - declared in the calling (sub) program. LDU must be at - least max( 1, NRU ) . - - C (input/output) DOUBLE PRECISION array, dimension (LDC, NCC) - On entry, contains an N-by-NCC matrix which on exit - has been premultiplied by Q' dimension N-by-NCC if SQRE = 0 - and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0). - - LDC (input) INTEGER - On entry, LDC specifies the leading dimension of C as - declared in the calling (sub) program. LDC must be at - least 1. If NCC is nonzero, LDC must also be at least N. - - WORK (workspace) DOUBLE PRECISION array, dimension (4*N) - Workspace. Only referenced if one of NCVT, NRU, or NCC is - nonzero, and if N is at least 2. - - INFO (output) INTEGER - On exit, a value of 0 indicates a successful exit. - If INFO < 0, argument number -INFO is illegal. - If INFO > 0, the algorithm did not converge, and INFO - specifies how many superdiagonals did not converge. - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - iuplo = 0; - if (lsame_(uplo, "U")) { - iuplo = 1; - } - if (lsame_(uplo, "L")) { - iuplo = 2; - } - if (iuplo == 0) { - *info = -1; - } else if (*sqre < 0 || *sqre > 1) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*ncvt < 0) { - *info = -4; - } else if (*nru < 0) { - *info = -5; - } else if (*ncc < 0) { - *info = -6; - } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { - *info = -10; - } else if (*ldu < max(1,*nru)) { - *info = -12; - } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { - *info = -14; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLASDQ", &i__1); - return 0; - } - if (*n == 0) { - return 0; - } - -/* ROTATE is true if any singular vectors desired, false otherwise */ - - rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; - np1 = *n + 1; - sqre1 = *sqre; - -/* - If matrix non-square upper bidiagonal, rotate to be lower - bidiagonal. The rotations are on the right. -*/ - - if (iuplo == 1 && sqre1 == 1) { - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); - d__[i__] = r__; - e[i__] = sn * d__[i__ + 1]; - d__[i__ + 1] = cs * d__[i__ + 1]; - if (rotate) { - work[i__] = cs; - work[*n + i__] = sn; - } -/* L10: */ - } - dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__); - d__[*n] = r__; - e[*n] = 0.; - if (rotate) { - work[*n] = cs; - work[*n + *n] = sn; - } - iuplo = 2; - sqre1 = 0; - -/* Update singular vectors if desired. */ - - if (*ncvt > 0) { - dlasr_("L", "V", "F", &np1, ncvt, &work[1], &work[np1], &vt[ - vt_offset], ldvt); - } - } - -/* - If matrix lower bidiagonal, rotate to be upper bidiagonal - by applying Givens rotations on the left. -*/ - - if (iuplo == 2) { - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); - d__[i__] = r__; - e[i__] = sn * d__[i__ + 1]; - d__[i__ + 1] = cs * d__[i__ + 1]; - if (rotate) { - work[i__] = cs; - work[*n + i__] = sn; - } -/* L20: */ - } - -/* - If matrix (N+1)-by-N lower bidiagonal, one additional - rotation is needed. -*/ - - if (sqre1 == 1) { - dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__); - d__[*n] = r__; - if (rotate) { - work[*n] = cs; - work[*n + *n] = sn; - } - } - -/* Update singular vectors if desired. */ - - if (*nru > 0) { - if (sqre1 == 0) { - dlasr_("R", "V", "F", nru, n, &work[1], &work[np1], &u[ - u_offset], ldu); - } else { - dlasr_("R", "V", "F", nru, &np1, &work[1], &work[np1], &u[ - u_offset], ldu); - } - } - if (*ncc > 0) { - if (sqre1 == 0) { - dlasr_("L", "V", "F", n, ncc, &work[1], &work[np1], &c__[ - c_offset], ldc); - } else { - dlasr_("L", "V", "F", &np1, ncc, &work[1], &work[np1], &c__[ - c_offset], ldc); - } - } - } - -/* - Call DBDSQR to compute the SVD of the reduced real - N-by-N upper bidiagonal matrix. -*/ - - dbdsqr_("U", n, ncvt, nru, ncc, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[ - u_offset], ldu, &c__[c_offset], ldc, &work[1], info); - -/* - Sort the singular values into ascending order (insertion sort on - singular values, but only one transposition per singular vector) -*/ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Scan for smallest D(I). */ - - isub = i__; - smin = d__[i__]; - i__2 = *n; - for (j = i__ + 1; j <= i__2; ++j) { - if (d__[j] < smin) { - isub = j; - smin = d__[j]; - } -/* L30: */ - } - if (isub != i__) { - -/* Swap singular values and vectors. */ - - d__[isub] = d__[i__]; - d__[i__] = smin; - if (*ncvt > 0) { - dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[i__ + vt_dim1], - ldvt); - } - if (*nru > 0) { - dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[i__ * u_dim1 + 1] - , &c__1); - } - if (*ncc > 0) { - dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[i__ + c_dim1], ldc) - ; - } - } -/* L40: */ - } - - return 0; - -/* End of DLASDQ */ - -} /* dlasdq_ */ - -/* Subroutine */ int dlasdt_(integer *n, integer *lvl, integer *nd, integer * - inode, integer *ndiml, integer *ndimr, integer *msub) -{ - /* System generated locals */ - integer i__1, i__2; - - /* Builtin functions */ - double log(doublereal); - - /* Local variables */ - static integer i__, il, ir, maxn; - static doublereal temp; - static integer nlvl, llst, ncrnt; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DLASDT creates a tree of subproblems for bidiagonal divide and - conquer. - - Arguments - ========= - - N (input) INTEGER - On entry, the number of diagonal elements of the - bidiagonal matrix. - - LVL (output) INTEGER - On exit, the number of levels on the computation tree. - - ND (output) INTEGER - On exit, the number of nodes on the tree. - - INODE (output) INTEGER array, dimension ( N ) - On exit, centers of subproblems. - - NDIML (output) INTEGER array, dimension ( N ) - On exit, row dimensions of left children. - - NDIMR (output) INTEGER array, dimension ( N ) - On exit, row dimensions of right children. - - MSUB (input) INTEGER. - On entry, the maximum row dimension each subproblem at the - bottom of the tree can be of. - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Find the number of levels on the tree. -*/ - - /* Parameter adjustments */ - --ndimr; - --ndiml; - --inode; - - /* Function Body */ - maxn = max(1,*n); - temp = log((doublereal) maxn / (doublereal) (*msub + 1)) / log(2.); - *lvl = (integer) temp + 1; - - i__ = *n / 2; - inode[1] = i__ + 1; - ndiml[1] = i__; - ndimr[1] = *n - i__ - 1; - il = 0; - ir = 1; - llst = 1; - i__1 = *lvl - 1; - for (nlvl = 1; nlvl <= i__1; ++nlvl) { - -/* - Constructing the tree at (NLVL+1)-st level. The number of - nodes created on this level is LLST * 2. -*/ - - i__2 = llst - 1; - for (i__ = 0; i__ <= i__2; ++i__) { - il += 2; - ir += 2; - ncrnt = llst + i__; - ndiml[il] = ndiml[ncrnt] / 2; - ndimr[il] = ndiml[ncrnt] - ndiml[il] - 1; - inode[il] = inode[ncrnt] - ndimr[il] - 1; - ndiml[ir] = ndimr[ncrnt] / 2; - ndimr[ir] = ndimr[ncrnt] - ndiml[ir] - 1; - inode[ir] = inode[ncrnt] + ndiml[ir] + 1; -/* L10: */ - } - llst <<= 1; -/* L20: */ - } - *nd = (llst << 1) - 1; - - return 0; - -/* End of DLASDT */ - -} /* dlasdt_ */ - -/* Subroutine */ int dlaset_(char *uplo, integer *m, integer *n, doublereal * - alpha, doublereal *beta, doublereal *a, integer *lda) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j; - extern logical lsame_(char *, char *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLASET initializes an m-by-n matrix A to BETA on the diagonal and - ALPHA on the offdiagonals. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies the part of the matrix A to be set. - = 'U': Upper triangular part is set; the strictly lower - triangular part of A is not changed. - = 'L': Lower triangular part is set; the strictly upper - triangular part of A is not changed. - Otherwise: All of the matrix A is set. - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - ALPHA (input) DOUBLE PRECISION - The constant to which the offdiagonal elements are to be set. - - BETA (input) DOUBLE PRECISION - The constant to which the diagonal elements are to be set. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On exit, the leading m-by-n submatrix of A is set as follows: - - if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n, - if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n, - otherwise, A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j, - - and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - if (lsame_(uplo, "U")) { - -/* - Set the strictly upper triangular or trapezoidal part of the - array to ALPHA. -*/ - - i__1 = *n; - for (j = 2; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = j - 1; - i__2 = min(i__3,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = *alpha; -/* L10: */ - } -/* L20: */ - } - - } else if (lsame_(uplo, "L")) { - -/* - Set the strictly lower triangular or trapezoidal part of the - array to ALPHA. -*/ - - i__1 = min(*m,*n); - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = j + 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = *alpha; -/* L30: */ - } -/* L40: */ - } - - } else { - -/* Set the leading m-by-n submatrix to ALPHA. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = *alpha; -/* L50: */ - } -/* L60: */ - } - } - -/* Set the first min(M,N) diagonal elements to BETA. */ - - i__1 = min(*m,*n); - for (i__ = 1; i__ <= i__1; ++i__) { - a[i__ + i__ * a_dim1] = *beta; -/* L70: */ - } - - return 0; - -/* End of DLASET */ - -} /* dlaset_ */ - -/* Subroutine */ int dlasq1_(integer *n, doublereal *d__, doublereal *e, - doublereal *work, integer *info) -{ - /* System generated locals */ - integer i__1, i__2; - doublereal d__1, d__2, d__3; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__; - static doublereal eps; - extern /* Subroutine */ int dlas2_(doublereal *, doublereal *, doublereal - *, doublereal *, doublereal *); - static doublereal scale; - static integer iinfo; - static doublereal sigmn; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static doublereal sigmx; - extern /* Subroutine */ int dlasq2_(integer *, doublereal *, integer *); - - extern /* Subroutine */ int dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *); - static doublereal safmin; - extern /* Subroutine */ int xerbla_(char *, integer *), dlasrt_( - char *, integer *, doublereal *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - DLASQ1 computes the singular values of a real N-by-N bidiagonal - matrix with diagonal D and off-diagonal E. The singular values - are computed to high relative accuracy, in the absence of - denormalization, underflow and overflow. The algorithm was first - presented in - - "Accurate singular values and differential qd algorithms" by K. V. - Fernando and B. N. Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230, - 1994, - - and the present implementation is described in "An implementation of - the dqds Algorithm (Positive Case)", LAPACK Working Note. - - Arguments - ========= - - N (input) INTEGER - The number of rows and columns in the matrix. N >= 0. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, D contains the diagonal elements of the - bidiagonal matrix whose SVD is desired. On normal exit, - D contains the singular values in decreasing order. - - E (input/output) DOUBLE PRECISION array, dimension (N) - On entry, elements E(1:N-1) contain the off-diagonal elements - of the bidiagonal matrix whose SVD is desired. - On exit, E is overwritten. - - WORK (workspace) DOUBLE PRECISION array, dimension (4*N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: the algorithm failed - = 1, a split was marked by a positive value in E - = 2, current block of Z not diagonalized after 30*N - iterations (in inner while loop) - = 3, termination criterion of outer while loop not met - (program created more than N unreduced blocks) - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --work; - --e; - --d__; - - /* Function Body */ - *info = 0; - if (*n < 0) { - *info = -2; - i__1 = -(*info); - xerbla_("DLASQ1", &i__1); - return 0; - } else if (*n == 0) { - return 0; - } else if (*n == 1) { - d__[1] = abs(d__[1]); - return 0; - } else if (*n == 2) { - dlas2_(&d__[1], &e[1], &d__[2], &sigmn, &sigmx); - d__[1] = sigmx; - d__[2] = sigmn; - return 0; - } - -/* Estimate the largest singular value. */ - - sigmx = 0.; - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - d__[i__] = (d__1 = d__[i__], abs(d__1)); -/* Computing MAX */ - d__2 = sigmx, d__3 = (d__1 = e[i__], abs(d__1)); - sigmx = max(d__2,d__3); -/* L10: */ - } - d__[*n] = (d__1 = d__[*n], abs(d__1)); - -/* Early return if SIGMX is zero (matrix is already diagonal). */ - - if (sigmx == 0.) { - dlasrt_("D", n, &d__[1], &iinfo); - return 0; - } - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__1 = sigmx, d__2 = d__[i__]; - sigmx = max(d__1,d__2); -/* L20: */ - } - -/* - Copy D and E into WORK (in the Z format) and scale (squaring the - input data makes scaling by a power of the radix pointless). -*/ - - eps = PRECISION; - safmin = SAFEMINIMUM; - scale = sqrt(eps / safmin); - dcopy_(n, &d__[1], &c__1, &work[1], &c__2); - i__1 = *n - 1; - dcopy_(&i__1, &e[1], &c__1, &work[2], &c__2); - i__1 = (*n << 1) - 1; - i__2 = (*n << 1) - 1; - dlascl_("G", &c__0, &c__0, &sigmx, &scale, &i__1, &c__1, &work[1], &i__2, - &iinfo); - -/* Compute the q's and e's. */ - - i__1 = (*n << 1) - 1; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing 2nd power */ - d__1 = work[i__]; - work[i__] = d__1 * d__1; -/* L30: */ - } - work[*n * 2] = 0.; - - dlasq2_(n, &work[1], info); - - if (*info == 0) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - d__[i__] = sqrt(work[i__]); -/* L40: */ - } - dlascl_("G", &c__0, &c__0, &scale, &sigmx, n, &c__1, &d__[1], n, & - iinfo); - } - - return 0; - -/* End of DLASQ1 */ - -} /* dlasq1_ */ - -/* Subroutine */ int dlasq2_(integer *n, doublereal *z__, integer *info) -{ - /* System generated locals */ - integer i__1, i__2, i__3; - doublereal d__1, d__2; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal d__, e; - static integer k; - static doublereal s, t; - static integer i0, i4, n0, pp; - static doublereal eps, tol; - static integer ipn4; - static doublereal tol2; - static logical ieee; - static integer nbig; - static doublereal dmin__, emin, emax; - static integer ndiv, iter; - static doublereal qmin, temp, qmax, zmax; - static integer splt, nfail; - static doublereal desig, trace, sigma; - static integer iinfo; - extern /* Subroutine */ int dlasq3_(integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, doublereal *, doublereal *, - integer *, integer *, integer *, logical *); - - static integer iwhila, iwhilb; - static doublereal oldemn, safmin; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, - integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - DLASQ2 computes all the eigenvalues of the symmetric positive - definite tridiagonal matrix associated with the qd array Z to high - relative accuracy are computed to high relative accuracy, in the - absence of denormalization, underflow and overflow. - - To see the relation of Z to the tridiagonal matrix, let L be a - unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and - let U be an upper bidiagonal matrix with 1's above and diagonal - Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the - symmetric tridiagonal to which it is similar. - - Note : DLASQ2 defines a logical variable, IEEE, which is true - on machines which follow ieee-754 floating-point standard in their - handling of infinities and NaNs, and false otherwise. This variable - is passed to DLASQ3. - - Arguments - ========= - - N (input) INTEGER - The number of rows and columns in the matrix. N >= 0. - - Z (workspace) DOUBLE PRECISION array, dimension ( 4*N ) - On entry Z holds the qd array. On exit, entries 1 to N hold - the eigenvalues in decreasing order, Z( 2*N+1 ) holds the - trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If - N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) - holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of - shifts that failed. - - INFO (output) INTEGER - = 0: successful exit - < 0: if the i-th argument is a scalar and had an illegal - value, then INFO = -i, if the i-th argument is an - array and the j-entry had an illegal value, then - INFO = -(i*100+j) - > 0: the algorithm failed - = 1, a split was marked by a positive value in E - = 2, current block of Z not diagonalized after 30*N - iterations (in inner while loop) - = 3, termination criterion of outer while loop not met - (program created more than N unreduced blocks) - - Further Details - =============== - Local Variables: I0:N0 defines a current unreduced segment of Z. - The shifts are accumulated in SIGMA. Iteration count is in ITER. - Ping-pong is controlled by PP (alternates between 0 and 1). - - ===================================================================== - - - Test the input arguments. - (in case DLASQ2 is not called by DLASQ1) -*/ - - /* Parameter adjustments */ - --z__; - - /* Function Body */ - *info = 0; - eps = PRECISION; - safmin = SAFEMINIMUM; - tol = eps * 100.; -/* Computing 2nd power */ - d__1 = tol; - tol2 = d__1 * d__1; - - if (*n < 0) { - *info = -1; - xerbla_("DLASQ2", &c__1); - return 0; - } else if (*n == 0) { - return 0; - } else if (*n == 1) { - -/* 1-by-1 case. */ - - if (z__[1] < 0.) { - *info = -201; - xerbla_("DLASQ2", &c__2); - } - return 0; - } else if (*n == 2) { - -/* 2-by-2 case. */ - - if (z__[2] < 0. || z__[3] < 0.) { - *info = -2; - xerbla_("DLASQ2", &c__2); - return 0; - } else if (z__[3] > z__[1]) { - d__ = z__[3]; - z__[3] = z__[1]; - z__[1] = d__; - } - z__[5] = z__[1] + z__[2] + z__[3]; - if (z__[2] > z__[3] * tol2) { - t = (z__[1] - z__[3] + z__[2]) * .5; - s = z__[3] * (z__[2] / t); - if (s <= t) { - s = z__[3] * (z__[2] / (t * (sqrt(s / t + 1.) + 1.))); - } else { - s = z__[3] * (z__[2] / (t + sqrt(t) * sqrt(t + s))); - } - t = z__[1] + (s + z__[2]); - z__[3] *= z__[1] / t; - z__[1] = t; - } - z__[2] = z__[3]; - z__[6] = z__[2] + z__[1]; - return 0; - } - -/* Check for negative data and compute sums of q's and e's. */ - - z__[*n * 2] = 0.; - emin = z__[2]; - qmax = 0.; - zmax = 0.; - d__ = 0.; - e = 0.; - - i__1 = *n - 1 << 1; - for (k = 1; k <= i__1; k += 2) { - if (z__[k] < 0.) { - *info = -(k + 200); - xerbla_("DLASQ2", &c__2); - return 0; - } else if (z__[k + 1] < 0.) { - *info = -(k + 201); - xerbla_("DLASQ2", &c__2); - return 0; - } - d__ += z__[k]; - e += z__[k + 1]; -/* Computing MAX */ - d__1 = qmax, d__2 = z__[k]; - qmax = max(d__1,d__2); -/* Computing MIN */ - d__1 = emin, d__2 = z__[k + 1]; - emin = min(d__1,d__2); -/* Computing MAX */ - d__1 = max(qmax,zmax), d__2 = z__[k + 1]; - zmax = max(d__1,d__2); -/* L10: */ - } - if (z__[(*n << 1) - 1] < 0.) { - *info = -((*n << 1) + 199); - xerbla_("DLASQ2", &c__2); - return 0; - } - d__ += z__[(*n << 1) - 1]; -/* Computing MAX */ - d__1 = qmax, d__2 = z__[(*n << 1) - 1]; - qmax = max(d__1,d__2); - zmax = max(qmax,zmax); - -/* Check for diagonality. */ - - if (e == 0.) { - i__1 = *n; - for (k = 2; k <= i__1; ++k) { - z__[k] = z__[(k << 1) - 1]; -/* L20: */ - } - dlasrt_("D", n, &z__[1], &iinfo); - z__[(*n << 1) - 1] = d__; - return 0; - } - - trace = d__ + e; - -/* Check for zero data. */ - - if (trace == 0.) { - z__[(*n << 1) - 1] = 0.; - return 0; - } - -/* Check whether the machine is IEEE conformable. */ - - ieee = ilaenv_(&c__10, "DLASQ2", "N", &c__1, &c__2, &c__3, &c__4, (ftnlen) - 6, (ftnlen)1) == 1 && ilaenv_(&c__11, "DLASQ2", "N", &c__1, &c__2, - &c__3, &c__4, (ftnlen)6, (ftnlen)1) == 1; - -/* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). */ - - for (k = *n << 1; k >= 2; k += -2) { - z__[k * 2] = 0.; - z__[(k << 1) - 1] = z__[k]; - z__[(k << 1) - 2] = 0.; - z__[(k << 1) - 3] = z__[k - 1]; -/* L30: */ - } - - i0 = 1; - n0 = *n; - -/* Reverse the qd-array, if warranted. */ - - if (z__[(i0 << 2) - 3] * 1.5 < z__[(n0 << 2) - 3]) { - ipn4 = i0 + n0 << 2; - i__1 = i0 + n0 - 1 << 1; - for (i4 = i0 << 2; i4 <= i__1; i4 += 4) { - temp = z__[i4 - 3]; - z__[i4 - 3] = z__[ipn4 - i4 - 3]; - z__[ipn4 - i4 - 3] = temp; - temp = z__[i4 - 1]; - z__[i4 - 1] = z__[ipn4 - i4 - 5]; - z__[ipn4 - i4 - 5] = temp; -/* L40: */ - } - } - -/* Initial split checking via dqd and Li's test. */ - - pp = 0; - - for (k = 1; k <= 2; ++k) { - - d__ = z__[(n0 << 2) + pp - 3]; - i__1 = (i0 << 2) + pp; - for (i4 = (n0 - 1 << 2) + pp; i4 >= i__1; i4 += -4) { - if (z__[i4 - 1] <= tol2 * d__) { - z__[i4 - 1] = -0.; - d__ = z__[i4 - 3]; - } else { - d__ = z__[i4 - 3] * (d__ / (d__ + z__[i4 - 1])); - } -/* L50: */ - } - -/* dqd maps Z to ZZ plus Li's test. */ - - emin = z__[(i0 << 2) + pp + 1]; - d__ = z__[(i0 << 2) + pp - 3]; - i__1 = (n0 - 1 << 2) + pp; - for (i4 = (i0 << 2) + pp; i4 <= i__1; i4 += 4) { - z__[i4 - (pp << 1) - 2] = d__ + z__[i4 - 1]; - if (z__[i4 - 1] <= tol2 * d__) { - z__[i4 - 1] = -0.; - z__[i4 - (pp << 1) - 2] = d__; - z__[i4 - (pp << 1)] = 0.; - d__ = z__[i4 + 1]; - } else if (safmin * z__[i4 + 1] < z__[i4 - (pp << 1) - 2] && - safmin * z__[i4 - (pp << 1) - 2] < z__[i4 + 1]) { - temp = z__[i4 + 1] / z__[i4 - (pp << 1) - 2]; - z__[i4 - (pp << 1)] = z__[i4 - 1] * temp; - d__ *= temp; - } else { - z__[i4 - (pp << 1)] = z__[i4 + 1] * (z__[i4 - 1] / z__[i4 - ( - pp << 1) - 2]); - d__ = z__[i4 + 1] * (d__ / z__[i4 - (pp << 1) - 2]); - } -/* Computing MIN */ - d__1 = emin, d__2 = z__[i4 - (pp << 1)]; - emin = min(d__1,d__2); -/* L60: */ - } - z__[(n0 << 2) - pp - 2] = d__; - -/* Now find qmax. */ - - qmax = z__[(i0 << 2) - pp - 2]; - i__1 = (n0 << 2) - pp - 2; - for (i4 = (i0 << 2) - pp + 2; i4 <= i__1; i4 += 4) { -/* Computing MAX */ - d__1 = qmax, d__2 = z__[i4]; - qmax = max(d__1,d__2); -/* L70: */ - } - -/* Prepare for the next iteration on K. */ - - pp = 1 - pp; -/* L80: */ - } - - iter = 2; - nfail = 0; - ndiv = n0 - i0 << 1; - - i__1 = *n + 1; - for (iwhila = 1; iwhila <= i__1; ++iwhila) { - if (n0 < 1) { - goto L150; - } - -/* - While array unfinished do - - E(N0) holds the value of SIGMA when submatrix in I0:N0 - splits from the rest of the array, but is negated. -*/ - - desig = 0.; - if (n0 == *n) { - sigma = 0.; - } else { - sigma = -z__[(n0 << 2) - 1]; - } - if (sigma < 0.) { - *info = 1; - return 0; - } - -/* - Find last unreduced submatrix's top index I0, find QMAX and - EMIN. Find Gershgorin-type bound if Q's much greater than E's. -*/ - - emax = 0.; - if (n0 > i0) { - emin = (d__1 = z__[(n0 << 2) - 5], abs(d__1)); - } else { - emin = 0.; - } - qmin = z__[(n0 << 2) - 3]; - qmax = qmin; - for (i4 = n0 << 2; i4 >= 8; i4 += -4) { - if (z__[i4 - 5] <= 0.) { - goto L100; - } - if (qmin >= emax * 4.) { -/* Computing MIN */ - d__1 = qmin, d__2 = z__[i4 - 3]; - qmin = min(d__1,d__2); -/* Computing MAX */ - d__1 = emax, d__2 = z__[i4 - 5]; - emax = max(d__1,d__2); - } -/* Computing MAX */ - d__1 = qmax, d__2 = z__[i4 - 7] + z__[i4 - 5]; - qmax = max(d__1,d__2); -/* Computing MIN */ - d__1 = emin, d__2 = z__[i4 - 5]; - emin = min(d__1,d__2); -/* L90: */ - } - i4 = 4; - -L100: - i0 = i4 / 4; - -/* Store EMIN for passing to DLASQ3. */ - - z__[(n0 << 2) - 1] = emin; - -/* - Put -(initial shift) into DMIN. - - Computing MAX -*/ - d__1 = 0., d__2 = qmin - sqrt(qmin) * 2. * sqrt(emax); - dmin__ = -max(d__1,d__2); - -/* Now I0:N0 is unreduced. PP = 0 for ping, PP = 1 for pong. */ - - pp = 0; - - nbig = (n0 - i0 + 1) * 30; - i__2 = nbig; - for (iwhilb = 1; iwhilb <= i__2; ++iwhilb) { - if (i0 > n0) { - goto L130; - } - -/* While submatrix unfinished take a good dqds step. */ - - dlasq3_(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, & - nfail, &iter, &ndiv, &ieee); - - pp = 1 - pp; - -/* When EMIN is very small check for splits. */ - - if (pp == 0 && n0 - i0 >= 3) { - if (z__[n0 * 4] <= tol2 * qmax || z__[(n0 << 2) - 1] <= tol2 * - sigma) { - splt = i0 - 1; - qmax = z__[(i0 << 2) - 3]; - emin = z__[(i0 << 2) - 1]; - oldemn = z__[i0 * 4]; - i__3 = n0 - 3 << 2; - for (i4 = i0 << 2; i4 <= i__3; i4 += 4) { - if (z__[i4] <= tol2 * z__[i4 - 3] || z__[i4 - 1] <= - tol2 * sigma) { - z__[i4 - 1] = -sigma; - splt = i4 / 4; - qmax = 0.; - emin = z__[i4 + 3]; - oldemn = z__[i4 + 4]; - } else { -/* Computing MAX */ - d__1 = qmax, d__2 = z__[i4 + 1]; - qmax = max(d__1,d__2); -/* Computing MIN */ - d__1 = emin, d__2 = z__[i4 - 1]; - emin = min(d__1,d__2); -/* Computing MIN */ - d__1 = oldemn, d__2 = z__[i4]; - oldemn = min(d__1,d__2); - } -/* L110: */ - } - z__[(n0 << 2) - 1] = emin; - z__[n0 * 4] = oldemn; - i0 = splt + 1; - } - } - -/* L120: */ - } - - *info = 2; - return 0; - -/* end IWHILB */ - -L130: - -/* L140: */ - ; - } - - *info = 3; - return 0; - -/* end IWHILA */ - -L150: - -/* Move q's to the front. */ - - i__1 = *n; - for (k = 2; k <= i__1; ++k) { - z__[k] = z__[(k << 2) - 3]; -/* L160: */ - } - -/* Sort and compute sum of eigenvalues. */ - - dlasrt_("D", n, &z__[1], &iinfo); - - e = 0.; - for (k = *n; k >= 1; --k) { - e += z__[k]; -/* L170: */ - } - -/* Store trace, sum(eigenvalues) and information on performance. */ - - z__[(*n << 1) + 1] = trace; - z__[(*n << 1) + 2] = e; - z__[(*n << 1) + 3] = (doublereal) iter; -/* Computing 2nd power */ - i__1 = *n; - z__[(*n << 1) + 4] = (doublereal) ndiv / (doublereal) (i__1 * i__1); - z__[(*n << 1) + 5] = nfail * 100. / (doublereal) iter; - return 0; - -/* End of DLASQ2 */ - -} /* dlasq2_ */ - -/* Subroutine */ int dlasq3_(integer *i0, integer *n0, doublereal *z__, - integer *pp, doublereal *dmin__, doublereal *sigma, doublereal *desig, - doublereal *qmax, integer *nfail, integer *iter, integer *ndiv, - logical *ieee) -{ - /* Initialized data */ - - static integer ttype = 0; - static doublereal dmin1 = 0.; - static doublereal dmin2 = 0.; - static doublereal dn = 0.; - static doublereal dn1 = 0.; - static doublereal dn2 = 0.; - static doublereal tau = 0.; - - /* System generated locals */ - integer i__1; - doublereal d__1, d__2; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal s, t; - static integer j4, nn; - static doublereal eps, tol; - static integer n0in, ipn4; - static doublereal tol2, temp; - extern /* Subroutine */ int dlasq4_(integer *, integer *, doublereal *, - integer *, integer *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, integer *) - , dlasq5_(integer *, integer *, doublereal *, integer *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, logical *), dlasq6_( - integer *, integer *, doublereal *, integer *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *); - - static doublereal safmin; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - May 17, 2000 - - - Purpose - ======= - - DLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. - In case of failure it changes shifts, and tries again until output - is positive. - - Arguments - ========= - - I0 (input) INTEGER - First index. - - N0 (input) INTEGER - Last index. - - Z (input) DOUBLE PRECISION array, dimension ( 4*N ) - Z holds the qd array. - - PP (input) INTEGER - PP=0 for ping, PP=1 for pong. - - DMIN (output) DOUBLE PRECISION - Minimum value of d. - - SIGMA (output) DOUBLE PRECISION - Sum of shifts used in current segment. - - DESIG (input/output) DOUBLE PRECISION - Lower order part of SIGMA - - QMAX (input) DOUBLE PRECISION - Maximum value of q. - - NFAIL (output) INTEGER - Number of times shift was too big. - - ITER (output) INTEGER - Number of iterations. - - NDIV (output) INTEGER - Number of divisions. - - TTYPE (output) INTEGER - Shift type. - - IEEE (input) LOGICAL - Flag for IEEE or non IEEE arithmetic (passed to DLASQ5). - - ===================================================================== -*/ - - /* Parameter adjustments */ - --z__; - - /* Function Body */ - - n0in = *n0; - eps = PRECISION; - safmin = SAFEMINIMUM; - tol = eps * 100.; -/* Computing 2nd power */ - d__1 = tol; - tol2 = d__1 * d__1; - -/* Check for deflation. */ - -L10: - - if (*n0 < *i0) { - return 0; - } - if (*n0 == *i0) { - goto L20; - } - nn = (*n0 << 2) + *pp; - if (*n0 == *i0 + 1) { - goto L40; - } - -/* Check whether E(N0-1) is negligible, 1 eigenvalue. */ - - if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - (*pp << 1) - - 4] > tol2 * z__[nn - 7]) { - goto L30; - } - -L20: - - z__[(*n0 << 2) - 3] = z__[(*n0 << 2) + *pp - 3] + *sigma; - --(*n0); - goto L10; - -/* Check whether E(N0-2) is negligible, 2 eigenvalues. */ - -L30: - - if (z__[nn - 9] > tol2 * *sigma && z__[nn - (*pp << 1) - 8] > tol2 * z__[ - nn - 11]) { - goto L50; - } - -L40: - - if (z__[nn - 3] > z__[nn - 7]) { - s = z__[nn - 3]; - z__[nn - 3] = z__[nn - 7]; - z__[nn - 7] = s; - } - if (z__[nn - 5] > z__[nn - 3] * tol2) { - t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5; - s = z__[nn - 3] * (z__[nn - 5] / t); - if (s <= t) { - s = z__[nn - 3] * (z__[nn - 5] / (t * (sqrt(s / t + 1.) + 1.))); - } else { - s = z__[nn - 3] * (z__[nn - 5] / (t + sqrt(t) * sqrt(t + s))); - } - t = z__[nn - 7] + (s + z__[nn - 5]); - z__[nn - 3] *= z__[nn - 7] / t; - z__[nn - 7] = t; - } - z__[(*n0 << 2) - 7] = z__[nn - 7] + *sigma; - z__[(*n0 << 2) - 3] = z__[nn - 3] + *sigma; - *n0 += -2; - goto L10; - -L50: - -/* Reverse the qd-array, if warranted. */ - - if (*dmin__ <= 0. || *n0 < n0in) { - if (z__[(*i0 << 2) + *pp - 3] * 1.5 < z__[(*n0 << 2) + *pp - 3]) { - ipn4 = *i0 + *n0 << 2; - i__1 = *i0 + *n0 - 1 << 1; - for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { - temp = z__[j4 - 3]; - z__[j4 - 3] = z__[ipn4 - j4 - 3]; - z__[ipn4 - j4 - 3] = temp; - temp = z__[j4 - 2]; - z__[j4 - 2] = z__[ipn4 - j4 - 2]; - z__[ipn4 - j4 - 2] = temp; - temp = z__[j4 - 1]; - z__[j4 - 1] = z__[ipn4 - j4 - 5]; - z__[ipn4 - j4 - 5] = temp; - temp = z__[j4]; - z__[j4] = z__[ipn4 - j4 - 4]; - z__[ipn4 - j4 - 4] = temp; -/* L60: */ - } - if (*n0 - *i0 <= 4) { - z__[(*n0 << 2) + *pp - 1] = z__[(*i0 << 2) + *pp - 1]; - z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp]; - } -/* Computing MIN */ - d__1 = dmin2, d__2 = z__[(*n0 << 2) + *pp - 1]; - dmin2 = min(d__1,d__2); -/* Computing MIN */ - d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*i0 << 2) + *pp - 1] - , d__1 = min(d__1,d__2), d__2 = z__[(*i0 << 2) + *pp + 3]; - z__[(*n0 << 2) + *pp - 1] = min(d__1,d__2); -/* Computing MIN */ - d__1 = z__[(*n0 << 2) - *pp], d__2 = z__[(*i0 << 2) - *pp], d__1 = - min(d__1,d__2), d__2 = z__[(*i0 << 2) - *pp + 4]; - z__[(*n0 << 2) - *pp] = min(d__1,d__2); -/* Computing MAX */ - d__1 = *qmax, d__2 = z__[(*i0 << 2) + *pp - 3], d__1 = max(d__1, - d__2), d__2 = z__[(*i0 << 2) + *pp + 1]; - *qmax = max(d__1,d__2); - *dmin__ = -0.; - } - } - -/* - L70: - - Computing MIN -*/ - d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*n0 << 2) + *pp - 9], d__1 = - min(d__1,d__2), d__2 = dmin2 + z__[(*n0 << 2) - *pp]; - if (*dmin__ < 0. || safmin * *qmax < min(d__1,d__2)) { - -/* Choose a shift. */ - - dlasq4_(i0, n0, &z__[1], pp, &n0in, dmin__, &dmin1, &dmin2, &dn, &dn1, - &dn2, &tau, &ttype); - -/* Call dqds until DMIN > 0. */ - -L80: - - dlasq5_(i0, n0, &z__[1], pp, &tau, dmin__, &dmin1, &dmin2, &dn, &dn1, - &dn2, ieee); - - *ndiv += *n0 - *i0 + 2; - ++(*iter); - -/* Check status. */ - - if (*dmin__ >= 0. && dmin1 > 0.) { - -/* Success. */ - - goto L100; - - } else if (*dmin__ < 0. && dmin1 > 0. && z__[(*n0 - 1 << 2) - *pp] < - tol * (*sigma + dn1) && abs(dn) < tol * *sigma) { - -/* Convergence hidden by negative DN. */ - - z__[(*n0 - 1 << 2) - *pp + 2] = 0.; - *dmin__ = 0.; - goto L100; - } else if (*dmin__ < 0.) { - -/* TAU too big. Select new TAU and try again. */ - - ++(*nfail); - if (ttype < -22) { - -/* Failed twice. Play it safe. */ - - tau = 0.; - } else if (dmin1 > 0.) { - -/* Late failure. Gives excellent shift. */ - - tau = (tau + *dmin__) * (1. - eps * 2.); - ttype += -11; - } else { - -/* Early failure. Divide by 4. */ - - tau *= .25; - ttype += -12; - } - goto L80; - } else if (*dmin__ != *dmin__) { - -/* NaN. */ - - tau = 0.; - goto L80; - } else { - -/* Possible underflow. Play it safe. */ - - goto L90; - } - } - -/* Risk of underflow. */ - -L90: - dlasq6_(i0, n0, &z__[1], pp, dmin__, &dmin1, &dmin2, &dn, &dn1, &dn2); - *ndiv += *n0 - *i0 + 2; - ++(*iter); - tau = 0.; - -L100: - if (tau < *sigma) { - *desig += tau; - t = *sigma + *desig; - *desig -= t - *sigma; - } else { - t = *sigma + tau; - *desig = *sigma - (t - tau) + *desig; - } - *sigma = t; - - return 0; - -/* End of DLASQ3 */ - -} /* dlasq3_ */ - -/* Subroutine */ int dlasq4_(integer *i0, integer *n0, doublereal *z__, - integer *pp, integer *n0in, doublereal *dmin__, doublereal *dmin1, - doublereal *dmin2, doublereal *dn, doublereal *dn1, doublereal *dn2, - doublereal *tau, integer *ttype) -{ - /* Initialized data */ - - static doublereal g = 0.; - - /* System generated locals */ - integer i__1; - doublereal d__1, d__2; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal s, a2, b1, b2; - static integer i4, nn, np; - static doublereal gam, gap1, gap2; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - DLASQ4 computes an approximation TAU to the smallest eigenvalue - using values of d from the previous transform. - - I0 (input) INTEGER - First index. - - N0 (input) INTEGER - Last index. - - Z (input) DOUBLE PRECISION array, dimension ( 4*N ) - Z holds the qd array. - - PP (input) INTEGER - PP=0 for ping, PP=1 for pong. - - NOIN (input) INTEGER - The value of N0 at start of EIGTEST. - - DMIN (input) DOUBLE PRECISION - Minimum value of d. - - DMIN1 (input) DOUBLE PRECISION - Minimum value of d, excluding D( N0 ). - - DMIN2 (input) DOUBLE PRECISION - Minimum value of d, excluding D( N0 ) and D( N0-1 ). - - DN (input) DOUBLE PRECISION - d(N) - - DN1 (input) DOUBLE PRECISION - d(N-1) - - DN2 (input) DOUBLE PRECISION - d(N-2) - - TAU (output) DOUBLE PRECISION - This is the shift. - - TTYPE (output) INTEGER - Shift type. - - Further Details - =============== - CNST1 = 9/16 - - ===================================================================== -*/ - - /* Parameter adjustments */ - --z__; - - /* Function Body */ - -/* - A negative DMIN forces the shift to take that absolute value - TTYPE records the type of shift. -*/ - - if (*dmin__ <= 0.) { - *tau = -(*dmin__); - *ttype = -1; - return 0; - } - - nn = (*n0 << 2) + *pp; - if (*n0in == *n0) { - -/* No eigenvalues deflated. */ - - if (*dmin__ == *dn || *dmin__ == *dn1) { - - b1 = sqrt(z__[nn - 3]) * sqrt(z__[nn - 5]); - b2 = sqrt(z__[nn - 7]) * sqrt(z__[nn - 9]); - a2 = z__[nn - 7] + z__[nn - 5]; - -/* Cases 2 and 3. */ - - if (*dmin__ == *dn && *dmin1 == *dn1) { - gap2 = *dmin2 - a2 - *dmin2 * .25; - if (gap2 > 0. && gap2 > b2) { - gap1 = a2 - *dn - b2 / gap2 * b2; - } else { - gap1 = a2 - *dn - (b1 + b2); - } - if (gap1 > 0. && gap1 > b1) { -/* Computing MAX */ - d__1 = *dn - b1 / gap1 * b1, d__2 = *dmin__ * .5; - s = max(d__1,d__2); - *ttype = -2; - } else { - s = 0.; - if (*dn > b1) { - s = *dn - b1; - } - if (a2 > b1 + b2) { -/* Computing MIN */ - d__1 = s, d__2 = a2 - (b1 + b2); - s = min(d__1,d__2); - } -/* Computing MAX */ - d__1 = s, d__2 = *dmin__ * .333; - s = max(d__1,d__2); - *ttype = -3; - } - } else { - -/* Case 4. */ - - *ttype = -4; - s = *dmin__ * .25; - if (*dmin__ == *dn) { - gam = *dn; - a2 = 0.; - if (z__[nn - 5] > z__[nn - 7]) { - return 0; - } - b2 = z__[nn - 5] / z__[nn - 7]; - np = nn - 9; - } else { - np = nn - (*pp << 1); - b2 = z__[np - 2]; - gam = *dn1; - if (z__[np - 4] > z__[np - 2]) { - return 0; - } - a2 = z__[np - 4] / z__[np - 2]; - if (z__[nn - 9] > z__[nn - 11]) { - return 0; - } - b2 = z__[nn - 9] / z__[nn - 11]; - np = nn - 13; - } - -/* Approximate contribution to norm squared from I < NN-1. */ - - a2 += b2; - i__1 = (*i0 << 2) - 1 + *pp; - for (i4 = np; i4 >= i__1; i4 += -4) { - if (b2 == 0.) { - goto L20; - } - b1 = b2; - if (z__[i4] > z__[i4 - 2]) { - return 0; - } - b2 *= z__[i4] / z__[i4 - 2]; - a2 += b2; - if (max(b2,b1) * 100. < a2 || .563 < a2) { - goto L20; - } -/* L10: */ - } -L20: - a2 *= 1.05; - -/* Rayleigh quotient residual bound. */ - - if (a2 < .563) { - s = gam * (1. - sqrt(a2)) / (a2 + 1.); - } - } - } else if (*dmin__ == *dn2) { - -/* Case 5. */ - - *ttype = -5; - s = *dmin__ * .25; - -/* Compute contribution to norm squared from I > NN-2. */ - - np = nn - (*pp << 1); - b1 = z__[np - 2]; - b2 = z__[np - 6]; - gam = *dn2; - if (z__[np - 8] > b2 || z__[np - 4] > b1) { - return 0; - } - a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.); - -/* Approximate contribution to norm squared from I < NN-2. */ - - if (*n0 - *i0 > 2) { - b2 = z__[nn - 13] / z__[nn - 15]; - a2 += b2; - i__1 = (*i0 << 2) - 1 + *pp; - for (i4 = nn - 17; i4 >= i__1; i4 += -4) { - if (b2 == 0.) { - goto L40; - } - b1 = b2; - if (z__[i4] > z__[i4 - 2]) { - return 0; - } - b2 *= z__[i4] / z__[i4 - 2]; - a2 += b2; - if (max(b2,b1) * 100. < a2 || .563 < a2) { - goto L40; - } -/* L30: */ - } -L40: - a2 *= 1.05; - } - - if (a2 < .563) { - s = gam * (1. - sqrt(a2)) / (a2 + 1.); - } - } else { - -/* Case 6, no information to guide us. */ - - if (*ttype == -6) { - g += (1. - g) * .333; - } else if (*ttype == -18) { - g = .083250000000000005; - } else { - g = .25; - } - s = g * *dmin__; - *ttype = -6; - } - - } else if (*n0in == *n0 + 1) { - -/* One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. */ - - if (*dmin1 == *dn1 && *dmin2 == *dn2) { - -/* Cases 7 and 8. */ - - *ttype = -7; - s = *dmin1 * .333; - if (z__[nn - 5] > z__[nn - 7]) { - return 0; - } - b1 = z__[nn - 5] / z__[nn - 7]; - b2 = b1; - if (b2 == 0.) { - goto L60; - } - i__1 = (*i0 << 2) - 1 + *pp; - for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { - a2 = b1; - if (z__[i4] > z__[i4 - 2]) { - return 0; - } - b1 *= z__[i4] / z__[i4 - 2]; - b2 += b1; - if (max(b1,a2) * 100. < b2) { - goto L60; - } -/* L50: */ - } -L60: - b2 = sqrt(b2 * 1.05); -/* Computing 2nd power */ - d__1 = b2; - a2 = *dmin1 / (d__1 * d__1 + 1.); - gap2 = *dmin2 * .5 - a2; - if (gap2 > 0. && gap2 > b2 * a2) { -/* Computing MAX */ - d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2); - s = max(d__1,d__2); - } else { -/* Computing MAX */ - d__1 = s, d__2 = a2 * (1. - b2 * 1.01); - s = max(d__1,d__2); - *ttype = -8; - } - } else { - -/* Case 9. */ - - s = *dmin1 * .25; - if (*dmin1 == *dn1) { - s = *dmin1 * .5; - } - *ttype = -9; - } - - } else if (*n0in == *n0 + 2) { - -/* - Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. - - Cases 10 and 11. -*/ - - if (*dmin2 == *dn2 && z__[nn - 5] * 2. < z__[nn - 7]) { - *ttype = -10; - s = *dmin2 * .333; - if (z__[nn - 5] > z__[nn - 7]) { - return 0; - } - b1 = z__[nn - 5] / z__[nn - 7]; - b2 = b1; - if (b2 == 0.) { - goto L80; - } - i__1 = (*i0 << 2) - 1 + *pp; - for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { - if (z__[i4] > z__[i4 - 2]) { - return 0; - } - b1 *= z__[i4] / z__[i4 - 2]; - b2 += b1; - if (b1 * 100. < b2) { - goto L80; - } -/* L70: */ - } -L80: - b2 = sqrt(b2 * 1.05); -/* Computing 2nd power */ - d__1 = b2; - a2 = *dmin2 / (d__1 * d__1 + 1.); - gap2 = z__[nn - 7] + z__[nn - 9] - sqrt(z__[nn - 11]) * sqrt(z__[ - nn - 9]) - a2; - if (gap2 > 0. && gap2 > b2 * a2) { -/* Computing MAX */ - d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2); - s = max(d__1,d__2); - } else { -/* Computing MAX */ - d__1 = s, d__2 = a2 * (1. - b2 * 1.01); - s = max(d__1,d__2); - } - } else { - s = *dmin2 * .25; - *ttype = -11; - } - } else if (*n0in > *n0 + 2) { - -/* Case 12, more than two eigenvalues deflated. No information. */ - - s = 0.; - *ttype = -12; - } - - *tau = s; - return 0; - -/* End of DLASQ4 */ - -} /* dlasq4_ */ - -/* Subroutine */ int dlasq5_(integer *i0, integer *n0, doublereal *z__, - integer *pp, doublereal *tau, doublereal *dmin__, doublereal *dmin1, - doublereal *dmin2, doublereal *dn, doublereal *dnm1, doublereal *dnm2, - logical *ieee) -{ - /* System generated locals */ - integer i__1; - doublereal d__1, d__2; - - /* Local variables */ - static doublereal d__; - static integer j4, j4p2; - static doublereal emin, temp; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - May 17, 2000 - - - Purpose - ======= - - DLASQ5 computes one dqds transform in ping-pong form, one - version for IEEE machines another for non IEEE machines. - - Arguments - ========= - - I0 (input) INTEGER - First index. - - N0 (input) INTEGER - Last index. - - Z (input) DOUBLE PRECISION array, dimension ( 4*N ) - Z holds the qd array. EMIN is stored in Z(4*N0) to avoid - an extra argument. - - PP (input) INTEGER - PP=0 for ping, PP=1 for pong. - - TAU (input) DOUBLE PRECISION - This is the shift. - - DMIN (output) DOUBLE PRECISION - Minimum value of d. - - DMIN1 (output) DOUBLE PRECISION - Minimum value of d, excluding D( N0 ). - - DMIN2 (output) DOUBLE PRECISION - Minimum value of d, excluding D( N0 ) and D( N0-1 ). - - DN (output) DOUBLE PRECISION - d(N0), the last value of d. - - DNM1 (output) DOUBLE PRECISION - d(N0-1). - - DNM2 (output) DOUBLE PRECISION - d(N0-2). - - IEEE (input) LOGICAL - Flag for IEEE or non IEEE arithmetic. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --z__; - - /* Function Body */ - if (*n0 - *i0 - 1 <= 0) { - return 0; - } - - j4 = (*i0 << 2) + *pp - 3; - emin = z__[j4 + 4]; - d__ = z__[j4] - *tau; - *dmin__ = d__; - *dmin1 = -z__[j4]; - - if (*ieee) { - -/* Code for IEEE arithmetic. */ - - if (*pp == 0) { - i__1 = *n0 - 3 << 2; - for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { - z__[j4 - 2] = d__ + z__[j4 - 1]; - temp = z__[j4 + 1] / z__[j4 - 2]; - d__ = d__ * temp - *tau; - *dmin__ = min(*dmin__,d__); - z__[j4] = z__[j4 - 1] * temp; -/* Computing MIN */ - d__1 = z__[j4]; - emin = min(d__1,emin); -/* L10: */ - } - } else { - i__1 = *n0 - 3 << 2; - for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { - z__[j4 - 3] = d__ + z__[j4]; - temp = z__[j4 + 2] / z__[j4 - 3]; - d__ = d__ * temp - *tau; - *dmin__ = min(*dmin__,d__); - z__[j4 - 1] = z__[j4] * temp; -/* Computing MIN */ - d__1 = z__[j4 - 1]; - emin = min(d__1,emin); -/* L20: */ - } - } - -/* Unroll last two steps. */ - - *dnm2 = d__; - *dmin2 = *dmin__; - j4 = (*n0 - 2 << 2) - *pp; - j4p2 = j4 + (*pp << 1) - 1; - z__[j4 - 2] = *dnm2 + z__[j4p2]; - z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); - *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; - *dmin__ = min(*dmin__,*dnm1); - - *dmin1 = *dmin__; - j4 += 4; - j4p2 = j4 + (*pp << 1) - 1; - z__[j4 - 2] = *dnm1 + z__[j4p2]; - z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); - *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; - *dmin__ = min(*dmin__,*dn); - - } else { - -/* Code for non IEEE arithmetic. */ - - if (*pp == 0) { - i__1 = *n0 - 3 << 2; - for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { - z__[j4 - 2] = d__ + z__[j4 - 1]; - if (d__ < 0.) { - return 0; - } else { - z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); - d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]) - *tau; - } - *dmin__ = min(*dmin__,d__); -/* Computing MIN */ - d__1 = emin, d__2 = z__[j4]; - emin = min(d__1,d__2); -/* L30: */ - } - } else { - i__1 = *n0 - 3 << 2; - for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { - z__[j4 - 3] = d__ + z__[j4]; - if (d__ < 0.) { - return 0; - } else { - z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); - d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]) - *tau; - } - *dmin__ = min(*dmin__,d__); -/* Computing MIN */ - d__1 = emin, d__2 = z__[j4 - 1]; - emin = min(d__1,d__2); -/* L40: */ - } - } - -/* Unroll last two steps. */ - - *dnm2 = d__; - *dmin2 = *dmin__; - j4 = (*n0 - 2 << 2) - *pp; - j4p2 = j4 + (*pp << 1) - 1; - z__[j4 - 2] = *dnm2 + z__[j4p2]; - if (*dnm2 < 0.) { - return 0; - } else { - z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); - *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; - } - *dmin__ = min(*dmin__,*dnm1); - - *dmin1 = *dmin__; - j4 += 4; - j4p2 = j4 + (*pp << 1) - 1; - z__[j4 - 2] = *dnm1 + z__[j4p2]; - if (*dnm1 < 0.) { - return 0; - } else { - z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); - *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; - } - *dmin__ = min(*dmin__,*dn); - - } - - z__[j4 + 2] = *dn; - z__[(*n0 << 2) - *pp] = emin; - return 0; - -/* End of DLASQ5 */ - -} /* dlasq5_ */ - -/* Subroutine */ int dlasq6_(integer *i0, integer *n0, doublereal *z__, - integer *pp, doublereal *dmin__, doublereal *dmin1, doublereal *dmin2, - doublereal *dn, doublereal *dnm1, doublereal *dnm2) -{ - /* System generated locals */ - integer i__1; - doublereal d__1, d__2; - - /* Local variables */ - static doublereal d__; - static integer j4, j4p2; - static doublereal emin, temp; - - static doublereal safmin; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - DLASQ6 computes one dqd (shift equal to zero) transform in - ping-pong form, with protection against underflow and overflow. - - Arguments - ========= - - I0 (input) INTEGER - First index. - - N0 (input) INTEGER - Last index. - - Z (input) DOUBLE PRECISION array, dimension ( 4*N ) - Z holds the qd array. EMIN is stored in Z(4*N0) to avoid - an extra argument. - - PP (input) INTEGER - PP=0 for ping, PP=1 for pong. - - DMIN (output) DOUBLE PRECISION - Minimum value of d. - - DMIN1 (output) DOUBLE PRECISION - Minimum value of d, excluding D( N0 ). - - DMIN2 (output) DOUBLE PRECISION - Minimum value of d, excluding D( N0 ) and D( N0-1 ). - - DN (output) DOUBLE PRECISION - d(N0), the last value of d. - - DNM1 (output) DOUBLE PRECISION - d(N0-1). - - DNM2 (output) DOUBLE PRECISION - d(N0-2). - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --z__; - - /* Function Body */ - if (*n0 - *i0 - 1 <= 0) { - return 0; - } - - safmin = SAFEMINIMUM; - j4 = (*i0 << 2) + *pp - 3; - emin = z__[j4 + 4]; - d__ = z__[j4]; - *dmin__ = d__; - - if (*pp == 0) { - i__1 = *n0 - 3 << 2; - for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { - z__[j4 - 2] = d__ + z__[j4 - 1]; - if (z__[j4 - 2] == 0.) { - z__[j4] = 0.; - d__ = z__[j4 + 1]; - *dmin__ = d__; - emin = 0.; - } else if (safmin * z__[j4 + 1] < z__[j4 - 2] && safmin * z__[j4 - - 2] < z__[j4 + 1]) { - temp = z__[j4 + 1] / z__[j4 - 2]; - z__[j4] = z__[j4 - 1] * temp; - d__ *= temp; - } else { - z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); - d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]); - } - *dmin__ = min(*dmin__,d__); -/* Computing MIN */ - d__1 = emin, d__2 = z__[j4]; - emin = min(d__1,d__2); -/* L10: */ - } - } else { - i__1 = *n0 - 3 << 2; - for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { - z__[j4 - 3] = d__ + z__[j4]; - if (z__[j4 - 3] == 0.) { - z__[j4 - 1] = 0.; - d__ = z__[j4 + 2]; - *dmin__ = d__; - emin = 0.; - } else if (safmin * z__[j4 + 2] < z__[j4 - 3] && safmin * z__[j4 - - 3] < z__[j4 + 2]) { - temp = z__[j4 + 2] / z__[j4 - 3]; - z__[j4 - 1] = z__[j4] * temp; - d__ *= temp; - } else { - z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); - d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]); - } - *dmin__ = min(*dmin__,d__); -/* Computing MIN */ - d__1 = emin, d__2 = z__[j4 - 1]; - emin = min(d__1,d__2); -/* L20: */ - } - } - -/* Unroll last two steps. */ - - *dnm2 = d__; - *dmin2 = *dmin__; - j4 = (*n0 - 2 << 2) - *pp; - j4p2 = j4 + (*pp << 1) - 1; - z__[j4 - 2] = *dnm2 + z__[j4p2]; - if (z__[j4 - 2] == 0.) { - z__[j4] = 0.; - *dnm1 = z__[j4p2 + 2]; - *dmin__ = *dnm1; - emin = 0.; - } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < - z__[j4p2 + 2]) { - temp = z__[j4p2 + 2] / z__[j4 - 2]; - z__[j4] = z__[j4p2] * temp; - *dnm1 = *dnm2 * temp; - } else { - z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); - *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]); - } - *dmin__ = min(*dmin__,*dnm1); - - *dmin1 = *dmin__; - j4 += 4; - j4p2 = j4 + (*pp << 1) - 1; - z__[j4 - 2] = *dnm1 + z__[j4p2]; - if (z__[j4 - 2] == 0.) { - z__[j4] = 0.; - *dn = z__[j4p2 + 2]; - *dmin__ = *dn; - emin = 0.; - } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < - z__[j4p2 + 2]) { - temp = z__[j4p2 + 2] / z__[j4 - 2]; - z__[j4] = z__[j4p2] * temp; - *dn = *dnm1 * temp; - } else { - z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); - *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]); - } - *dmin__ = min(*dmin__,*dn); - - z__[j4 + 2] = *dn; - z__[(*n0 << 2) - *pp] = emin; - return 0; - -/* End of DLASQ6 */ - -} /* dlasq6_ */ - -/* Subroutine */ int dlasr_(char *side, char *pivot, char *direct, integer *m, - integer *n, doublereal *c__, doublereal *s, doublereal *a, integer * - lda) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j, info; - static doublereal temp; - extern logical lsame_(char *, char *); - static doublereal ctemp, stemp; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLASR performs the transformation - - A := P*A, when SIDE = 'L' or 'l' ( Left-hand side ) - - A := A*P', when SIDE = 'R' or 'r' ( Right-hand side ) - - where A is an m by n real matrix and P is an orthogonal matrix, - consisting of a sequence of plane rotations determined by the - parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l' - and z = n when SIDE = 'R' or 'r' ): - - When DIRECT = 'F' or 'f' ( Forward sequence ) then - - P = P( z - 1 )*...*P( 2 )*P( 1 ), - - and when DIRECT = 'B' or 'b' ( Backward sequence ) then - - P = P( 1 )*P( 2 )*...*P( z - 1 ), - - where P( k ) is a plane rotation matrix for the following planes: - - when PIVOT = 'V' or 'v' ( Variable pivot ), - the plane ( k, k + 1 ) - - when PIVOT = 'T' or 't' ( Top pivot ), - the plane ( 1, k + 1 ) - - when PIVOT = 'B' or 'b' ( Bottom pivot ), - the plane ( k, z ) - - c( k ) and s( k ) must contain the cosine and sine that define the - matrix P( k ). The two by two plane rotation part of the matrix - P( k ), R( k ), is assumed to be of the form - - R( k ) = ( c( k ) s( k ) ). - ( -s( k ) c( k ) ) - - This version vectorises across rows of the array A when SIDE = 'L'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - Specifies whether the plane rotation matrix P is applied to - A on the left or the right. - = 'L': Left, compute A := P*A - = 'R': Right, compute A:= A*P' - - DIRECT (input) CHARACTER*1 - Specifies whether P is a forward or backward sequence of - plane rotations. - = 'F': Forward, P = P( z - 1 )*...*P( 2 )*P( 1 ) - = 'B': Backward, P = P( 1 )*P( 2 )*...*P( z - 1 ) - - PIVOT (input) CHARACTER*1 - Specifies the plane for which P(k) is a plane rotation - matrix. - = 'V': Variable pivot, the plane (k,k+1) - = 'T': Top pivot, the plane (1,k+1) - = 'B': Bottom pivot, the plane (k,z) - - M (input) INTEGER - The number of rows of the matrix A. If m <= 1, an immediate - return is effected. - - N (input) INTEGER - The number of columns of the matrix A. If n <= 1, an - immediate return is effected. - - C, S (input) DOUBLE PRECISION arrays, dimension - (M-1) if SIDE = 'L' - (N-1) if SIDE = 'R' - c(k) and s(k) contain the cosine and sine that define the - matrix P(k). The two by two plane rotation part of the - matrix P(k), R(k), is assumed to be of the form - R( k ) = ( c( k ) s( k ) ). - ( -s( k ) c( k ) ) - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - The m by n matrix A. On exit, A is overwritten by P*A if - SIDE = 'R' or by A*P' if SIDE = 'L'. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - --c__; - --s; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - info = 0; - if (! (lsame_(side, "L") || lsame_(side, "R"))) { - info = 1; - } else if (! (lsame_(pivot, "V") || lsame_(pivot, - "T") || lsame_(pivot, "B"))) { - info = 2; - } else if (! (lsame_(direct, "F") || lsame_(direct, - "B"))) { - info = 3; - } else if (*m < 0) { - info = 4; - } else if (*n < 0) { - info = 5; - } else if (*lda < max(1,*m)) { - info = 9; - } - if (info != 0) { - xerbla_("DLASR ", &info); - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0) { - return 0; - } - if (lsame_(side, "L")) { - -/* Form P * A */ - - if (lsame_(pivot, "V")) { - if (lsame_(direct, "F")) { - i__1 = *m - 1; - for (j = 1; j <= i__1; ++j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1. || stemp != 0.) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = a[j + 1 + i__ * a_dim1]; - a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp * - a[j + i__ * a_dim1]; - a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j - + i__ * a_dim1]; -/* L10: */ - } - } -/* L20: */ - } - } else if (lsame_(direct, "B")) { - for (j = *m - 1; j >= 1; --j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1. || stemp != 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - temp = a[j + 1 + i__ * a_dim1]; - a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp * - a[j + i__ * a_dim1]; - a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j - + i__ * a_dim1]; -/* L30: */ - } - } -/* L40: */ - } - } - } else if (lsame_(pivot, "T")) { - if (lsame_(direct, "F")) { - i__1 = *m; - for (j = 2; j <= i__1; ++j) { - ctemp = c__[j - 1]; - stemp = s[j - 1]; - if (ctemp != 1. || stemp != 0.) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = a[j + i__ * a_dim1]; - a[j + i__ * a_dim1] = ctemp * temp - stemp * a[ - i__ * a_dim1 + 1]; - a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[ - i__ * a_dim1 + 1]; -/* L50: */ - } - } -/* L60: */ - } - } else if (lsame_(direct, "B")) { - for (j = *m; j >= 2; --j) { - ctemp = c__[j - 1]; - stemp = s[j - 1]; - if (ctemp != 1. || stemp != 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - temp = a[j + i__ * a_dim1]; - a[j + i__ * a_dim1] = ctemp * temp - stemp * a[ - i__ * a_dim1 + 1]; - a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[ - i__ * a_dim1 + 1]; -/* L70: */ - } - } -/* L80: */ - } - } - } else if (lsame_(pivot, "B")) { - if (lsame_(direct, "F")) { - i__1 = *m - 1; - for (j = 1; j <= i__1; ++j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1. || stemp != 0.) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = a[j + i__ * a_dim1]; - a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1] - + ctemp * temp; - a[*m + i__ * a_dim1] = ctemp * a[*m + i__ * - a_dim1] - stemp * temp; -/* L90: */ - } - } -/* L100: */ - } - } else if (lsame_(direct, "B")) { - for (j = *m - 1; j >= 1; --j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1. || stemp != 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - temp = a[j + i__ * a_dim1]; - a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1] - + ctemp * temp; - a[*m + i__ * a_dim1] = ctemp * a[*m + i__ * - a_dim1] - stemp * temp; -/* L110: */ - } - } -/* L120: */ - } - } - } - } else if (lsame_(side, "R")) { - -/* Form A * P' */ - - if (lsame_(pivot, "V")) { - if (lsame_(direct, "F")) { - i__1 = *n - 1; - for (j = 1; j <= i__1; ++j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1. || stemp != 0.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = a[i__ + (j + 1) * a_dim1]; - a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp * - a[i__ + j * a_dim1]; - a[i__ + j * a_dim1] = stemp * temp + ctemp * a[ - i__ + j * a_dim1]; -/* L130: */ - } - } -/* L140: */ - } - } else if (lsame_(direct, "B")) { - for (j = *n - 1; j >= 1; --j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1. || stemp != 0.) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - temp = a[i__ + (j + 1) * a_dim1]; - a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp * - a[i__ + j * a_dim1]; - a[i__ + j * a_dim1] = stemp * temp + ctemp * a[ - i__ + j * a_dim1]; -/* L150: */ - } - } -/* L160: */ - } - } - } else if (lsame_(pivot, "T")) { - if (lsame_(direct, "F")) { - i__1 = *n; - for (j = 2; j <= i__1; ++j) { - ctemp = c__[j - 1]; - stemp = s[j - 1]; - if (ctemp != 1. || stemp != 0.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = a[i__ + j * a_dim1]; - a[i__ + j * a_dim1] = ctemp * temp - stemp * a[ - i__ + a_dim1]; - a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ + - a_dim1]; -/* L170: */ - } - } -/* L180: */ - } - } else if (lsame_(direct, "B")) { - for (j = *n; j >= 2; --j) { - ctemp = c__[j - 1]; - stemp = s[j - 1]; - if (ctemp != 1. || stemp != 0.) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - temp = a[i__ + j * a_dim1]; - a[i__ + j * a_dim1] = ctemp * temp - stemp * a[ - i__ + a_dim1]; - a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ + - a_dim1]; -/* L190: */ - } - } -/* L200: */ - } - } - } else if (lsame_(pivot, "B")) { - if (lsame_(direct, "F")) { - i__1 = *n - 1; - for (j = 1; j <= i__1; ++j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1. || stemp != 0.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = a[i__ + j * a_dim1]; - a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1] - + ctemp * temp; - a[i__ + *n * a_dim1] = ctemp * a[i__ + *n * - a_dim1] - stemp * temp; -/* L210: */ - } - } -/* L220: */ - } - } else if (lsame_(direct, "B")) { - for (j = *n - 1; j >= 1; --j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1. || stemp != 0.) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - temp = a[i__ + j * a_dim1]; - a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1] - + ctemp * temp; - a[i__ + *n * a_dim1] = ctemp * a[i__ + *n * - a_dim1] - stemp * temp; -/* L230: */ - } - } -/* L240: */ - } - } - } - } - - return 0; - -/* End of DLASR */ - -} /* dlasr_ */ - -/* Subroutine */ int dlasrt_(char *id, integer *n, doublereal *d__, integer * - info) -{ - /* System generated locals */ - integer i__1, i__2; - - /* Local variables */ - static integer i__, j; - static doublereal d1, d2, d3; - static integer dir; - static doublereal tmp; - static integer endd; - extern logical lsame_(char *, char *); - static integer stack[64] /* was [2][32] */; - static doublereal dmnmx; - static integer start; - extern /* Subroutine */ int xerbla_(char *, integer *); - static integer stkpnt; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - Sort the numbers in D in increasing order (if ID = 'I') or - in decreasing order (if ID = 'D' ). - - Use Quick Sort, reverting to Insertion sort on arrays of - size <= 20. Dimension of STACK limits N to about 2**32. - - Arguments - ========= - - ID (input) CHARACTER*1 - = 'I': sort D in increasing order; - = 'D': sort D in decreasing order. - - N (input) INTEGER - The length of the array D. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the array to be sorted. - On exit, D has been sorted into increasing order - (D(1) <= ... <= D(N) ) or into decreasing order - (D(1) >= ... >= D(N) ), depending on ID. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input paramters. -*/ - - /* Parameter adjustments */ - --d__; - - /* Function Body */ - *info = 0; - dir = -1; - if (lsame_(id, "D")) { - dir = 0; - } else if (lsame_(id, "I")) { - dir = 1; - } - if (dir == -1) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLASRT", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n <= 1) { - return 0; - } - - stkpnt = 1; - stack[0] = 1; - stack[1] = *n; -L10: - start = stack[(stkpnt << 1) - 2]; - endd = stack[(stkpnt << 1) - 1]; - --stkpnt; - if (endd - start <= 20 && endd - start > 0) { - -/* Do Insertion sort on D( START:ENDD ) */ - - if (dir == 0) { - -/* Sort into decreasing order */ - - i__1 = endd; - for (i__ = start + 1; i__ <= i__1; ++i__) { - i__2 = start + 1; - for (j = i__; j >= i__2; --j) { - if (d__[j] > d__[j - 1]) { - dmnmx = d__[j]; - d__[j] = d__[j - 1]; - d__[j - 1] = dmnmx; - } else { - goto L30; - } -/* L20: */ - } -L30: - ; - } - - } else { - -/* Sort into increasing order */ - - i__1 = endd; - for (i__ = start + 1; i__ <= i__1; ++i__) { - i__2 = start + 1; - for (j = i__; j >= i__2; --j) { - if (d__[j] < d__[j - 1]) { - dmnmx = d__[j]; - d__[j] = d__[j - 1]; - d__[j - 1] = dmnmx; - } else { - goto L50; - } -/* L40: */ - } -L50: - ; - } - - } - - } else if (endd - start > 20) { - -/* - Partition D( START:ENDD ) and stack parts, largest one first - - Choose partition entry as median of 3 -*/ - - d1 = d__[start]; - d2 = d__[endd]; - i__ = (start + endd) / 2; - d3 = d__[i__]; - if (d1 < d2) { - if (d3 < d1) { - dmnmx = d1; - } else if (d3 < d2) { - dmnmx = d3; - } else { - dmnmx = d2; - } - } else { - if (d3 < d2) { - dmnmx = d2; - } else if (d3 < d1) { - dmnmx = d3; - } else { - dmnmx = d1; - } - } - - if (dir == 0) { - -/* Sort into decreasing order */ - - i__ = start - 1; - j = endd + 1; -L60: -L70: - --j; - if (d__[j] < dmnmx) { - goto L70; - } -L80: - ++i__; - if (d__[i__] > dmnmx) { - goto L80; - } - if (i__ < j) { - tmp = d__[i__]; - d__[i__] = d__[j]; - d__[j] = tmp; - goto L60; - } - if (j - start > endd - j - 1) { - ++stkpnt; - stack[(stkpnt << 1) - 2] = start; - stack[(stkpnt << 1) - 1] = j; - ++stkpnt; - stack[(stkpnt << 1) - 2] = j + 1; - stack[(stkpnt << 1) - 1] = endd; - } else { - ++stkpnt; - stack[(stkpnt << 1) - 2] = j + 1; - stack[(stkpnt << 1) - 1] = endd; - ++stkpnt; - stack[(stkpnt << 1) - 2] = start; - stack[(stkpnt << 1) - 1] = j; - } - } else { - -/* Sort into increasing order */ - - i__ = start - 1; - j = endd + 1; -L90: -L100: - --j; - if (d__[j] > dmnmx) { - goto L100; - } -L110: - ++i__; - if (d__[i__] < dmnmx) { - goto L110; - } - if (i__ < j) { - tmp = d__[i__]; - d__[i__] = d__[j]; - d__[j] = tmp; - goto L90; - } - if (j - start > endd - j - 1) { - ++stkpnt; - stack[(stkpnt << 1) - 2] = start; - stack[(stkpnt << 1) - 1] = j; - ++stkpnt; - stack[(stkpnt << 1) - 2] = j + 1; - stack[(stkpnt << 1) - 1] = endd; - } else { - ++stkpnt; - stack[(stkpnt << 1) - 2] = j + 1; - stack[(stkpnt << 1) - 1] = endd; - ++stkpnt; - stack[(stkpnt << 1) - 2] = start; - stack[(stkpnt << 1) - 1] = j; - } - } - } - if (stkpnt > 0) { - goto L10; - } - return 0; - -/* End of DLASRT */ - -} /* dlasrt_ */ - -/* Subroutine */ int dlassq_(integer *n, doublereal *x, integer *incx, - doublereal *scale, doublereal *sumsq) -{ - /* System generated locals */ - integer i__1, i__2; - doublereal d__1; - - /* Local variables */ - static integer ix; - static doublereal absxi; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DLASSQ returns the values scl and smsq such that - - ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, - - where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is - assumed to be non-negative and scl returns the value - - scl = max( scale, abs( x( i ) ) ). - - scale and sumsq must be supplied in SCALE and SUMSQ and - scl and smsq are overwritten on SCALE and SUMSQ respectively. - - The routine makes only one pass through the vector x. - - Arguments - ========= - - N (input) INTEGER - The number of elements to be used from the vector X. - - X (input) DOUBLE PRECISION array, dimension (N) - The vector for which a scaled sum of squares is computed. - x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. - - INCX (input) INTEGER - The increment between successive values of the vector X. - INCX > 0. - - SCALE (input/output) DOUBLE PRECISION - On entry, the value scale in the equation above. - On exit, SCALE is overwritten with scl , the scaling factor - for the sum of squares. - - SUMSQ (input/output) DOUBLE PRECISION - On entry, the value sumsq in the equation above. - On exit, SUMSQ is overwritten with smsq , the basic sum of - squares from which scl has been factored out. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --x; - - /* Function Body */ - if (*n > 0) { - i__1 = (*n - 1) * *incx + 1; - i__2 = *incx; - for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { - if (x[ix] != 0.) { - absxi = (d__1 = x[ix], abs(d__1)); - if (*scale < absxi) { -/* Computing 2nd power */ - d__1 = *scale / absxi; - *sumsq = *sumsq * (d__1 * d__1) + 1; - *scale = absxi; - } else { -/* Computing 2nd power */ - d__1 = absxi / *scale; - *sumsq += d__1 * d__1; - } - } -/* L10: */ - } - } - return 0; - -/* End of DLASSQ */ - -} /* dlassq_ */ - -/* Subroutine */ int dlasv2_(doublereal *f, doublereal *g, doublereal *h__, - doublereal *ssmin, doublereal *ssmax, doublereal *snr, doublereal * - csr, doublereal *snl, doublereal *csl) -{ - /* System generated locals */ - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); - - /* Local variables */ - static doublereal a, d__, l, m, r__, s, t, fa, ga, ha, ft, gt, ht, mm, tt, - clt, crt, slt, srt; - static integer pmax; - static doublereal temp; - static logical swap; - static doublereal tsign; - - static logical gasmal; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLASV2 computes the singular value decomposition of a 2-by-2 - triangular matrix - [ F G ] - [ 0 H ]. - On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the - smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and - right singular vectors for abs(SSMAX), giving the decomposition - - [ CSL SNL ] [ F G ] [ CSR -SNR ] = [ SSMAX 0 ] - [-SNL CSL ] [ 0 H ] [ SNR CSR ] [ 0 SSMIN ]. - - Arguments - ========= - - F (input) DOUBLE PRECISION - The (1,1) element of the 2-by-2 matrix. - - G (input) DOUBLE PRECISION - The (1,2) element of the 2-by-2 matrix. - - H (input) DOUBLE PRECISION - The (2,2) element of the 2-by-2 matrix. - - SSMIN (output) DOUBLE PRECISION - abs(SSMIN) is the smaller singular value. - - SSMAX (output) DOUBLE PRECISION - abs(SSMAX) is the larger singular value. - - SNL (output) DOUBLE PRECISION - CSL (output) DOUBLE PRECISION - The vector (CSL, SNL) is a unit left singular vector for the - singular value abs(SSMAX). - - SNR (output) DOUBLE PRECISION - CSR (output) DOUBLE PRECISION - The vector (CSR, SNR) is a unit right singular vector for the - singular value abs(SSMAX). - - Further Details - =============== - - Any input parameter may be aliased with any output parameter. - - Barring over/underflow and assuming a guard digit in subtraction, all - output quantities are correct to within a few units in the last - place (ulps). - - In IEEE arithmetic, the code works correctly if one matrix element is - infinite. - - Overflow will not occur unless the largest singular value itself - overflows or is within a few ulps of overflow. (On machines with - partial overflow, like the Cray, overflow may occur if the largest - singular value is within a factor of 2 of overflow.) - - Underflow is harmless if underflow is gradual. Otherwise, results - may correspond to a matrix modified by perturbations of size near - the underflow threshold. - - ===================================================================== -*/ - - - ft = *f; - fa = abs(ft); - ht = *h__; - ha = abs(*h__); - -/* - PMAX points to the maximum absolute element of matrix - PMAX = 1 if F largest in absolute values - PMAX = 2 if G largest in absolute values - PMAX = 3 if H largest in absolute values -*/ - - pmax = 1; - swap = ha > fa; - if (swap) { - pmax = 3; - temp = ft; - ft = ht; - ht = temp; - temp = fa; - fa = ha; - ha = temp; - -/* Now FA .ge. HA */ - - } - gt = *g; - ga = abs(gt); - if (ga == 0.) { - -/* Diagonal matrix */ - - *ssmin = ha; - *ssmax = fa; - clt = 1.; - crt = 1.; - slt = 0.; - srt = 0.; - } else { - gasmal = TRUE_; - if (ga > fa) { - pmax = 2; - if (fa / ga < EPSILON) { - -/* Case of very large GA */ - - gasmal = FALSE_; - *ssmax = ga; - if (ha > 1.) { - *ssmin = fa / (ga / ha); - } else { - *ssmin = fa / ga * ha; - } - clt = 1.; - slt = ht / gt; - srt = 1.; - crt = ft / gt; - } - } - if (gasmal) { - -/* Normal case */ - - d__ = fa - ha; - if (d__ == fa) { - -/* Copes with infinite F or H */ - - l = 1.; - } else { - l = d__ / fa; - } - -/* Note that 0 .le. L .le. 1 */ - - m = gt / ft; - -/* Note that abs(M) .le. 1/macheps */ - - t = 2. - l; - -/* Note that T .ge. 1 */ - - mm = m * m; - tt = t * t; - s = sqrt(tt + mm); - -/* Note that 1 .le. S .le. 1 + 1/macheps */ - - if (l == 0.) { - r__ = abs(m); - } else { - r__ = sqrt(l * l + mm); - } - -/* Note that 0 .le. R .le. 1 + 1/macheps */ - - a = (s + r__) * .5; - -/* Note that 1 .le. A .le. 1 + abs(M) */ - - *ssmin = ha / a; - *ssmax = fa * a; - if (mm == 0.) { - -/* Note that M is very tiny */ - - if (l == 0.) { - t = d_sign(&c_b5242, &ft) * d_sign(&c_b2453, >); - } else { - t = gt / d_sign(&d__, &ft) + m / t; - } - } else { - t = (m / (s + t) + m / (r__ + l)) * (a + 1.); - } - l = sqrt(t * t + 4.); - crt = 2. / l; - srt = t / l; - clt = (crt + srt * m) / a; - slt = ht / ft * srt / a; - } - } - if (swap) { - *csl = srt; - *snl = crt; - *csr = slt; - *snr = clt; - } else { - *csl = clt; - *snl = slt; - *csr = crt; - *snr = srt; - } - -/* Correct signs of SSMAX and SSMIN */ - - if (pmax == 1) { - tsign = d_sign(&c_b2453, csr) * d_sign(&c_b2453, csl) * d_sign(& - c_b2453, f); - } - if (pmax == 2) { - tsign = d_sign(&c_b2453, snr) * d_sign(&c_b2453, csl) * d_sign(& - c_b2453, g); - } - if (pmax == 3) { - tsign = d_sign(&c_b2453, snr) * d_sign(&c_b2453, snl) * d_sign(& - c_b2453, h__); - } - *ssmax = d_sign(ssmax, &tsign); - d__1 = tsign * d_sign(&c_b2453, f) * d_sign(&c_b2453, h__); - *ssmin = d_sign(ssmin, &d__1); - return 0; - -/* End of DLASV2 */ - -} /* dlasv2_ */ - -/* Subroutine */ int dlaswp_(integer *n, doublereal *a, integer *lda, integer - *k1, integer *k2, integer *ipiv, integer *incx) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc; - static doublereal temp; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DLASWP performs a series of row interchanges on the matrix A. - One row interchange is initiated for each of rows K1 through K2 of A. - - Arguments - ========= - - N (input) INTEGER - The number of columns of the matrix A. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the matrix of column dimension N to which the row - interchanges will be applied. - On exit, the permuted matrix. - - LDA (input) INTEGER - The leading dimension of the array A. - - K1 (input) INTEGER - The first element of IPIV for which a row interchange will - be done. - - K2 (input) INTEGER - The last element of IPIV for which a row interchange will - be done. - - IPIV (input) INTEGER array, dimension (M*abs(INCX)) - The vector of pivot indices. Only the elements in positions - K1 through K2 of IPIV are accessed. - IPIV(K) = L implies rows K and L are to be interchanged. - - INCX (input) INTEGER - The increment between successive values of IPIV. If IPIV - is negative, the pivots are applied in reverse order. - - Further Details - =============== - - Modified by - R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA - - ===================================================================== - - - Interchange row I with row IPIV(I) for each of rows K1 through K2. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - - /* Function Body */ - if (*incx > 0) { - ix0 = *k1; - i1 = *k1; - i2 = *k2; - inc = 1; - } else if (*incx < 0) { - ix0 = (1 - *k2) * *incx + 1; - i1 = *k2; - i2 = *k1; - inc = -1; - } else { - return 0; - } - - n32 = *n / 32 << 5; - if (n32 != 0) { - i__1 = n32; - for (j = 1; j <= i__1; j += 32) { - ix = ix0; - i__2 = i2; - i__3 = inc; - for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) - { - ip = ipiv[ix]; - if (ip != i__) { - i__4 = j + 31; - for (k = j; k <= i__4; ++k) { - temp = a[i__ + k * a_dim1]; - a[i__ + k * a_dim1] = a[ip + k * a_dim1]; - a[ip + k * a_dim1] = temp; -/* L10: */ - } - } - ix += *incx; -/* L20: */ - } -/* L30: */ - } - } - if (n32 != *n) { - ++n32; - ix = ix0; - i__1 = i2; - i__3 = inc; - for (i__ = i1; i__3 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__3) { - ip = ipiv[ix]; - if (ip != i__) { - i__2 = *n; - for (k = n32; k <= i__2; ++k) { - temp = a[i__ + k * a_dim1]; - a[i__ + k * a_dim1] = a[ip + k * a_dim1]; - a[ip + k * a_dim1] = temp; -/* L40: */ - } - } - ix += *incx; -/* L50: */ - } - } - - return 0; - -/* End of DLASWP */ - -} /* dlaswp_ */ - -/* Subroutine */ int dlatrd_(char *uplo, integer *n, integer *nb, doublereal * - a, integer *lda, doublereal *e, doublereal *tau, doublereal *w, - integer *ldw) -{ - /* System generated locals */ - integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, iw; - extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, - integer *); - static doublereal alpha; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *), daxpy_(integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *), - dsymv_(char *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *), dlarfg_(integer *, doublereal *, doublereal *, integer *, - doublereal *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLATRD reduces NB rows and columns of a real symmetric matrix A to - symmetric tridiagonal form by an orthogonal similarity - transformation Q' * A * Q, and returns the matrices V and W which are - needed to apply the transformation to the unreduced part of A. - - If UPLO = 'U', DLATRD reduces the last NB rows and columns of a - matrix, of which the upper triangle is supplied; - if UPLO = 'L', DLATRD reduces the first NB rows and columns of a - matrix, of which the lower triangle is supplied. - - This is an auxiliary routine called by DSYTRD. - - Arguments - ========= - - UPLO (input) CHARACTER - Specifies whether the upper or lower triangular part of the - symmetric matrix A is stored: - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the matrix A. - - NB (input) INTEGER - The number of rows and columns to be reduced. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the symmetric matrix A. If UPLO = 'U', the leading - n-by-n upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading n-by-n lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - On exit: - if UPLO = 'U', the last NB columns have been reduced to - tridiagonal form, with the diagonal elements overwriting - the diagonal elements of A; the elements above the diagonal - with the array TAU, represent the orthogonal matrix Q as a - product of elementary reflectors; - if UPLO = 'L', the first NB columns have been reduced to - tridiagonal form, with the diagonal elements overwriting - the diagonal elements of A; the elements below the diagonal - with the array TAU, represent the orthogonal matrix Q as a - product of elementary reflectors. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= (1,N). - - E (output) DOUBLE PRECISION array, dimension (N-1) - If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal - elements of the last NB columns of the reduced matrix; - if UPLO = 'L', E(1:nb) contains the subdiagonal elements of - the first NB columns of the reduced matrix. - - TAU (output) DOUBLE PRECISION array, dimension (N-1) - The scalar factors of the elementary reflectors, stored in - TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. - See Further Details. - - W (output) DOUBLE PRECISION array, dimension (LDW,NB) - The n-by-nb matrix W required to update the unreduced part - of A. - - LDW (input) INTEGER - The leading dimension of the array W. LDW >= max(1,N). - - Further Details - =============== - - If UPLO = 'U', the matrix Q is represented as a product of elementary - reflectors - - Q = H(n) H(n-1) . . . H(n-nb+1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), - and tau in TAU(i-1). - - If UPLO = 'L', the matrix Q is represented as a product of elementary - reflectors - - Q = H(1) H(2) . . . H(nb). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), - and tau in TAU(i). - - The elements of the vectors v together form the n-by-nb matrix V - which is needed, with W, to apply the transformation to the unreduced - part of the matrix, using a symmetric rank-2k update of the form: - A := A - V*W' - W*V'. - - The contents of A on exit are illustrated by the following examples - with n = 5 and nb = 2: - - if UPLO = 'U': if UPLO = 'L': - - ( a a a v4 v5 ) ( d ) - ( a a v4 v5 ) ( 1 d ) - ( a 1 v5 ) ( v1 1 a ) - ( d 1 ) ( v1 v2 a a ) - ( d ) ( v1 v2 a a a ) - - where d denotes a diagonal element of the reduced matrix, a denotes - an element of the original matrix that is unchanged, and vi denotes - an element of the vector defining H(i). - - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --e; - --tau; - w_dim1 = *ldw; - w_offset = 1 + w_dim1; - w -= w_offset; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - - if (lsame_(uplo, "U")) { - -/* Reduce last NB columns of upper triangle */ - - i__1 = *n - *nb + 1; - for (i__ = *n; i__ >= i__1; --i__) { - iw = i__ - *n + *nb; - if (i__ < *n) { - -/* Update A(1:i,i) */ - - i__2 = *n - i__; - dgemv_("No transpose", &i__, &i__2, &c_b2589, &a[(i__ + 1) * - a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, & - c_b2453, &a[i__ * a_dim1 + 1], &c__1); - i__2 = *n - i__; - dgemv_("No transpose", &i__, &i__2, &c_b2589, &w[(iw + 1) * - w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b2453, &a[i__ * a_dim1 + 1], &c__1); - } - if (i__ > 1) { - -/* - Generate elementary reflector H(i) to annihilate - A(1:i-2,i) -*/ - - i__2 = i__ - 1; - dlarfg_(&i__2, &a[i__ - 1 + i__ * a_dim1], &a[i__ * a_dim1 + - 1], &c__1, &tau[i__ - 1]); - e[i__ - 1] = a[i__ - 1 + i__ * a_dim1]; - a[i__ - 1 + i__ * a_dim1] = 1.; - -/* Compute W(1:i-1,i) */ - - i__2 = i__ - 1; - dsymv_("Upper", &i__2, &c_b2453, &a[a_offset], lda, &a[i__ * - a_dim1 + 1], &c__1, &c_b2467, &w[iw * w_dim1 + 1], & - c__1); - if (i__ < *n) { - i__2 = i__ - 1; - i__3 = *n - i__; - dgemv_("Transpose", &i__2, &i__3, &c_b2453, &w[(iw + 1) * - w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], &c__1, & - c_b2467, &w[i__ + 1 + iw * w_dim1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - dgemv_("No transpose", &i__2, &i__3, &c_b2589, &a[(i__ + - 1) * a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], - &c__1, &c_b2453, &w[iw * w_dim1 + 1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - dgemv_("Transpose", &i__2, &i__3, &c_b2453, &a[(i__ + 1) * - a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], &c__1, & - c_b2467, &w[i__ + 1 + iw * w_dim1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - dgemv_("No transpose", &i__2, &i__3, &c_b2589, &w[(iw + 1) - * w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], & - c__1, &c_b2453, &w[iw * w_dim1 + 1], &c__1); - } - i__2 = i__ - 1; - dscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1); - i__2 = i__ - 1; - alpha = tau[i__ - 1] * -.5 * ddot_(&i__2, &w[iw * w_dim1 + 1], - &c__1, &a[i__ * a_dim1 + 1], &c__1); - i__2 = i__ - 1; - daxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw * - w_dim1 + 1], &c__1); - } - -/* L10: */ - } - } else { - -/* Reduce first NB columns of lower triangle */ - - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Update A(i:n,i) */ - - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b2589, &a[i__ + a_dim1], - lda, &w[i__ + w_dim1], ldw, &c_b2453, &a[i__ + i__ * - a_dim1], &c__1); - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b2589, &w[i__ + w_dim1], - ldw, &a[i__ + a_dim1], lda, &c_b2453, &a[i__ + i__ * - a_dim1], &c__1); - if (i__ < *n) { - -/* - Generate elementary reflector H(i) to annihilate - A(i+2:n,i) -*/ - - i__2 = *n - i__; -/* Computing MIN */ - i__3 = i__ + 2; - dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + - i__ * a_dim1], &c__1, &tau[i__]); - e[i__] = a[i__ + 1 + i__ * a_dim1]; - a[i__ + 1 + i__ * a_dim1] = 1.; - -/* Compute W(i+1:n,i) */ - - i__2 = *n - i__; - dsymv_("Lower", &i__2, &c_b2453, &a[i__ + 1 + (i__ + 1) * - a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b2467, &w[i__ + 1 + i__ * w_dim1], &c__1) - ; - i__2 = *n - i__; - i__3 = i__ - 1; - dgemv_("Transpose", &i__2, &i__3, &c_b2453, &w[i__ + 1 + - w_dim1], ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b2467, &w[i__ * w_dim1 + 1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b2589, &a[i__ + 1 + - a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b2453, & - w[i__ + 1 + i__ * w_dim1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - dgemv_("Transpose", &i__2, &i__3, &c_b2453, &a[i__ + 1 + - a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b2467, &w[i__ * w_dim1 + 1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b2589, &w[i__ + 1 + - w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b2453, & - w[i__ + 1 + i__ * w_dim1], &c__1); - i__2 = *n - i__; - dscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1); - i__2 = *n - i__; - alpha = tau[i__] * -.5 * ddot_(&i__2, &w[i__ + 1 + i__ * - w_dim1], &c__1, &a[i__ + 1 + i__ * a_dim1], &c__1); - i__2 = *n - i__; - daxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[ - i__ + 1 + i__ * w_dim1], &c__1); - } - -/* L20: */ - } - } - - return 0; - -/* End of DLATRD */ - -} /* dlatrd_ */ - -/* Subroutine */ int dlauu2_(char *uplo, integer *n, doublereal *a, integer * - lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__; - static doublereal aii; - extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, - integer *); - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *); - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DLAUU2 computes the product U * U' or L' * L, where the triangular - factor U or L is stored in the upper or lower triangular part of - the array A. - - If UPLO = 'U' or 'u' then the upper triangle of the result is stored, - overwriting the factor U in A. - If UPLO = 'L' or 'l' then the lower triangle of the result is stored, - overwriting the factor L in A. - - This is the unblocked form of the algorithm, calling Level 2 BLAS. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the triangular factor stored in the array A - is upper or lower triangular: - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the triangular factor U or L. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the triangular factor U or L. - On exit, if UPLO = 'U', the upper triangle of A is - overwritten with the upper triangle of the product U * U'; - if UPLO = 'L', the lower triangle of A is overwritten with - the lower triangle of the product L' * L. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLAUU2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - if (upper) { - -/* Compute the product U * U'. */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - aii = a[i__ + i__ * a_dim1]; - if (i__ < *n) { - i__2 = *n - i__ + 1; - a[i__ + i__ * a_dim1] = ddot_(&i__2, &a[i__ + i__ * a_dim1], - lda, &a[i__ + i__ * a_dim1], lda); - i__2 = i__ - 1; - i__3 = *n - i__; - dgemv_("No transpose", &i__2, &i__3, &c_b2453, &a[(i__ + 1) * - a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & - aii, &a[i__ * a_dim1 + 1], &c__1); - } else { - dscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1); - } -/* L10: */ - } - - } else { - -/* Compute the product L' * L. */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - aii = a[i__ + i__ * a_dim1]; - if (i__ < *n) { - i__2 = *n - i__ + 1; - a[i__ + i__ * a_dim1] = ddot_(&i__2, &a[i__ + i__ * a_dim1], & - c__1, &a[i__ + i__ * a_dim1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - dgemv_("Transpose", &i__2, &i__3, &c_b2453, &a[i__ + 1 + - a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &aii, - &a[i__ + a_dim1], lda); - } else { - dscal_(&i__, &aii, &a[i__ + a_dim1], lda); - } -/* L20: */ - } - } - - return 0; - -/* End of DLAUU2 */ - -} /* dlauu2_ */ - -/* Subroutine */ int dlauum_(char *uplo, integer *n, doublereal *a, integer * - lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, ib, nb; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, - integer *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *); - static logical upper; - extern /* Subroutine */ int dsyrk_(char *, char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, doublereal *, - integer *), dlauu2_(char *, integer *, - doublereal *, integer *, integer *), xerbla_(char *, - integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DLAUUM computes the product U * U' or L' * L, where the triangular - factor U or L is stored in the upper or lower triangular part of - the array A. - - If UPLO = 'U' or 'u' then the upper triangle of the result is stored, - overwriting the factor U in A. - If UPLO = 'L' or 'l' then the lower triangle of the result is stored, - overwriting the factor L in A. - - This is the blocked form of the algorithm, calling Level 3 BLAS. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the triangular factor stored in the array A - is upper or lower triangular: - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the triangular factor U or L. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the triangular factor U or L. - On exit, if UPLO = 'U', the upper triangle of A is - overwritten with the upper triangle of the product U * U'; - if UPLO = 'L', the lower triangle of A is overwritten with - the lower triangle of the product L' * L. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLAUUM", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Determine the block size for this environment. */ - - nb = ilaenv_(&c__1, "DLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - - if (nb <= 1 || nb >= *n) { - -/* Use unblocked code */ - - dlauu2_(uplo, n, &a[a_offset], lda, info); - } else { - -/* Use blocked code */ - - if (upper) { - -/* Compute the product U * U'. */ - - i__1 = *n; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = nb, i__4 = *n - i__ + 1; - ib = min(i__3,i__4); - i__3 = i__ - 1; - dtrmm_("Right", "Upper", "Transpose", "Non-unit", &i__3, &ib, - &c_b2453, &a[i__ + i__ * a_dim1], lda, &a[i__ * - a_dim1 + 1], lda); - dlauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info); - if (i__ + ib <= *n) { - i__3 = i__ - 1; - i__4 = *n - i__ - ib + 1; - dgemm_("No transpose", "Transpose", &i__3, &ib, &i__4, & - c_b2453, &a[(i__ + ib) * a_dim1 + 1], lda, &a[i__ - + (i__ + ib) * a_dim1], lda, &c_b2453, &a[i__ * - a_dim1 + 1], lda); - i__3 = *n - i__ - ib + 1; - dsyrk_("Upper", "No transpose", &ib, &i__3, &c_b2453, &a[ - i__ + (i__ + ib) * a_dim1], lda, &c_b2453, &a[i__ - + i__ * a_dim1], lda); - } -/* L10: */ - } - } else { - -/* Compute the product L' * L. */ - - i__2 = *n; - i__1 = nb; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { -/* Computing MIN */ - i__3 = nb, i__4 = *n - i__ + 1; - ib = min(i__3,i__4); - i__3 = i__ - 1; - dtrmm_("Left", "Lower", "Transpose", "Non-unit", &ib, &i__3, & - c_b2453, &a[i__ + i__ * a_dim1], lda, &a[i__ + a_dim1] - , lda); - dlauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info); - if (i__ + ib <= *n) { - i__3 = i__ - 1; - i__4 = *n - i__ - ib + 1; - dgemm_("Transpose", "No transpose", &ib, &i__3, &i__4, & - c_b2453, &a[i__ + ib + i__ * a_dim1], lda, &a[i__ - + ib + a_dim1], lda, &c_b2453, &a[i__ + a_dim1], - lda); - i__3 = *n - i__ - ib + 1; - dsyrk_("Lower", "Transpose", &ib, &i__3, &c_b2453, &a[i__ - + ib + i__ * a_dim1], lda, &c_b2453, &a[i__ + i__ - * a_dim1], lda); - } -/* L20: */ - } - } - } - - return 0; - -/* End of DLAUUM */ - -} /* dlauum_ */ - -/* Subroutine */ int dorg2r_(integer *m, integer *n, integer *k, doublereal * - a, integer *lda, doublereal *tau, doublereal *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - doublereal d__1; - - /* Local variables */ - static integer i__, j, l; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *), dlarf_(char *, integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *, doublereal *), xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DORG2R generates an m by n real matrix Q with orthonormal columns, - which is defined as the first n columns of a product of k elementary - reflectors of order m - - Q = H(1) H(2) . . . H(k) - - as returned by DGEQRF. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix Q. M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q. M >= N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines the - matrix Q. N >= K >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the i-th column must contain the vector which - defines the elementary reflector H(i), for i = 1,2,...,k, as - returned by DGEQRF in the first k columns of its array - argument A. - On exit, the m-by-n matrix Q. - - LDA (input) INTEGER - The first dimension of the array A. LDA >= max(1,M). - - TAU (input) DOUBLE PRECISION array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGEQRF. - - WORK (workspace) DOUBLE PRECISION array, dimension (N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument has an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0 || *n > *m) { - *info = -2; - } else if (*k < 0 || *k > *n) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DORG2R", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n <= 0) { - return 0; - } - -/* Initialise columns k+1:n to columns of the unit matrix */ - - i__1 = *n; - for (j = *k + 1; j <= i__1; ++j) { - i__2 = *m; - for (l = 1; l <= i__2; ++l) { - a[l + j * a_dim1] = 0.; -/* L10: */ - } - a[j + j * a_dim1] = 1.; -/* L20: */ - } - - for (i__ = *k; i__ >= 1; --i__) { - -/* Apply H(i) to A(i:m,i:n) from the left */ - - if (i__ < *n) { - a[i__ + i__ * a_dim1] = 1.; - i__1 = *m - i__ + 1; - i__2 = *n - i__; - dlarf_("Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[ - i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); - } - if (i__ < *m) { - i__1 = *m - i__; - d__1 = -tau[i__]; - dscal_(&i__1, &d__1, &a[i__ + 1 + i__ * a_dim1], &c__1); - } - a[i__ + i__ * a_dim1] = 1. - tau[i__]; - -/* Set A(1:i-1,i) to zero */ - - i__1 = i__ - 1; - for (l = 1; l <= i__1; ++l) { - a[l + i__ * a_dim1] = 0.; -/* L30: */ - } -/* L40: */ - } - return 0; - -/* End of DORG2R */ - -} /* dorg2r_ */ - -/* Subroutine */ int dorgbr_(char *vect, integer *m, integer *n, integer *k, - doublereal *a, integer *lda, doublereal *tau, doublereal *work, - integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j, nb, mn; - extern logical lsame_(char *, char *); - static integer iinfo; - static logical wantq; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int dorglq_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - integer *), dorgqr_(integer *, integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *, integer *); - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DORGBR generates one of the real orthogonal matrices Q or P**T - determined by DGEBRD when reducing a real matrix A to bidiagonal - form: A = Q * B * P**T. Q and P**T are defined as products of - elementary reflectors H(i) or G(i) respectively. - - If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q - is of order M: - if m >= k, Q = H(1) H(2) . . . H(k) and DORGBR returns the first n - columns of Q, where m >= n >= k; - if m < k, Q = H(1) H(2) . . . H(m-1) and DORGBR returns Q as an - M-by-M matrix. - - If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T - is of order N: - if k < n, P**T = G(k) . . . G(2) G(1) and DORGBR returns the first m - rows of P**T, where n >= m >= k; - if k >= n, P**T = G(n-1) . . . G(2) G(1) and DORGBR returns P**T as - an N-by-N matrix. - - Arguments - ========= - - VECT (input) CHARACTER*1 - Specifies whether the matrix Q or the matrix P**T is - required, as defined in the transformation applied by DGEBRD: - = 'Q': generate Q; - = 'P': generate P**T. - - M (input) INTEGER - The number of rows of the matrix Q or P**T to be returned. - M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q or P**T to be returned. - N >= 0. - If VECT = 'Q', M >= N >= min(M,K); - if VECT = 'P', N >= M >= min(N,K). - - K (input) INTEGER - If VECT = 'Q', the number of columns in the original M-by-K - matrix reduced by DGEBRD. - If VECT = 'P', the number of rows in the original K-by-N - matrix reduced by DGEBRD. - K >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the vectors which define the elementary reflectors, - as returned by DGEBRD. - On exit, the M-by-N matrix Q or P**T. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (input) DOUBLE PRECISION array, dimension - (min(M,K)) if VECT = 'Q' - (min(N,K)) if VECT = 'P' - TAU(i) must contain the scalar factor of the elementary - reflector H(i) or G(i), which determines Q or P**T, as - returned by DGEBRD in its array argument TAUQ or TAUP. - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,min(M,N)). - For optimum performance LWORK >= min(M,N)*NB, where NB - is the optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - wantq = lsame_(vect, "Q"); - mn = min(*m,*n); - lquery = *lwork == -1; - if (! wantq && ! lsame_(vect, "P")) { - *info = -1; - } else if (*m < 0) { - *info = -2; - } else if (*n < 0 || wantq && (*n > *m || *n < min(*m,*k)) || ! wantq && ( - *m > *n || *m < min(*n,*k))) { - *info = -3; - } else if (*k < 0) { - *info = -4; - } else if (*lda < max(1,*m)) { - *info = -6; - } else if (*lwork < max(1,mn) && ! lquery) { - *info = -9; - } - - if (*info == 0) { - if (wantq) { - nb = ilaenv_(&c__1, "DORGQR", " ", m, n, k, &c_n1, (ftnlen)6, ( - ftnlen)1); - } else { - nb = ilaenv_(&c__1, "DORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, ( - ftnlen)1); - } - lwkopt = max(1,mn) * nb; - work[1] = (doublereal) lwkopt; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("DORGBR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0) { - work[1] = 1.; - return 0; - } - - if (wantq) { - -/* - Form Q, determined by a call to DGEBRD to reduce an m-by-k - matrix -*/ - - if (*m >= *k) { - -/* If m >= k, assume m >= n >= k */ - - dorgqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & - iinfo); - - } else { - -/* - If m < k, assume m = n - - Shift the vectors which define the elementary reflectors one - column to the right, and set the first row and column of Q - to those of the unit matrix -*/ - - for (j = *m; j >= 2; --j) { - a[j * a_dim1 + 1] = 0.; - i__1 = *m; - for (i__ = j + 1; i__ <= i__1; ++i__) { - a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; -/* L10: */ - } -/* L20: */ - } - a[a_dim1 + 1] = 1.; - i__1 = *m; - for (i__ = 2; i__ <= i__1; ++i__) { - a[i__ + a_dim1] = 0.; -/* L30: */ - } - if (*m > 1) { - -/* Form Q(2:m,2:m) */ - - i__1 = *m - 1; - i__2 = *m - 1; - i__3 = *m - 1; - dorgqr_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ - 1], &work[1], lwork, &iinfo); - } - } - } else { - -/* - Form P', determined by a call to DGEBRD to reduce a k-by-n - matrix -*/ - - if (*k < *n) { - -/* If k < n, assume k <= m <= n */ - - dorglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & - iinfo); - - } else { - -/* - If k >= n, assume m = n - - Shift the vectors which define the elementary reflectors one - row downward, and set the first row and column of P' to - those of the unit matrix -*/ - - a[a_dim1 + 1] = 1.; - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - a[i__ + a_dim1] = 0.; -/* L40: */ - } - i__1 = *n; - for (j = 2; j <= i__1; ++j) { - for (i__ = j - 1; i__ >= 2; --i__) { - a[i__ + j * a_dim1] = a[i__ - 1 + j * a_dim1]; -/* L50: */ - } - a[j * a_dim1 + 1] = 0.; -/* L60: */ - } - if (*n > 1) { - -/* Form P'(2:n,2:n) */ - - i__1 = *n - 1; - i__2 = *n - 1; - i__3 = *n - 1; - dorglq_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ - 1], &work[1], lwork, &iinfo); - } - } - } - work[1] = (doublereal) lwkopt; - return 0; - -/* End of DORGBR */ - -} /* dorgbr_ */ - -/* Subroutine */ int dorghr_(integer *n, integer *ilo, integer *ihi, - doublereal *a, integer *lda, doublereal *tau, doublereal *work, - integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j, nb, nh, iinfo; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int dorgqr_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - integer *); - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DORGHR generates a real orthogonal matrix Q which is defined as the - product of IHI-ILO elementary reflectors of order N, as returned by - DGEHRD: - - Q = H(ilo) H(ilo+1) . . . H(ihi-1). - - Arguments - ========= - - N (input) INTEGER - The order of the matrix Q. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - ILO and IHI must have the same values as in the previous call - of DGEHRD. Q is equal to the unit matrix except in the - submatrix Q(ilo+1:ihi,ilo+1:ihi). - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the vectors which define the elementary reflectors, - as returned by DGEHRD. - On exit, the N-by-N orthogonal matrix Q. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - TAU (input) DOUBLE PRECISION array, dimension (N-1) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGEHRD. - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= IHI-ILO. - For optimum performance LWORK >= (IHI-ILO)*NB, where NB is - the optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nh = *ihi - *ilo; - lquery = *lwork == -1; - if (*n < 0) { - *info = -1; - } else if (*ilo < 1 || *ilo > max(1,*n)) { - *info = -2; - } else if (*ihi < min(*ilo,*n) || *ihi > *n) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*lwork < max(1,nh) && ! lquery) { - *info = -8; - } - - if (*info == 0) { - nb = ilaenv_(&c__1, "DORGQR", " ", &nh, &nh, &nh, &c_n1, (ftnlen)6, ( - ftnlen)1); - lwkopt = max(1,nh) * nb; - work[1] = (doublereal) lwkopt; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("DORGHR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - work[1] = 1.; - return 0; - } - -/* - Shift the vectors which define the elementary reflectors one - column to the right, and set the first ilo and the last n-ihi - rows and columns to those of the unit matrix -*/ - - i__1 = *ilo + 1; - for (j = *ihi; j >= i__1; --j) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.; -/* L10: */ - } - i__2 = *ihi; - for (i__ = j + 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; -/* L20: */ - } - i__2 = *n; - for (i__ = *ihi + 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.; -/* L30: */ - } -/* L40: */ - } - i__1 = *ilo; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.; -/* L50: */ - } - a[j + j * a_dim1] = 1.; -/* L60: */ - } - i__1 = *n; - for (j = *ihi + 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.; -/* L70: */ - } - a[j + j * a_dim1] = 1.; -/* L80: */ - } - - if (nh > 0) { - -/* Generate Q(ilo+1:ihi,ilo+1:ihi) */ - - dorgqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[* - ilo], &work[1], lwork, &iinfo); - } - work[1] = (doublereal) lwkopt; - return 0; - -/* End of DORGHR */ - -} /* dorghr_ */ - -/* Subroutine */ int dorgl2_(integer *m, integer *n, integer *k, doublereal * - a, integer *lda, doublereal *tau, doublereal *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - doublereal d__1; - - /* Local variables */ - static integer i__, j, l; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *), dlarf_(char *, integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *, doublereal *), xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DORGL2 generates an m by n real matrix Q with orthonormal rows, - which is defined as the first m rows of a product of k elementary - reflectors of order n - - Q = H(k) . . . H(2) H(1) - - as returned by DGELQF. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix Q. M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q. N >= M. - - K (input) INTEGER - The number of elementary reflectors whose product defines the - matrix Q. M >= K >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the i-th row must contain the vector which defines - the elementary reflector H(i), for i = 1,2,...,k, as returned - by DGELQF in the first k rows of its array argument A. - On exit, the m-by-n matrix Q. - - LDA (input) INTEGER - The first dimension of the array A. LDA >= max(1,M). - - TAU (input) DOUBLE PRECISION array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGELQF. - - WORK (workspace) DOUBLE PRECISION array, dimension (M) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument has an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < *m) { - *info = -2; - } else if (*k < 0 || *k > *m) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DORGL2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m <= 0) { - return 0; - } - - if (*k < *m) { - -/* Initialise rows k+1:m to rows of the unit matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (l = *k + 1; l <= i__2; ++l) { - a[l + j * a_dim1] = 0.; -/* L10: */ - } - if (j > *k && j <= *m) { - a[j + j * a_dim1] = 1.; - } -/* L20: */ - } - } - - for (i__ = *k; i__ >= 1; --i__) { - -/* Apply H(i) to A(i:m,i:n) from the right */ - - if (i__ < *n) { - if (i__ < *m) { - a[i__ + i__ * a_dim1] = 1.; - i__1 = *m - i__; - i__2 = *n - i__ + 1; - dlarf_("Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, & - tau[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); - } - i__1 = *n - i__; - d__1 = -tau[i__]; - dscal_(&i__1, &d__1, &a[i__ + (i__ + 1) * a_dim1], lda); - } - a[i__ + i__ * a_dim1] = 1. - tau[i__]; - -/* Set A(i,1:i-1) to zero */ - - i__1 = i__ - 1; - for (l = 1; l <= i__1; ++l) { - a[i__ + l * a_dim1] = 0.; -/* L30: */ - } -/* L40: */ - } - return 0; - -/* End of DORGL2 */ - -} /* dorgl2_ */ - -/* Subroutine */ int dorglq_(integer *m, integer *n, integer *k, doublereal * - a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int dorgl2_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *), - dlarfb_(char *, char *, char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DORGLQ generates an M-by-N real matrix Q with orthonormal rows, - which is defined as the first M rows of a product of K elementary - reflectors of order N - - Q = H(k) . . . H(2) H(1) - - as returned by DGELQF. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix Q. M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q. N >= M. - - K (input) INTEGER - The number of elementary reflectors whose product defines the - matrix Q. M >= K >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the i-th row must contain the vector which defines - the elementary reflector H(i), for i = 1,2,...,k, as returned - by DGELQF in the first k rows of its array argument A. - On exit, the M-by-N matrix Q. - - LDA (input) INTEGER - The first dimension of the array A. LDA >= max(1,M). - - TAU (input) DOUBLE PRECISION array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGELQF. - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,M). - For optimum performance LWORK >= M*NB, where NB is - the optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument has an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nb = ilaenv_(&c__1, "DORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); - lwkopt = max(1,*m) * nb; - work[1] = (doublereal) lwkopt; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < *m) { - *info = -2; - } else if (*k < 0 || *k > *m) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } else if (*lwork < max(1,*m) && ! lquery) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DORGLQ", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m <= 0) { - work[1] = 1.; - return 0; - } - - nbmin = 2; - nx = 0; - iws = *m; - if (nb > 1 && nb < *k) { - -/* - Determine when to cross over from blocked to unblocked code. - - Computing MAX -*/ - i__1 = 0, i__2 = ilaenv_(&c__3, "DORGLQ", " ", m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < *k) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *m; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: reduce NB and - determine the minimum value of NB. -*/ - - nb = *lwork / ldwork; -/* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "DORGLQ", " ", m, n, k, &c_n1, - (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - } - } - } - - if (nb >= nbmin && nb < *k && nx < *k) { - -/* - Use blocked code after the last block. - The first kk rows are handled by the block method. -*/ - - ki = (*k - nx - 1) / nb * nb; -/* Computing MIN */ - i__1 = *k, i__2 = ki + nb; - kk = min(i__1,i__2); - -/* Set A(kk+1:m,1:kk) to zero. */ - - i__1 = kk; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = kk + 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.; -/* L10: */ - } -/* L20: */ - } - } else { - kk = 0; - } - -/* Use unblocked code for the last or only block. */ - - if (kk < *m) { - i__1 = *m - kk; - i__2 = *n - kk; - i__3 = *k - kk; - dorgl2_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & - tau[kk + 1], &work[1], &iinfo); - } - - if (kk > 0) { - -/* Use blocked code */ - - i__1 = -nb; - for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { -/* Computing MIN */ - i__2 = nb, i__3 = *k - i__ + 1; - ib = min(i__2,i__3); - if (i__ + ib <= *m) { - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__2 = *n - i__ + 1; - dlarft_("Forward", "Rowwise", &i__2, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], &work[1], &ldwork); - -/* Apply H' to A(i+ib:m,i:n) from the right */ - - i__2 = *m - i__ - ib + 1; - i__3 = *n - i__ + 1; - dlarfb_("Right", "Transpose", "Forward", "Rowwise", &i__2, & - i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & - ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + - 1], &ldwork); - } - -/* Apply H' to columns i:n of current block */ - - i__2 = *n - i__ + 1; - dorgl2_(&ib, &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & - work[1], &iinfo); - -/* Set columns 1:i-1 of current block to zero */ - - i__2 = i__ - 1; - for (j = 1; j <= i__2; ++j) { - i__3 = i__ + ib - 1; - for (l = i__; l <= i__3; ++l) { - a[l + j * a_dim1] = 0.; -/* L30: */ - } -/* L40: */ - } -/* L50: */ - } - } - - work[1] = (doublereal) iws; - return 0; - -/* End of DORGLQ */ - -} /* dorglq_ */ - -/* Subroutine */ int dorgqr_(integer *m, integer *n, integer *k, doublereal * - a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int dorg2r_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *), - dlarfb_(char *, char *, char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DORGQR generates an M-by-N real matrix Q with orthonormal columns, - which is defined as the first N columns of a product of K elementary - reflectors of order M - - Q = H(1) H(2) . . . H(k) - - as returned by DGEQRF. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix Q. M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q. M >= N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines the - matrix Q. N >= K >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the i-th column must contain the vector which - defines the elementary reflector H(i), for i = 1,2,...,k, as - returned by DGEQRF in the first k columns of its array - argument A. - On exit, the M-by-N matrix Q. - - LDA (input) INTEGER - The first dimension of the array A. LDA >= max(1,M). - - TAU (input) DOUBLE PRECISION array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGEQRF. - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,N). - For optimum performance LWORK >= N*NB, where NB is the - optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument has an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nb = ilaenv_(&c__1, "DORGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); - lwkopt = max(1,*n) * nb; - work[1] = (doublereal) lwkopt; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0 || *n > *m) { - *info = -2; - } else if (*k < 0 || *k > *n) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } else if (*lwork < max(1,*n) && ! lquery) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DORGQR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n <= 0) { - work[1] = 1.; - return 0; - } - - nbmin = 2; - nx = 0; - iws = *n; - if (nb > 1 && nb < *k) { - -/* - Determine when to cross over from blocked to unblocked code. - - Computing MAX -*/ - i__1 = 0, i__2 = ilaenv_(&c__3, "DORGQR", " ", m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < *k) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *n; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: reduce NB and - determine the minimum value of NB. -*/ - - nb = *lwork / ldwork; -/* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "DORGQR", " ", m, n, k, &c_n1, - (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - } - } - } - - if (nb >= nbmin && nb < *k && nx < *k) { - -/* - Use blocked code after the last block. - The first kk columns are handled by the block method. -*/ - - ki = (*k - nx - 1) / nb * nb; -/* Computing MIN */ - i__1 = *k, i__2 = ki + nb; - kk = min(i__1,i__2); - -/* Set A(1:kk,kk+1:n) to zero. */ - - i__1 = *n; - for (j = kk + 1; j <= i__1; ++j) { - i__2 = kk; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.; -/* L10: */ - } -/* L20: */ - } - } else { - kk = 0; - } - -/* Use unblocked code for the last or only block. */ - - if (kk < *n) { - i__1 = *m - kk; - i__2 = *n - kk; - i__3 = *k - kk; - dorg2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & - tau[kk + 1], &work[1], &iinfo); - } - - if (kk > 0) { - -/* Use blocked code */ - - i__1 = -nb; - for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { -/* Computing MIN */ - i__2 = nb, i__3 = *k - i__ + 1; - ib = min(i__2,i__3); - if (i__ + ib <= *n) { - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__2 = *m - i__ + 1; - dlarft_("Forward", "Columnwise", &i__2, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], &work[1], &ldwork); - -/* Apply H to A(i:m,i+ib:n) from the left */ - - i__2 = *m - i__ + 1; - i__3 = *n - i__ - ib + 1; - dlarfb_("Left", "No transpose", "Forward", "Columnwise", & - i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ - 1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, & - work[ib + 1], &ldwork); - } - -/* Apply H to rows i:m of current block */ - - i__2 = *m - i__ + 1; - dorg2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & - work[1], &iinfo); - -/* Set rows 1:i-1 of current block to zero */ - - i__2 = i__ + ib - 1; - for (j = i__; j <= i__2; ++j) { - i__3 = i__ - 1; - for (l = 1; l <= i__3; ++l) { - a[l + j * a_dim1] = 0.; -/* L30: */ - } -/* L40: */ - } -/* L50: */ - } - } - - work[1] = (doublereal) iws; - return 0; - -/* End of DORGQR */ - -} /* dorgqr_ */ - -/* Subroutine */ int dorm2l_(char *side, char *trans, integer *m, integer *n, - integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * - c__, integer *ldc, doublereal *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; - - /* Local variables */ - static integer i__, i1, i2, i3, mi, ni, nq; - static doublereal aii; - static logical left; - extern /* Subroutine */ int dlarf_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical notran; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DORM2L overwrites the general real m by n matrix C with - - Q * C if SIDE = 'L' and TRANS = 'N', or - - Q'* C if SIDE = 'L' and TRANS = 'T', or - - C * Q if SIDE = 'R' and TRANS = 'N', or - - C * Q' if SIDE = 'R' and TRANS = 'T', - - where Q is a real orthogonal matrix defined as the product of k - elementary reflectors - - Q = H(k) . . . H(2) H(1) - - as returned by DGEQLF. Q is of order m if SIDE = 'L' and of order n - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q' from the Left - = 'R': apply Q or Q' from the Right - - TRANS (input) CHARACTER*1 - = 'N': apply Q (No transpose) - = 'T': apply Q' (Transpose) - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) DOUBLE PRECISION array, dimension (LDA,K) - The i-th column must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - DGEQLF in the last k columns of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. - If SIDE = 'L', LDA >= max(1,M); - if SIDE = 'R', LDA >= max(1,N). - - TAU (input) DOUBLE PRECISION array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGEQLF. - - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) - On entry, the m by n matrix C. - On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace) DOUBLE PRECISION array, dimension - (N) if SIDE = 'L', - (M) if SIDE = 'R' - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - -/* NQ is the order of Q */ - - if (left) { - nq = *m; - } else { - nq = *n; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "T")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if (*k < 0 || *k > nq) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DORM2L", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || *k == 0) { - return 0; - } - - if (left && notran || ! left && ! notran) { - i1 = 1; - i2 = *k; - i3 = 1; - } else { - i1 = *k; - i2 = 1; - i3 = -1; - } - - if (left) { - ni = *n; - } else { - mi = *m; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - if (left) { - -/* H(i) is applied to C(1:m-k+i,1:n) */ - - mi = *m - *k + i__; - } else { - -/* H(i) is applied to C(1:m,1:n-k+i) */ - - ni = *n - *k + i__; - } - -/* Apply H(i) */ - - aii = a[nq - *k + i__ + i__ * a_dim1]; - a[nq - *k + i__ + i__ * a_dim1] = 1.; - dlarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &tau[i__], &c__[ - c_offset], ldc, &work[1]); - a[nq - *k + i__ + i__ * a_dim1] = aii; -/* L10: */ - } - return 0; - -/* End of DORM2L */ - -} /* dorm2l_ */ - -/* Subroutine */ int dorm2r_(char *side, char *trans, integer *m, integer *n, - integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * - c__, integer *ldc, doublereal *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; - - /* Local variables */ - static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; - static doublereal aii; - static logical left; - extern /* Subroutine */ int dlarf_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical notran; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DORM2R overwrites the general real m by n matrix C with - - Q * C if SIDE = 'L' and TRANS = 'N', or - - Q'* C if SIDE = 'L' and TRANS = 'T', or - - C * Q if SIDE = 'R' and TRANS = 'N', or - - C * Q' if SIDE = 'R' and TRANS = 'T', - - where Q is a real orthogonal matrix defined as the product of k - elementary reflectors - - Q = H(1) H(2) . . . H(k) - - as returned by DGEQRF. Q is of order m if SIDE = 'L' and of order n - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q' from the Left - = 'R': apply Q or Q' from the Right - - TRANS (input) CHARACTER*1 - = 'N': apply Q (No transpose) - = 'T': apply Q' (Transpose) - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) DOUBLE PRECISION array, dimension (LDA,K) - The i-th column must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - DGEQRF in the first k columns of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. - If SIDE = 'L', LDA >= max(1,M); - if SIDE = 'R', LDA >= max(1,N). - - TAU (input) DOUBLE PRECISION array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGEQRF. - - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) - On entry, the m by n matrix C. - On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace) DOUBLE PRECISION array, dimension - (N) if SIDE = 'L', - (M) if SIDE = 'R' - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - -/* NQ is the order of Q */ - - if (left) { - nq = *m; - } else { - nq = *n; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "T")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if (*k < 0 || *k > nq) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DORM2R", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || *k == 0) { - return 0; - } - - if (left && ! notran || ! left && notran) { - i1 = 1; - i2 = *k; - i3 = 1; - } else { - i1 = *k; - i2 = 1; - i3 = -1; - } - - if (left) { - ni = *n; - jc = 1; - } else { - mi = *m; - ic = 1; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - if (left) { - -/* H(i) is applied to C(i:m,1:n) */ - - mi = *m - i__ + 1; - ic = i__; - } else { - -/* H(i) is applied to C(1:m,i:n) */ - - ni = *n - i__ + 1; - jc = i__; - } - -/* Apply H(i) */ - - aii = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.; - dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &tau[i__], &c__[ - ic + jc * c_dim1], ldc, &work[1]); - a[i__ + i__ * a_dim1] = aii; -/* L10: */ - } - return 0; - -/* End of DORM2R */ - -} /* dorm2r_ */ - -/* Subroutine */ int dormbr_(char *vect, char *side, char *trans, integer *m, - integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, - doublereal *c__, integer *ldc, doublereal *work, integer *lwork, - integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2]; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i1, i2, nb, mi, ni, nq, nw; - static logical left; - extern logical lsame_(char *, char *); - static integer iinfo; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *, integer *); - static logical notran; - extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *, integer *); - static logical applyq; - static char transt[1]; - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - If VECT = 'Q', DORMBR overwrites the general real M-by-N matrix C - with - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'T': Q**T * C C * Q**T - - If VECT = 'P', DORMBR overwrites the general real M-by-N matrix C - with - SIDE = 'L' SIDE = 'R' - TRANS = 'N': P * C C * P - TRANS = 'T': P**T * C C * P**T - - Here Q and P**T are the orthogonal matrices determined by DGEBRD when - reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and - P**T are defined as products of elementary reflectors H(i) and G(i) - respectively. - - Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the - order of the orthogonal matrix Q or P**T that is applied. - - If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: - if nq >= k, Q = H(1) H(2) . . . H(k); - if nq < k, Q = H(1) H(2) . . . H(nq-1). - - If VECT = 'P', A is assumed to have been a K-by-NQ matrix: - if k < nq, P = G(1) G(2) . . . G(k); - if k >= nq, P = G(1) G(2) . . . G(nq-1). - - Arguments - ========= - - VECT (input) CHARACTER*1 - = 'Q': apply Q or Q**T; - = 'P': apply P or P**T. - - SIDE (input) CHARACTER*1 - = 'L': apply Q, Q**T, P or P**T from the Left; - = 'R': apply Q, Q**T, P or P**T from the Right. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q or P; - = 'T': Transpose, apply Q**T or P**T. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - If VECT = 'Q', the number of columns in the original - matrix reduced by DGEBRD. - If VECT = 'P', the number of rows in the original - matrix reduced by DGEBRD. - K >= 0. - - A (input) DOUBLE PRECISION array, dimension - (LDA,min(nq,K)) if VECT = 'Q' - (LDA,nq) if VECT = 'P' - The vectors which define the elementary reflectors H(i) and - G(i), whose products determine the matrices Q and P, as - returned by DGEBRD. - - LDA (input) INTEGER - The leading dimension of the array A. - If VECT = 'Q', LDA >= max(1,nq); - if VECT = 'P', LDA >= max(1,min(nq,K)). - - TAU (input) DOUBLE PRECISION array, dimension (min(nq,K)) - TAU(i) must contain the scalar factor of the elementary - reflector H(i) or G(i) which determines Q or P, as returned - by DGEBRD in the array argument TAUQ or TAUP. - - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q - or P*C or P**T*C or C*P or C*P**T. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - applyq = lsame_(vect, "Q"); - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - lquery = *lwork == -1; - -/* NQ is the order of Q or P and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! applyq && ! lsame_(vect, "P")) { - *info = -1; - } else if (! left && ! lsame_(side, "R")) { - *info = -2; - } else if (! notran && ! lsame_(trans, "T")) { - *info = -3; - } else if (*m < 0) { - *info = -4; - } else if (*n < 0) { - *info = -5; - } else if (*k < 0) { - *info = -6; - } else /* if(complicated condition) */ { -/* Computing MAX */ - i__1 = 1, i__2 = min(nq,*k); - if (applyq && *lda < max(1,nq) || ! applyq && *lda < max(i__1,i__2)) { - *info = -8; - } else if (*ldc < max(1,*m)) { - *info = -11; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -13; - } - } - - if (*info == 0) { - if (applyq) { - if (left) { -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *m - 1; - i__2 = *m - 1; - nb = ilaenv_(&c__1, "DORMQR", ch__1, &i__1, n, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } else { -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *n - 1; - i__2 = *n - 1; - nb = ilaenv_(&c__1, "DORMQR", ch__1, m, &i__1, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } - } else { - if (left) { -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *m - 1; - i__2 = *m - 1; - nb = ilaenv_(&c__1, "DORMLQ", ch__1, &i__1, n, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } else { -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *n - 1; - i__2 = *n - 1; - nb = ilaenv_(&c__1, "DORMLQ", ch__1, m, &i__1, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } - } - lwkopt = max(1,nw) * nb; - work[1] = (doublereal) lwkopt; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("DORMBR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - work[1] = 1.; - if (*m == 0 || *n == 0) { - return 0; - } - - if (applyq) { - -/* Apply Q */ - - if (nq >= *k) { - -/* Q was determined by a call to DGEBRD with nq >= k */ - - dormqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], lwork, &iinfo); - } else if (nq > 1) { - -/* Q was determined by a call to DGEBRD with nq < k */ - - if (left) { - mi = *m - 1; - ni = *n; - i1 = 2; - i2 = 1; - } else { - mi = *m; - ni = *n - 1; - i1 = 1; - i2 = 2; - } - i__1 = nq - 1; - dormqr_(side, trans, &mi, &ni, &i__1, &a[a_dim1 + 2], lda, &tau[1] - , &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); - } - } else { - -/* Apply P */ - - if (notran) { - *(unsigned char *)transt = 'T'; - } else { - *(unsigned char *)transt = 'N'; - } - if (nq > *k) { - -/* P was determined by a call to DGEBRD with nq > k */ - - dormlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], lwork, &iinfo); - } else if (nq > 1) { - -/* P was determined by a call to DGEBRD with nq <= k */ - - if (left) { - mi = *m - 1; - ni = *n; - i1 = 2; - i2 = 1; - } else { - mi = *m; - ni = *n - 1; - i1 = 1; - i2 = 2; - } - i__1 = nq - 1; - dormlq_(side, transt, &mi, &ni, &i__1, &a[(a_dim1 << 1) + 1], lda, - &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, & - iinfo); - } - } - work[1] = (doublereal) lwkopt; - return 0; - -/* End of DORMBR */ - -} /* dormbr_ */ - -/* Subroutine */ int dorml2_(char *side, char *trans, integer *m, integer *n, - integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * - c__, integer *ldc, doublereal *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; - - /* Local variables */ - static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; - static doublereal aii; - static logical left; - extern /* Subroutine */ int dlarf_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical notran; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DORML2 overwrites the general real m by n matrix C with - - Q * C if SIDE = 'L' and TRANS = 'N', or - - Q'* C if SIDE = 'L' and TRANS = 'T', or - - C * Q if SIDE = 'R' and TRANS = 'N', or - - C * Q' if SIDE = 'R' and TRANS = 'T', - - where Q is a real orthogonal matrix defined as the product of k - elementary reflectors - - Q = H(k) . . . H(2) H(1) - - as returned by DGELQF. Q is of order m if SIDE = 'L' and of order n - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q' from the Left - = 'R': apply Q or Q' from the Right - - TRANS (input) CHARACTER*1 - = 'N': apply Q (No transpose) - = 'T': apply Q' (Transpose) - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) DOUBLE PRECISION array, dimension - (LDA,M) if SIDE = 'L', - (LDA,N) if SIDE = 'R' - The i-th row must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - DGELQF in the first k rows of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,K). - - TAU (input) DOUBLE PRECISION array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGELQF. - - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) - On entry, the m by n matrix C. - On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace) DOUBLE PRECISION array, dimension - (N) if SIDE = 'L', - (M) if SIDE = 'R' - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - -/* NQ is the order of Q */ - - if (left) { - nq = *m; - } else { - nq = *n; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "T")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if (*k < 0 || *k > nq) { - *info = -5; - } else if (*lda < max(1,*k)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DORML2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || *k == 0) { - return 0; - } - - if (left && notran || ! left && ! notran) { - i1 = 1; - i2 = *k; - i3 = 1; - } else { - i1 = *k; - i2 = 1; - i3 = -1; - } - - if (left) { - ni = *n; - jc = 1; - } else { - mi = *m; - ic = 1; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - if (left) { - -/* H(i) is applied to C(i:m,1:n) */ - - mi = *m - i__ + 1; - ic = i__; - } else { - -/* H(i) is applied to C(1:m,i:n) */ - - ni = *n - i__ + 1; - jc = i__; - } - -/* Apply H(i) */ - - aii = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.; - dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &tau[i__], &c__[ - ic + jc * c_dim1], ldc, &work[1]); - a[i__ + i__ * a_dim1] = aii; -/* L10: */ - } - return 0; - -/* End of DORML2 */ - -} /* dorml2_ */ - -/* Subroutine */ int dormlq_(char *side, char *trans, integer *m, integer *n, - integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * - c__, integer *ldc, doublereal *work, integer *lwork, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, - i__5; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i__; - static doublereal t[4160] /* was [65][64] */; - static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; - static logical left; - extern logical lsame_(char *, char *); - static integer nbmin, iinfo; - extern /* Subroutine */ int dorml2_(char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *), dlarfb_(char - *, char *, char *, char *, integer *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal - *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static logical notran; - static integer ldwork; - static char transt[1]; - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DORMLQ overwrites the general real M-by-N matrix C with - - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'T': Q**T * C C * Q**T - - where Q is a real orthogonal matrix defined as the product of k - elementary reflectors - - Q = H(k) . . . H(2) H(1) - - as returned by DGELQF. Q is of order M if SIDE = 'L' and of order N - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q**T from the Left; - = 'R': apply Q or Q**T from the Right. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q; - = 'T': Transpose, apply Q**T. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) DOUBLE PRECISION array, dimension - (LDA,M) if SIDE = 'L', - (LDA,N) if SIDE = 'R' - The i-th row must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - DGELQF in the first k rows of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,K). - - TAU (input) DOUBLE PRECISION array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGELQF. - - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - lquery = *lwork == -1; - -/* NQ is the order of Q and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "T")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if (*k < 0 || *k > nq) { - *info = -5; - } else if (*lda < max(1,*k)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -12; - } - - if (*info == 0) { - -/* - Determine the block size. NB may be at most NBMAX, where NBMAX - is used to define the local array T. - - Computing MIN - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "DORMLQ", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nb = min(i__1,i__2); - lwkopt = max(1,nw) * nb; - work[1] = (doublereal) lwkopt; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("DORMLQ", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || *k == 0) { - work[1] = 1.; - return 0; - } - - nbmin = 2; - ldwork = nw; - if (nb > 1 && nb < *k) { - iws = nw * nb; - if (*lwork < iws) { - nb = *lwork / ldwork; -/* - Computing MAX - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 2, i__2 = ilaenv_(&c__2, "DORMLQ", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nbmin = max(i__1,i__2); - } - } else { - iws = nw; - } - - if (nb < nbmin || nb >= *k) { - -/* Use unblocked code */ - - dorml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], &iinfo); - } else { - -/* Use blocked code */ - - if (left && notran || ! left && ! notran) { - i1 = 1; - i2 = *k; - i3 = nb; - } else { - i1 = (*k - 1) / nb * nb + 1; - i2 = 1; - i3 = -nb; - } - - if (left) { - ni = *n; - jc = 1; - } else { - mi = *m; - ic = 1; - } - - if (notran) { - *(unsigned char *)transt = 'T'; - } else { - *(unsigned char *)transt = 'N'; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__4 = nb, i__5 = *k - i__ + 1; - ib = min(i__4,i__5); - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__4 = nq - i__ + 1; - dlarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1], - lda, &tau[i__], t, &c__65); - if (left) { - -/* H or H' is applied to C(i:m,1:n) */ - - mi = *m - i__ + 1; - ic = i__; - } else { - -/* H or H' is applied to C(1:m,i:n) */ - - ni = *n - i__ + 1; - jc = i__; - } - -/* Apply H or H' */ - - dlarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__ - + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1], - ldc, &work[1], &ldwork); -/* L10: */ - } - } - work[1] = (doublereal) lwkopt; - return 0; - -/* End of DORMLQ */ - -} /* dormlq_ */ - -/* Subroutine */ int dormql_(char *side, char *trans, integer *m, integer *n, - integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * - c__, integer *ldc, doublereal *work, integer *lwork, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, - i__5; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i__; - static doublereal t[4160] /* was [65][64] */; - static integer i1, i2, i3, ib, nb, mi, ni, nq, nw, iws; - static logical left; - extern logical lsame_(char *, char *); - static integer nbmin, iinfo; - extern /* Subroutine */ int dorm2l_(char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *), dlarfb_(char - *, char *, char *, char *, integer *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal - *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static logical notran; - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DORMQL overwrites the general real M-by-N matrix C with - - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'T': Q**T * C C * Q**T - - where Q is a real orthogonal matrix defined as the product of k - elementary reflectors - - Q = H(k) . . . H(2) H(1) - - as returned by DGEQLF. Q is of order M if SIDE = 'L' and of order N - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q**T from the Left; - = 'R': apply Q or Q**T from the Right. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q; - = 'T': Transpose, apply Q**T. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) DOUBLE PRECISION array, dimension (LDA,K) - The i-th column must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - DGEQLF in the last k columns of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. - If SIDE = 'L', LDA >= max(1,M); - if SIDE = 'R', LDA >= max(1,N). - - TAU (input) DOUBLE PRECISION array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGEQLF. - - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - lquery = *lwork == -1; - -/* NQ is the order of Q and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "T")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if (*k < 0 || *k > nq) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -12; - } - - if (*info == 0) { - -/* - Determine the block size. NB may be at most NBMAX, where NBMAX - is used to define the local array T. - - Computing MIN - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "DORMQL", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nb = min(i__1,i__2); - lwkopt = max(1,nw) * nb; - work[1] = (doublereal) lwkopt; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("DORMQL", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || *k == 0) { - work[1] = 1.; - return 0; - } - - nbmin = 2; - ldwork = nw; - if (nb > 1 && nb < *k) { - iws = nw * nb; - if (*lwork < iws) { - nb = *lwork / ldwork; -/* - Computing MAX - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 2, i__2 = ilaenv_(&c__2, "DORMQL", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nbmin = max(i__1,i__2); - } - } else { - iws = nw; - } - - if (nb < nbmin || nb >= *k) { - -/* Use unblocked code */ - - dorm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], &iinfo); - } else { - -/* Use blocked code */ - - if (left && notran || ! left && ! notran) { - i1 = 1; - i2 = *k; - i3 = nb; - } else { - i1 = (*k - 1) / nb * nb + 1; - i2 = 1; - i3 = -nb; - } - - if (left) { - ni = *n; - } else { - mi = *m; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__4 = nb, i__5 = *k - i__ + 1; - ib = min(i__4,i__5); - -/* - Form the triangular factor of the block reflector - H = H(i+ib-1) . . . H(i+1) H(i) -*/ - - i__4 = nq - *k + i__ + ib - 1; - dlarft_("Backward", "Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1] - , lda, &tau[i__], t, &c__65); - if (left) { - -/* H or H' is applied to C(1:m-k+i+ib-1,1:n) */ - - mi = *m - *k + i__ + ib - 1; - } else { - -/* H or H' is applied to C(1:m,1:n-k+i+ib-1) */ - - ni = *n - *k + i__ + ib - 1; - } - -/* Apply H or H' */ - - dlarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, &a[ - i__ * a_dim1 + 1], lda, t, &c__65, &c__[c_offset], ldc, & - work[1], &ldwork); -/* L10: */ - } - } - work[1] = (doublereal) lwkopt; - return 0; - -/* End of DORMQL */ - -} /* dormql_ */ - -/* Subroutine */ int dormqr_(char *side, char *trans, integer *m, integer *n, - integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * - c__, integer *ldc, doublereal *work, integer *lwork, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, - i__5; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i__; - static doublereal t[4160] /* was [65][64] */; - static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; - static logical left; - extern logical lsame_(char *, char *); - static integer nbmin, iinfo; - extern /* Subroutine */ int dorm2r_(char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *), dlarfb_(char - *, char *, char *, char *, integer *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal - *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static logical notran; - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DORMQR overwrites the general real M-by-N matrix C with - - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'T': Q**T * C C * Q**T - - where Q is a real orthogonal matrix defined as the product of k - elementary reflectors - - Q = H(1) H(2) . . . H(k) - - as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q**T from the Left; - = 'R': apply Q or Q**T from the Right. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q; - = 'T': Transpose, apply Q**T. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) DOUBLE PRECISION array, dimension (LDA,K) - The i-th column must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - DGEQRF in the first k columns of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. - If SIDE = 'L', LDA >= max(1,M); - if SIDE = 'R', LDA >= max(1,N). - - TAU (input) DOUBLE PRECISION array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGEQRF. - - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - lquery = *lwork == -1; - -/* NQ is the order of Q and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "T")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if (*k < 0 || *k > nq) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -12; - } - - if (*info == 0) { - -/* - Determine the block size. NB may be at most NBMAX, where NBMAX - is used to define the local array T. - - Computing MIN - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "DORMQR", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nb = min(i__1,i__2); - lwkopt = max(1,nw) * nb; - work[1] = (doublereal) lwkopt; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("DORMQR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || *k == 0) { - work[1] = 1.; - return 0; - } - - nbmin = 2; - ldwork = nw; - if (nb > 1 && nb < *k) { - iws = nw * nb; - if (*lwork < iws) { - nb = *lwork / ldwork; -/* - Computing MAX - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 2, i__2 = ilaenv_(&c__2, "DORMQR", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nbmin = max(i__1,i__2); - } - } else { - iws = nw; - } - - if (nb < nbmin || nb >= *k) { - -/* Use unblocked code */ - - dorm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], &iinfo); - } else { - -/* Use blocked code */ - - if (left && ! notran || ! left && notran) { - i1 = 1; - i2 = *k; - i3 = nb; - } else { - i1 = (*k - 1) / nb * nb + 1; - i2 = 1; - i3 = -nb; - } - - if (left) { - ni = *n; - jc = 1; - } else { - mi = *m; - ic = 1; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__4 = nb, i__5 = *k - i__ + 1; - ib = min(i__4,i__5); - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__4 = nq - i__ + 1; - dlarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], t, &c__65) - ; - if (left) { - -/* H or H' is applied to C(i:m,1:n) */ - - mi = *m - i__ + 1; - ic = i__; - } else { - -/* H or H' is applied to C(1:m,i:n) */ - - ni = *n - i__ + 1; - jc = i__; - } - -/* Apply H or H' */ - - dlarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[ - i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * - c_dim1], ldc, &work[1], &ldwork); -/* L10: */ - } - } - work[1] = (doublereal) lwkopt; - return 0; - -/* End of DORMQR */ - -} /* dormqr_ */ - -/* Subroutine */ int dormtr_(char *side, char *uplo, char *trans, integer *m, - integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal * - c__, integer *ldc, doublereal *work, integer *lwork, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2, i__3; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i1, i2, nb, mi, ni, nq, nw; - static logical left; - extern logical lsame_(char *, char *); - static integer iinfo; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int dormql_(char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *, integer *), - dormqr_(char *, char *, integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *, integer *); - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DORMTR overwrites the general real M-by-N matrix C with - - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'T': Q**T * C C * Q**T - - where Q is a real orthogonal matrix of order nq, with nq = m if - SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of - nq-1 elementary reflectors, as returned by DSYTRD: - - if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); - - if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q**T from the Left; - = 'R': apply Q or Q**T from the Right. - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A contains elementary reflectors - from DSYTRD; - = 'L': Lower triangle of A contains elementary reflectors - from DSYTRD. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q; - = 'T': Transpose, apply Q**T. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - A (input) DOUBLE PRECISION array, dimension - (LDA,M) if SIDE = 'L' - (LDA,N) if SIDE = 'R' - The vectors which define the elementary reflectors, as - returned by DSYTRD. - - LDA (input) INTEGER - The leading dimension of the array A. - LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. - - TAU (input) DOUBLE PRECISION array, dimension - (M-1) if SIDE = 'L' - (N-1) if SIDE = 'R' - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DSYTRD. - - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - upper = lsame_(uplo, "U"); - lquery = *lwork == -1; - -/* NQ is the order of Q and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! upper && ! lsame_(uplo, "L")) { - *info = -2; - } else if (! lsame_(trans, "N") && ! lsame_(trans, - "T")) { - *info = -3; - } else if (*m < 0) { - *info = -4; - } else if (*n < 0) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -12; - } - - if (*info == 0) { - if (upper) { - if (left) { -/* Writing concatenation */ - i__1[0] = 1, a__1[0] = side; - i__1[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); - i__2 = *m - 1; - i__3 = *m - 1; - nb = ilaenv_(&c__1, "DORMQL", ch__1, &i__2, n, &i__3, &c_n1, ( - ftnlen)6, (ftnlen)2); - } else { -/* Writing concatenation */ - i__1[0] = 1, a__1[0] = side; - i__1[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); - i__2 = *n - 1; - i__3 = *n - 1; - nb = ilaenv_(&c__1, "DORMQL", ch__1, m, &i__2, &i__3, &c_n1, ( - ftnlen)6, (ftnlen)2); - } - } else { - if (left) { -/* Writing concatenation */ - i__1[0] = 1, a__1[0] = side; - i__1[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); - i__2 = *m - 1; - i__3 = *m - 1; - nb = ilaenv_(&c__1, "DORMQR", ch__1, &i__2, n, &i__3, &c_n1, ( - ftnlen)6, (ftnlen)2); - } else { -/* Writing concatenation */ - i__1[0] = 1, a__1[0] = side; - i__1[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); - i__2 = *n - 1; - i__3 = *n - 1; - nb = ilaenv_(&c__1, "DORMQR", ch__1, m, &i__2, &i__3, &c_n1, ( - ftnlen)6, (ftnlen)2); - } - } - lwkopt = max(1,nw) * nb; - work[1] = (doublereal) lwkopt; - } - - if (*info != 0) { - i__2 = -(*info); - xerbla_("DORMTR", &i__2); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || nq == 1) { - work[1] = 1.; - return 0; - } - - if (left) { - mi = *m - 1; - ni = *n; - } else { - mi = *m; - ni = *n - 1; - } - - if (upper) { - -/* Q was determined by a call to DSYTRD with UPLO = 'U' */ - - i__2 = nq - 1; - dormql_(side, trans, &mi, &ni, &i__2, &a[(a_dim1 << 1) + 1], lda, & - tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo); - } else { - -/* Q was determined by a call to DSYTRD with UPLO = 'L' */ - - if (left) { - i1 = 2; - i2 = 1; - } else { - i1 = 1; - i2 = 2; - } - i__2 = nq - 1; - dormqr_(side, trans, &mi, &ni, &i__2, &a[a_dim1 + 2], lda, &tau[1], & - c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); - } - work[1] = (doublereal) lwkopt; - return 0; - -/* End of DORMTR */ - -} /* dormtr_ */ - -/* Subroutine */ int dpotf2_(char *uplo, integer *n, doublereal *a, integer * - lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer j; - static doublereal ajj; - extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, - integer *); - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *); - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DPOTF2 computes the Cholesky factorization of a real symmetric - positive definite matrix A. - - The factorization has the form - A = U' * U , if UPLO = 'U', or - A = L * L', if UPLO = 'L', - where U is an upper triangular matrix and L is lower triangular. - - This is the unblocked version of the algorithm, calling Level 2 BLAS. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the upper or lower triangular part of the - symmetric matrix A is stored. - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the symmetric matrix A. If UPLO = 'U', the leading - n by n upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading n by n lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - - On exit, if INFO = 0, the factor U or L from the Cholesky - factorization A = U'*U or A = L*L'. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - > 0: if INFO = k, the leading minor of order k is not - positive definite, and the factorization could not be - completed. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DPOTF2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - if (upper) { - -/* Compute the Cholesky factorization A = U'*U. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - -/* Compute U(J,J) and test for non-positive-definiteness. */ - - i__2 = j - 1; - ajj = a[j + j * a_dim1] - ddot_(&i__2, &a[j * a_dim1 + 1], &c__1, - &a[j * a_dim1 + 1], &c__1); - if (ajj <= 0.) { - a[j + j * a_dim1] = ajj; - goto L30; - } - ajj = sqrt(ajj); - a[j + j * a_dim1] = ajj; - -/* Compute elements J+1:N of row J. */ - - if (j < *n) { - i__2 = j - 1; - i__3 = *n - j; - dgemv_("Transpose", &i__2, &i__3, &c_b2589, &a[(j + 1) * - a_dim1 + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b2453, - &a[j + (j + 1) * a_dim1], lda); - i__2 = *n - j; - d__1 = 1. / ajj; - dscal_(&i__2, &d__1, &a[j + (j + 1) * a_dim1], lda); - } -/* L10: */ - } - } else { - -/* Compute the Cholesky factorization A = L*L'. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - -/* Compute L(J,J) and test for non-positive-definiteness. */ - - i__2 = j - 1; - ajj = a[j + j * a_dim1] - ddot_(&i__2, &a[j + a_dim1], lda, &a[j - + a_dim1], lda); - if (ajj <= 0.) { - a[j + j * a_dim1] = ajj; - goto L30; - } - ajj = sqrt(ajj); - a[j + j * a_dim1] = ajj; - -/* Compute elements J+1:N of column J. */ - - if (j < *n) { - i__2 = *n - j; - i__3 = j - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b2589, &a[j + 1 + - a_dim1], lda, &a[j + a_dim1], lda, &c_b2453, &a[j + 1 - + j * a_dim1], &c__1); - i__2 = *n - j; - d__1 = 1. / ajj; - dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); - } -/* L20: */ - } - } - goto L40; - -L30: - *info = j; - -L40: - return 0; - -/* End of DPOTF2 */ - -} /* dpotf2_ */ - -/* Subroutine */ int dpotrf_(char *uplo, integer *n, doublereal *a, integer * - lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer j, jb, nb; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, - integer *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *); - static logical upper; - extern /* Subroutine */ int dsyrk_(char *, char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, doublereal *, - integer *), dpotf2_(char *, integer *, - doublereal *, integer *, integer *), xerbla_(char *, - integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - DPOTRF computes the Cholesky factorization of a real symmetric - positive definite matrix A. - - The factorization has the form - A = U**T * U, if UPLO = 'U', or - A = L * L**T, if UPLO = 'L', - where U is an upper triangular matrix and L is lower triangular. - - This is the block version of the algorithm, calling Level 3 BLAS. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the symmetric matrix A. If UPLO = 'U', the leading - N-by-N upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading N-by-N lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - - On exit, if INFO = 0, the factor U or L from the Cholesky - factorization A = U**T*U or A = L*L**T. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the leading minor of order i is not - positive definite, and the factorization could not be - completed. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DPOTRF", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Determine the block size for this environment. */ - - nb = ilaenv_(&c__1, "DPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - if (nb <= 1 || nb >= *n) { - -/* Use unblocked code. */ - - dpotf2_(uplo, n, &a[a_offset], lda, info); - } else { - -/* Use blocked code. */ - - if (upper) { - -/* Compute the Cholesky factorization A = U'*U. */ - - i__1 = *n; - i__2 = nb; - for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { - -/* - Update and factorize the current diagonal block and test - for non-positive-definiteness. - - Computing MIN -*/ - i__3 = nb, i__4 = *n - j + 1; - jb = min(i__3,i__4); - i__3 = j - 1; - dsyrk_("Upper", "Transpose", &jb, &i__3, &c_b2589, &a[j * - a_dim1 + 1], lda, &c_b2453, &a[j + j * a_dim1], lda); - dpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); - if (*info != 0) { - goto L30; - } - if (j + jb <= *n) { - -/* Compute the current block row. */ - - i__3 = *n - j - jb + 1; - i__4 = j - 1; - dgemm_("Transpose", "No transpose", &jb, &i__3, &i__4, & - c_b2589, &a[j * a_dim1 + 1], lda, &a[(j + jb) * - a_dim1 + 1], lda, &c_b2453, &a[j + (j + jb) * - a_dim1], lda); - i__3 = *n - j - jb + 1; - dtrsm_("Left", "Upper", "Transpose", "Non-unit", &jb, & - i__3, &c_b2453, &a[j + j * a_dim1], lda, &a[j + ( - j + jb) * a_dim1], lda); - } -/* L10: */ - } - - } else { - -/* Compute the Cholesky factorization A = L*L'. */ - - i__2 = *n; - i__1 = nb; - for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { - -/* - Update and factorize the current diagonal block and test - for non-positive-definiteness. - - Computing MIN -*/ - i__3 = nb, i__4 = *n - j + 1; - jb = min(i__3,i__4); - i__3 = j - 1; - dsyrk_("Lower", "No transpose", &jb, &i__3, &c_b2589, &a[j + - a_dim1], lda, &c_b2453, &a[j + j * a_dim1], lda); - dpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); - if (*info != 0) { - goto L30; - } - if (j + jb <= *n) { - -/* Compute the current block column. */ - - i__3 = *n - j - jb + 1; - i__4 = j - 1; - dgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, & - c_b2589, &a[j + jb + a_dim1], lda, &a[j + a_dim1], - lda, &c_b2453, &a[j + jb + j * a_dim1], lda); - i__3 = *n - j - jb + 1; - dtrsm_("Right", "Lower", "Transpose", "Non-unit", &i__3, & - jb, &c_b2453, &a[j + j * a_dim1], lda, &a[j + jb - + j * a_dim1], lda); - } -/* L20: */ - } - } - } - goto L40; - -L30: - *info = *info + j - 1; - -L40: - return 0; - -/* End of DPOTRF */ - -} /* dpotrf_ */ - -/* Subroutine */ int dpotri_(char *uplo, integer *n, doublereal *a, integer * - lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1; - - /* Local variables */ - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *), dlauum_( - char *, integer *, doublereal *, integer *, integer *), - dtrtri_(char *, char *, integer *, doublereal *, integer *, - integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - DPOTRI computes the inverse of a real symmetric positive definite - matrix A using the Cholesky factorization A = U**T*U or A = L*L**T - computed by DPOTRF. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the triangular factor U or L from the Cholesky - factorization A = U**T*U or A = L*L**T, as computed by - DPOTRF. - On exit, the upper or lower triangle of the (symmetric) - inverse of A, overwriting the input factor U or L. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the (i,i) element of the factor U or L is - zero, and the inverse could not be computed. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DPOTRI", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Invert the triangular Cholesky factor U or L. */ - - dtrtri_(uplo, "Non-unit", n, &a[a_offset], lda, info); - if (*info > 0) { - return 0; - } - -/* Form inv(U)*inv(U)' or inv(L)'*inv(L). */ - - dlauum_(uplo, n, &a[a_offset], lda, info); - - return 0; - -/* End of DPOTRI */ - -} /* dpotri_ */ - -/* Subroutine */ int dpotrs_(char *uplo, integer *n, integer *nrhs, - doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * - info) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1; - - /* Local variables */ - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, - integer *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *); - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - DPOTRS solves a system of linear equations A*X = B with a symmetric - positive definite matrix A using the Cholesky factorization - A = U**T*U or A = L*L**T computed by DPOTRF. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - NRHS (input) INTEGER - The number of right hand sides, i.e., the number of columns - of the matrix B. NRHS >= 0. - - A (input) DOUBLE PRECISION array, dimension (LDA,N) - The triangular factor U or L from the Cholesky factorization - A = U**T*U or A = L*L**T, as computed by DPOTRF. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) - On entry, the right hand side matrix B. - On exit, the solution matrix X. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*nrhs < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*ldb < max(1,*n)) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DPOTRS", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0 || *nrhs == 0) { - return 0; - } - - if (upper) { - -/* - Solve A*X = B where A = U'*U. - - Solve U'*X = B, overwriting B with X. -*/ - - dtrsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b2453, & - a[a_offset], lda, &b[b_offset], ldb); - -/* Solve U*X = B, overwriting B with X. */ - - dtrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b2453, - &a[a_offset], lda, &b[b_offset], ldb); - } else { - -/* - Solve A*X = B where A = L*L'. - - Solve L*X = B, overwriting B with X. -*/ - - dtrsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b2453, - &a[a_offset], lda, &b[b_offset], ldb); - -/* Solve L'*X = B, overwriting B with X. */ - - dtrsm_("Left", "Lower", "Transpose", "Non-unit", n, nrhs, &c_b2453, & - a[a_offset], lda, &b[b_offset], ldb); - } - - return 0; - -/* End of DPOTRS */ - -} /* dpotrs_ */ - -/* Subroutine */ int dstedc_(char *compz, integer *n, doublereal *d__, - doublereal *e, doublereal *z__, integer *ldz, doublereal *work, - integer *lwork, integer *iwork, integer *liwork, integer *info) -{ - /* System generated locals */ - integer z_dim1, z_offset, i__1, i__2; - doublereal d__1, d__2; - - /* Builtin functions */ - double log(doublereal); - integer pow_ii(integer *, integer *); - double sqrt(doublereal); - - /* Local variables */ - static integer i__, j, k, m; - static doublereal p; - static integer ii, end, lgn; - static doublereal eps, tiny; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, - doublereal *, integer *); - static integer lwmin; - extern /* Subroutine */ int dlaed0_(integer *, integer *, integer *, - doublereal *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, integer *, integer *); - static integer start; - - extern /* Subroutine */ int dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *), dlacpy_(char *, integer *, integer - *, doublereal *, integer *, doublereal *, integer *), - dlaset_(char *, integer *, integer *, doublereal *, doublereal *, - doublereal *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int xerbla_(char *, integer *); - extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); - extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, - integer *), dlasrt_(char *, integer *, doublereal *, integer *); - static integer liwmin, icompz; - extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, - doublereal *, doublereal *, integer *, doublereal *, integer *); - static doublereal orgnrm; - static logical lquery; - static integer smlsiz, dtrtrw, storez; - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DSTEDC computes all eigenvalues and, optionally, eigenvectors of a - symmetric tridiagonal matrix using the divide and conquer method. - The eigenvectors of a full or band real symmetric matrix can also be - found if DSYTRD or DSPTRD or DSBTRD has been used to reduce this - matrix to tridiagonal form. - - This code makes very mild assumptions about floating point - arithmetic. It will work on machines with a guard digit in - add/subtract, or on those binary machines without guard digits - which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. - It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. See DLAED3 for details. - - Arguments - ========= - - COMPZ (input) CHARACTER*1 - = 'N': Compute eigenvalues only. - = 'I': Compute eigenvectors of tridiagonal matrix also. - = 'V': Compute eigenvectors of original dense symmetric - matrix also. On entry, Z contains the orthogonal - matrix used to reduce the original matrix to - tridiagonal form. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the diagonal elements of the tridiagonal matrix. - On exit, if INFO = 0, the eigenvalues in ascending order. - - E (input/output) DOUBLE PRECISION array, dimension (N-1) - On entry, the subdiagonal elements of the tridiagonal matrix. - On exit, E has been destroyed. - - Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) - On entry, if COMPZ = 'V', then Z contains the orthogonal - matrix used in the reduction to tridiagonal form. - On exit, if INFO = 0, then if COMPZ = 'V', Z contains the - orthonormal eigenvectors of the original symmetric matrix, - and if COMPZ = 'I', Z contains the orthonormal eigenvectors - of the symmetric tridiagonal matrix. - If COMPZ = 'N', then Z is not referenced. - - LDZ (input) INTEGER - The leading dimension of the array Z. LDZ >= 1. - If eigenvectors are desired, then LDZ >= max(1,N). - - WORK (workspace/output) DOUBLE PRECISION array, - dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If COMPZ = 'N' or N <= 1 then LWORK must be at least 1. - If COMPZ = 'V' and N > 1 then LWORK must be at least - ( 1 + 3*N + 2*N*lg N + 3*N**2 ), - where lg( N ) = smallest integer k such - that 2**k >= N. - If COMPZ = 'I' and N > 1 then LWORK must be at least - ( 1 + 4*N + N**2 ). - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - IWORK (workspace/output) INTEGER array, dimension (LIWORK) - On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. - - LIWORK (input) INTEGER - The dimension of the array IWORK. - If COMPZ = 'N' or N <= 1 then LIWORK must be at least 1. - If COMPZ = 'V' and N > 1 then LIWORK must be at least - ( 6 + 6*N + 5*N*lg N ). - If COMPZ = 'I' and N > 1 then LIWORK must be at least - ( 3 + 5*N ). - - If LIWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the IWORK array, - returns this value as the first entry of the IWORK array, and - no error message related to LIWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an eigenvalue while - working on the submatrix lying in rows and columns - INFO/(N+1) through mod(INFO,N+1). - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - Modified by Francoise Tisseur, University of Tennessee. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - --work; - --iwork; - - /* Function Body */ - *info = 0; - lquery = *lwork == -1 || *liwork == -1; - - if (lsame_(compz, "N")) { - icompz = 0; - } else if (lsame_(compz, "V")) { - icompz = 1; - } else if (lsame_(compz, "I")) { - icompz = 2; - } else { - icompz = -1; - } - if (*n <= 1 || icompz <= 0) { - liwmin = 1; - lwmin = 1; - } else { - lgn = (integer) (log((doublereal) (*n)) / log(2.)); - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (icompz == 1) { -/* Computing 2nd power */ - i__1 = *n; - lwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3; - liwmin = *n * 6 + 6 + *n * 5 * lgn; - } else if (icompz == 2) { -/* Computing 2nd power */ - i__1 = *n; - lwmin = (*n << 2) + 1 + i__1 * i__1; - liwmin = *n * 5 + 3; - } - } - if (icompz < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { - *info = -6; - } else if (*lwork < lwmin && ! lquery) { - *info = -8; - } else if (*liwork < liwmin && ! lquery) { - *info = -10; - } - - if (*info == 0) { - work[1] = (doublereal) lwmin; - iwork[1] = liwmin; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("DSTEDC", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - if (*n == 1) { - if (icompz != 0) { - z__[z_dim1 + 1] = 1.; - } - return 0; - } - - smlsiz = ilaenv_(&c__9, "DSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); - -/* - If the following conditional clause is removed, then the routine - will use the Divide and Conquer routine to compute only the - eigenvalues, which requires (3N + 3N**2) real workspace and - (2 + 5N + 2N lg(N)) integer workspace. - Since on many architectures DSTERF is much faster than any other - algorithm for finding eigenvalues only, it is used here - as the default. - - If COMPZ = 'N', use DSTERF to compute the eigenvalues. -*/ - - if (icompz == 0) { - dsterf_(n, &d__[1], &e[1], info); - return 0; - } - -/* - If N is smaller than the minimum divide size (SMLSIZ+1), then - solve the problem with another solver. -*/ - - if (*n <= smlsiz) { - if (icompz == 0) { - dsterf_(n, &d__[1], &e[1], info); - return 0; - } else if (icompz == 2) { - dsteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], - info); - return 0; - } else { - dsteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], - info); - return 0; - } - } - -/* - If COMPZ = 'V', the Z matrix must be stored elsewhere for later - use. -*/ - - if (icompz == 1) { - storez = *n * *n + 1; - } else { - storez = 1; - } - - if (icompz == 2) { - dlaset_("Full", n, n, &c_b2467, &c_b2453, &z__[z_offset], ldz); - } - -/* Scale. */ - - orgnrm = dlanst_("M", n, &d__[1], &e[1]); - if (orgnrm == 0.) { - return 0; - } - - eps = EPSILON; - - start = 1; - -/* while ( START <= N ) */ - -L10: - if (start <= *n) { - -/* - Let END be the position of the next subdiagonal entry such that - E( END ) <= TINY or END = N if no such subdiagonal exists. The - matrix identified by the elements between START and END - constitutes an independent sub-problem. -*/ - - end = start; -L20: - if (end < *n) { - tiny = eps * sqrt((d__1 = d__[end], abs(d__1))) * sqrt((d__2 = - d__[end + 1], abs(d__2))); - if ((d__1 = e[end], abs(d__1)) > tiny) { - ++end; - goto L20; - } - } - -/* (Sub) Problem determined. Compute its size and solve it. */ - - m = end - start + 1; - if (m == 1) { - start = end + 1; - goto L10; - } - if (m > smlsiz) { - *info = smlsiz; - -/* Scale. */ - - orgnrm = dlanst_("M", &m, &d__[start], &e[start]); - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2453, &m, &c__1, &d__[ - start], &m, info); - i__1 = m - 1; - i__2 = m - 1; - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2453, &i__1, &c__1, &e[ - start], &i__2, info); - - if (icompz == 1) { - dtrtrw = 1; - } else { - dtrtrw = start; - } - dlaed0_(&icompz, n, &m, &d__[start], &e[start], &z__[dtrtrw + - start * z_dim1], ldz, &work[1], n, &work[storez], &iwork[ - 1], info); - if (*info != 0) { - *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m - + 1) + start - 1; - return 0; - } - -/* Scale back. */ - - dlascl_("G", &c__0, &c__0, &c_b2453, &orgnrm, &m, &c__1, &d__[ - start], &m, info); - - } else { - if (icompz == 1) { - -/* - Since QR won't update a Z matrix which is larger than the - length of D, we must solve the sub-problem in a workspace and - then multiply back into Z. -*/ - - dsteqr_("I", &m, &d__[start], &e[start], &work[1], &m, &work[ - m * m + 1], info); - dlacpy_("A", n, &m, &z__[start * z_dim1 + 1], ldz, &work[ - storez], n); - dgemm_("N", "N", n, &m, &m, &c_b2453, &work[storez], ldz, & - work[1], &m, &c_b2467, &z__[start * z_dim1 + 1], ldz); - } else if (icompz == 2) { - dsteqr_("I", &m, &d__[start], &e[start], &z__[start + start * - z_dim1], ldz, &work[1], info); - } else { - dsterf_(&m, &d__[start], &e[start], info); - } - if (*info != 0) { - *info = start * (*n + 1) + end; - return 0; - } - } - - start = end + 1; - goto L10; - } - -/* - endwhile - - If the problem split any number of times, then the eigenvalues - will not be properly ordered. Here we permute the eigenvalues - (and the associated eigenvectors) into ascending order. -*/ - - if (m != *n) { - if (icompz == 0) { - -/* Use Quick Sort */ - - dlasrt_("I", n, &d__[1], info); - - } else { - -/* Use Selection Sort to minimize swaps of eigenvectors */ - - i__1 = *n; - for (ii = 2; ii <= i__1; ++ii) { - i__ = ii - 1; - k = i__; - p = d__[i__]; - i__2 = *n; - for (j = ii; j <= i__2; ++j) { - if (d__[j] < p) { - k = j; - p = d__[j]; - } -/* L30: */ - } - if (k != i__) { - d__[k] = d__[i__]; - d__[i__] = p; - dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 - + 1], &c__1); - } -/* L40: */ - } - } - } - - work[1] = (doublereal) lwmin; - iwork[1] = liwmin; - - return 0; - -/* End of DSTEDC */ - -} /* dstedc_ */ - -/* Subroutine */ int dsteqr_(char *compz, integer *n, doublereal *d__, - doublereal *e, doublereal *z__, integer *ldz, doublereal *work, - integer *info) -{ - /* System generated locals */ - integer z_dim1, z_offset, i__1, i__2; - doublereal d__1, d__2; - - /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); - - /* Local variables */ - static doublereal b, c__, f, g; - static integer i__, j, k, l, m; - static doublereal p, r__, s; - static integer l1, ii, mm, lm1, mm1, nm1; - static doublereal rt1, rt2, eps; - static integer lsv; - static doublereal tst, eps2; - static integer lend, jtot; - extern /* Subroutine */ int dlae2_(doublereal *, doublereal *, doublereal - *, doublereal *, doublereal *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, - integer *, doublereal *, doublereal *, doublereal *, integer *); - static doublereal anorm; - extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, - doublereal *, integer *), dlaev2_(doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *); - static integer lendm1, lendp1; - - static integer iscale; - extern /* Subroutine */ int dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *), dlaset_(char *, integer *, integer - *, doublereal *, doublereal *, doublereal *, integer *); - static doublereal safmin; - extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *); - static doublereal safmax; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); - extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, - integer *); - static integer lendsv; - static doublereal ssfmin; - static integer nmaxit, icompz; - static doublereal ssfmax; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - DSTEQR computes all eigenvalues and, optionally, eigenvectors of a - symmetric tridiagonal matrix using the implicit QL or QR method. - The eigenvectors of a full or band symmetric matrix can also be found - if DSYTRD or DSPTRD or DSBTRD has been used to reduce this matrix to - tridiagonal form. - - Arguments - ========= - - COMPZ (input) CHARACTER*1 - = 'N': Compute eigenvalues only. - = 'V': Compute eigenvalues and eigenvectors of the original - symmetric matrix. On entry, Z must contain the - orthogonal matrix used to reduce the original matrix - to tridiagonal form. - = 'I': Compute eigenvalues and eigenvectors of the - tridiagonal matrix. Z is initialized to the identity - matrix. - - N (input) INTEGER - The order of the matrix. N >= 0. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the diagonal elements of the tridiagonal matrix. - On exit, if INFO = 0, the eigenvalues in ascending order. - - E (input/output) DOUBLE PRECISION array, dimension (N-1) - On entry, the (n-1) subdiagonal elements of the tridiagonal - matrix. - On exit, E has been destroyed. - - Z (input/output) DOUBLE PRECISION array, dimension (LDZ, N) - On entry, if COMPZ = 'V', then Z contains the orthogonal - matrix used in the reduction to tridiagonal form. - On exit, if INFO = 0, then if COMPZ = 'V', Z contains the - orthonormal eigenvectors of the original symmetric matrix, - and if COMPZ = 'I', Z contains the orthonormal eigenvectors - of the symmetric tridiagonal matrix. - If COMPZ = 'N', then Z is not referenced. - - LDZ (input) INTEGER - The leading dimension of the array Z. LDZ >= 1, and if - eigenvectors are desired, then LDZ >= max(1,N). - - WORK (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2)) - If COMPZ = 'N', then WORK is not referenced. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: the algorithm has failed to find all the eigenvalues in - a total of 30*N iterations; if INFO = i, then i - elements of E have not converged to zero; on exit, D - and E contain the elements of a symmetric tridiagonal - matrix which is orthogonally similar to the original - matrix. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - --work; - - /* Function Body */ - *info = 0; - - if (lsame_(compz, "N")) { - icompz = 0; - } else if (lsame_(compz, "V")) { - icompz = 1; - } else if (lsame_(compz, "I")) { - icompz = 2; - } else { - icompz = -1; - } - if (icompz < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { - *info = -6; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DSTEQR", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - if (*n == 1) { - if (icompz == 2) { - z__[z_dim1 + 1] = 1.; - } - return 0; - } - -/* Determine the unit roundoff and over/underflow thresholds. */ - - eps = EPSILON; -/* Computing 2nd power */ - d__1 = eps; - eps2 = d__1 * d__1; - safmin = SAFEMINIMUM; - safmax = 1. / safmin; - ssfmax = sqrt(safmax) / 3.; - ssfmin = sqrt(safmin) / eps2; - -/* - Compute the eigenvalues and eigenvectors of the tridiagonal - matrix. -*/ - - if (icompz == 2) { - dlaset_("Full", n, n, &c_b2467, &c_b2453, &z__[z_offset], ldz); - } - - nmaxit = *n * 30; - jtot = 0; - -/* - Determine where the matrix splits and choose QL or QR iteration - for each block, according to whether top or bottom diagonal - element is smaller. -*/ - - l1 = 1; - nm1 = *n - 1; - -L10: - if (l1 > *n) { - goto L160; - } - if (l1 > 1) { - e[l1 - 1] = 0.; - } - if (l1 <= nm1) { - i__1 = nm1; - for (m = l1; m <= i__1; ++m) { - tst = (d__1 = e[m], abs(d__1)); - if (tst == 0.) { - goto L30; - } - if (tst <= sqrt((d__1 = d__[m], abs(d__1))) * sqrt((d__2 = d__[m - + 1], abs(d__2))) * eps) { - e[m] = 0.; - goto L30; - } -/* L20: */ - } - } - m = *n; - -L30: - l = l1; - lsv = l; - lend = m; - lendsv = lend; - l1 = m + 1; - if (lend == l) { - goto L10; - } - -/* Scale submatrix in rows and columns L to LEND */ - - i__1 = lend - l + 1; - anorm = dlanst_("I", &i__1, &d__[l], &e[l]); - iscale = 0; - if (anorm == 0.) { - goto L10; - } - if (anorm > ssfmax) { - iscale = 1; - i__1 = lend - l + 1; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, - info); - i__1 = lend - l; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, - info); - } else if (anorm < ssfmin) { - iscale = 2; - i__1 = lend - l + 1; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, - info); - i__1 = lend - l; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, - info); - } - -/* Choose between QL and QR iteration */ - - if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { - lend = lsv; - l = lendsv; - } - - if (lend > l) { - -/* - QL Iteration - - Look for small subdiagonal element. -*/ - -L40: - if (l != lend) { - lendm1 = lend - 1; - i__1 = lendm1; - for (m = l; m <= i__1; ++m) { -/* Computing 2nd power */ - d__2 = (d__1 = e[m], abs(d__1)); - tst = d__2 * d__2; - if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m - + 1], abs(d__2)) + safmin) { - goto L60; - } -/* L50: */ - } - } - - m = lend; - -L60: - if (m < lend) { - e[m] = 0.; - } - p = d__[l]; - if (m == l) { - goto L80; - } - -/* - If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 - to compute its eigensystem. -*/ - - if (m == l + 1) { - if (icompz > 0) { - dlaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); - work[l] = c__; - work[*n - 1 + l] = s; - dlasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & - z__[l * z_dim1 + 1], ldz); - } else { - dlae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); - } - d__[l] = rt1; - d__[l + 1] = rt2; - e[l] = 0.; - l += 2; - if (l <= lend) { - goto L40; - } - goto L140; - } - - if (jtot == nmaxit) { - goto L140; - } - ++jtot; - -/* Form shift. */ - - g = (d__[l + 1] - p) / (e[l] * 2.); - r__ = dlapy2_(&g, &c_b2453); - g = d__[m] - p + e[l] / (g + d_sign(&r__, &g)); - - s = 1.; - c__ = 1.; - p = 0.; - -/* Inner loop */ - - mm1 = m - 1; - i__1 = l; - for (i__ = mm1; i__ >= i__1; --i__) { - f = s * e[i__]; - b = c__ * e[i__]; - dlartg_(&g, &f, &c__, &s, &r__); - if (i__ != m - 1) { - e[i__ + 1] = r__; - } - g = d__[i__ + 1] - p; - r__ = (d__[i__] - g) * s + c__ * 2. * b; - p = s * r__; - d__[i__ + 1] = g + p; - g = c__ * r__ - b; - -/* If eigenvectors are desired, then save rotations. */ - - if (icompz > 0) { - work[i__] = c__; - work[*n - 1 + i__] = -s; - } - -/* L70: */ - } - -/* If eigenvectors are desired, then apply saved rotations. */ - - if (icompz > 0) { - mm = m - l + 1; - dlasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l - * z_dim1 + 1], ldz); - } - - d__[l] -= p; - e[l] = g; - goto L40; - -/* Eigenvalue found. */ - -L80: - d__[l] = p; - - ++l; - if (l <= lend) { - goto L40; - } - goto L140; - - } else { - -/* - QR Iteration - - Look for small superdiagonal element. -*/ - -L90: - if (l != lend) { - lendp1 = lend + 1; - i__1 = lendp1; - for (m = l; m >= i__1; --m) { -/* Computing 2nd power */ - d__2 = (d__1 = e[m - 1], abs(d__1)); - tst = d__2 * d__2; - if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m - - 1], abs(d__2)) + safmin) { - goto L110; - } -/* L100: */ - } - } - - m = lend; - -L110: - if (m > lend) { - e[m - 1] = 0.; - } - p = d__[l]; - if (m == l) { - goto L130; - } - -/* - If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 - to compute its eigensystem. -*/ - - if (m == l - 1) { - if (icompz > 0) { - dlaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) - ; - work[m] = c__; - work[*n - 1 + m] = s; - dlasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & - z__[(l - 1) * z_dim1 + 1], ldz); - } else { - dlae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); - } - d__[l - 1] = rt1; - d__[l] = rt2; - e[l - 1] = 0.; - l += -2; - if (l >= lend) { - goto L90; - } - goto L140; - } - - if (jtot == nmaxit) { - goto L140; - } - ++jtot; - -/* Form shift. */ - - g = (d__[l - 1] - p) / (e[l - 1] * 2.); - r__ = dlapy2_(&g, &c_b2453); - g = d__[m] - p + e[l - 1] / (g + d_sign(&r__, &g)); - - s = 1.; - c__ = 1.; - p = 0.; - -/* Inner loop */ - - lm1 = l - 1; - i__1 = lm1; - for (i__ = m; i__ <= i__1; ++i__) { - f = s * e[i__]; - b = c__ * e[i__]; - dlartg_(&g, &f, &c__, &s, &r__); - if (i__ != m) { - e[i__ - 1] = r__; - } - g = d__[i__] - p; - r__ = (d__[i__ + 1] - g) * s + c__ * 2. * b; - p = s * r__; - d__[i__] = g + p; - g = c__ * r__ - b; - -/* If eigenvectors are desired, then save rotations. */ - - if (icompz > 0) { - work[i__] = c__; - work[*n - 1 + i__] = s; - } - -/* L120: */ - } - -/* If eigenvectors are desired, then apply saved rotations. */ - - if (icompz > 0) { - mm = l - m + 1; - dlasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m - * z_dim1 + 1], ldz); - } - - d__[l] -= p; - e[lm1] = g; - goto L90; - -/* Eigenvalue found. */ - -L130: - d__[l] = p; - - --l; - if (l >= lend) { - goto L90; - } - goto L140; - - } - -/* Undo scaling if necessary */ - -L140: - if (iscale == 1) { - i__1 = lendsv - lsv + 1; - dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], - n, info); - i__1 = lendsv - lsv; - dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, - info); - } else if (iscale == 2) { - i__1 = lendsv - lsv + 1; - dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], - n, info); - i__1 = lendsv - lsv; - dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, - info); - } - -/* - Check for no convergence to an eigenvalue after a total - of N*MAXIT iterations. -*/ - - if (jtot < nmaxit) { - goto L10; - } - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - if (e[i__] != 0.) { - ++(*info); - } -/* L150: */ - } - goto L190; - -/* Order eigenvalues and eigenvectors. */ - -L160: - if (icompz == 0) { - -/* Use Quick Sort */ - - dlasrt_("I", n, &d__[1], info); - - } else { - -/* Use Selection Sort to minimize swaps of eigenvectors */ - - i__1 = *n; - for (ii = 2; ii <= i__1; ++ii) { - i__ = ii - 1; - k = i__; - p = d__[i__]; - i__2 = *n; - for (j = ii; j <= i__2; ++j) { - if (d__[j] < p) { - k = j; - p = d__[j]; - } -/* L170: */ - } - if (k != i__) { - d__[k] = d__[i__]; - d__[i__] = p; - dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], - &c__1); - } -/* L180: */ - } - } - -L190: - return 0; - -/* End of DSTEQR */ - -} /* dsteqr_ */ - -/* Subroutine */ int dsterf_(integer *n, doublereal *d__, doublereal *e, - integer *info) -{ - /* System generated locals */ - integer i__1; - doublereal d__1, d__2, d__3; - - /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); - - /* Local variables */ - static doublereal c__; - static integer i__, l, m; - static doublereal p, r__, s; - static integer l1; - static doublereal bb, rt1, rt2, eps, rte; - static integer lsv; - static doublereal eps2, oldc; - static integer lend, jtot; - extern /* Subroutine */ int dlae2_(doublereal *, doublereal *, doublereal - *, doublereal *, doublereal *); - static doublereal gamma, alpha, sigma, anorm; - - static integer iscale; - extern /* Subroutine */ int dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *); - static doublereal oldgam, safmin; - extern /* Subroutine */ int xerbla_(char *, integer *); - static doublereal safmax; - extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); - extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, - integer *); - static integer lendsv; - static doublereal ssfmin; - static integer nmaxit; - static doublereal ssfmax; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DSTERF computes all eigenvalues of a symmetric tridiagonal matrix - using the Pal-Walker-Kahan variant of the QL or QR algorithm. - - Arguments - ========= - - N (input) INTEGER - The order of the matrix. N >= 0. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the n diagonal elements of the tridiagonal matrix. - On exit, if INFO = 0, the eigenvalues in ascending order. - - E (input/output) DOUBLE PRECISION array, dimension (N-1) - On entry, the (n-1) subdiagonal elements of the tridiagonal - matrix. - On exit, E has been destroyed. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: the algorithm failed to find all of the eigenvalues in - a total of 30*N iterations; if INFO = i, then i - elements of E have not converged to zero. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --e; - --d__; - - /* Function Body */ - *info = 0; - -/* Quick return if possible */ - - if (*n < 0) { - *info = -1; - i__1 = -(*info); - xerbla_("DSTERF", &i__1); - return 0; - } - if (*n <= 1) { - return 0; - } - -/* Determine the unit roundoff for this environment. */ - - eps = EPSILON; -/* Computing 2nd power */ - d__1 = eps; - eps2 = d__1 * d__1; - safmin = SAFEMINIMUM; - safmax = 1. / safmin; - ssfmax = sqrt(safmax) / 3.; - ssfmin = sqrt(safmin) / eps2; - -/* Compute the eigenvalues of the tridiagonal matrix. */ - - nmaxit = *n * 30; - sigma = 0.; - jtot = 0; - -/* - Determine where the matrix splits and choose QL or QR iteration - for each block, according to whether top or bottom diagonal - element is smaller. -*/ - - l1 = 1; - -L10: - if (l1 > *n) { - goto L170; - } - if (l1 > 1) { - e[l1 - 1] = 0.; - } - i__1 = *n - 1; - for (m = l1; m <= i__1; ++m) { - if ((d__3 = e[m], abs(d__3)) <= sqrt((d__1 = d__[m], abs(d__1))) * - sqrt((d__2 = d__[m + 1], abs(d__2))) * eps) { - e[m] = 0.; - goto L30; - } -/* L20: */ - } - m = *n; - -L30: - l = l1; - lsv = l; - lend = m; - lendsv = lend; - l1 = m + 1; - if (lend == l) { - goto L10; - } - -/* Scale submatrix in rows and columns L to LEND */ - - i__1 = lend - l + 1; - anorm = dlanst_("I", &i__1, &d__[l], &e[l]); - iscale = 0; - if (anorm > ssfmax) { - iscale = 1; - i__1 = lend - l + 1; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, - info); - i__1 = lend - l; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, - info); - } else if (anorm < ssfmin) { - iscale = 2; - i__1 = lend - l + 1; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, - info); - i__1 = lend - l; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, - info); - } - - i__1 = lend - 1; - for (i__ = l; i__ <= i__1; ++i__) { -/* Computing 2nd power */ - d__1 = e[i__]; - e[i__] = d__1 * d__1; -/* L40: */ - } - -/* Choose between QL and QR iteration */ - - if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { - lend = lsv; - l = lendsv; - } - - if (lend >= l) { - -/* - QL Iteration - - Look for small subdiagonal element. -*/ - -L50: - if (l != lend) { - i__1 = lend - 1; - for (m = l; m <= i__1; ++m) { - if ((d__2 = e[m], abs(d__2)) <= eps2 * (d__1 = d__[m] * d__[m - + 1], abs(d__1))) { - goto L70; - } -/* L60: */ - } - } - m = lend; - -L70: - if (m < lend) { - e[m] = 0.; - } - p = d__[l]; - if (m == l) { - goto L90; - } - -/* - If remaining matrix is 2 by 2, use DLAE2 to compute its - eigenvalues. -*/ - - if (m == l + 1) { - rte = sqrt(e[l]); - dlae2_(&d__[l], &rte, &d__[l + 1], &rt1, &rt2); - d__[l] = rt1; - d__[l + 1] = rt2; - e[l] = 0.; - l += 2; - if (l <= lend) { - goto L50; - } - goto L150; - } - - if (jtot == nmaxit) { - goto L150; - } - ++jtot; - -/* Form shift. */ - - rte = sqrt(e[l]); - sigma = (d__[l + 1] - p) / (rte * 2.); - r__ = dlapy2_(&sigma, &c_b2453); - sigma = p - rte / (sigma + d_sign(&r__, &sigma)); - - c__ = 1.; - s = 0.; - gamma = d__[m] - sigma; - p = gamma * gamma; - -/* Inner loop */ - - i__1 = l; - for (i__ = m - 1; i__ >= i__1; --i__) { - bb = e[i__]; - r__ = p + bb; - if (i__ != m - 1) { - e[i__ + 1] = s * r__; - } - oldc = c__; - c__ = p / r__; - s = bb / r__; - oldgam = gamma; - alpha = d__[i__]; - gamma = c__ * (alpha - sigma) - s * oldgam; - d__[i__ + 1] = oldgam + (alpha - gamma); - if (c__ != 0.) { - p = gamma * gamma / c__; - } else { - p = oldc * bb; - } -/* L80: */ - } - - e[l] = s * p; - d__[l] = sigma + gamma; - goto L50; - -/* Eigenvalue found. */ - -L90: - d__[l] = p; - - ++l; - if (l <= lend) { - goto L50; - } - goto L150; - - } else { - -/* - QR Iteration - - Look for small superdiagonal element. -*/ - -L100: - i__1 = lend + 1; - for (m = l; m >= i__1; --m) { - if ((d__2 = e[m - 1], abs(d__2)) <= eps2 * (d__1 = d__[m] * d__[m - - 1], abs(d__1))) { - goto L120; - } -/* L110: */ - } - m = lend; - -L120: - if (m > lend) { - e[m - 1] = 0.; - } - p = d__[l]; - if (m == l) { - goto L140; - } - -/* - If remaining matrix is 2 by 2, use DLAE2 to compute its - eigenvalues. -*/ - - if (m == l - 1) { - rte = sqrt(e[l - 1]); - dlae2_(&d__[l], &rte, &d__[l - 1], &rt1, &rt2); - d__[l] = rt1; - d__[l - 1] = rt2; - e[l - 1] = 0.; - l += -2; - if (l >= lend) { - goto L100; - } - goto L150; - } - - if (jtot == nmaxit) { - goto L150; - } - ++jtot; - -/* Form shift. */ - - rte = sqrt(e[l - 1]); - sigma = (d__[l - 1] - p) / (rte * 2.); - r__ = dlapy2_(&sigma, &c_b2453); - sigma = p - rte / (sigma + d_sign(&r__, &sigma)); - - c__ = 1.; - s = 0.; - gamma = d__[m] - sigma; - p = gamma * gamma; - -/* Inner loop */ - - i__1 = l - 1; - for (i__ = m; i__ <= i__1; ++i__) { - bb = e[i__]; - r__ = p + bb; - if (i__ != m) { - e[i__ - 1] = s * r__; - } - oldc = c__; - c__ = p / r__; - s = bb / r__; - oldgam = gamma; - alpha = d__[i__ + 1]; - gamma = c__ * (alpha - sigma) - s * oldgam; - d__[i__] = oldgam + (alpha - gamma); - if (c__ != 0.) { - p = gamma * gamma / c__; - } else { - p = oldc * bb; - } -/* L130: */ - } - - e[l - 1] = s * p; - d__[l] = sigma + gamma; - goto L100; - -/* Eigenvalue found. */ - -L140: - d__[l] = p; - - --l; - if (l >= lend) { - goto L100; - } - goto L150; - - } - -/* Undo scaling if necessary */ - -L150: - if (iscale == 1) { - i__1 = lendsv - lsv + 1; - dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], - n, info); - } - if (iscale == 2) { - i__1 = lendsv - lsv + 1; - dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], - n, info); - } - -/* - Check for no convergence to an eigenvalue after a total - of N*MAXIT iterations. -*/ - - if (jtot < nmaxit) { - goto L10; - } - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - if (e[i__] != 0.) { - ++(*info); - } -/* L160: */ - } - goto L180; - -/* Sort eigenvalues in increasing order. */ - -L170: - dlasrt_("I", n, &d__[1], info); - -L180: - return 0; - -/* End of DSTERF */ - -} /* dsterf_ */ - -/* Subroutine */ int dsyevd_(char *jobz, char *uplo, integer *n, doublereal * - a, integer *lda, doublereal *w, doublereal *work, integer *lwork, - integer *iwork, integer *liwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal eps; - static integer inde; - static doublereal anrm, rmin, rmax; - static integer lopt; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - static doublereal sigma; - extern logical lsame_(char *, char *); - static integer iinfo, lwmin, liopt; - static logical lower, wantz; - static integer indwk2, llwrk2; - - static integer iscale; - extern /* Subroutine */ int dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *), dstedc_(char *, integer *, - doublereal *, doublereal *, doublereal *, integer *, doublereal *, - integer *, integer *, integer *, integer *), dlacpy_( - char *, integer *, integer *, doublereal *, integer *, doublereal - *, integer *); - static doublereal safmin; - extern /* Subroutine */ int xerbla_(char *, integer *); - static doublereal bignum; - static integer indtau; - extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, - integer *); - extern doublereal dlansy_(char *, char *, integer *, doublereal *, - integer *, doublereal *); - static integer indwrk, liwmin; - extern /* Subroutine */ int dormtr_(char *, char *, char *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *, integer *), dsytrd_(char *, integer *, doublereal *, integer *, - doublereal *, doublereal *, doublereal *, doublereal *, integer *, - integer *); - static integer llwork; - static doublereal smlnum; - static logical lquery; - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DSYEVD computes all eigenvalues and, optionally, eigenvectors of a - real symmetric matrix A. If eigenvectors are desired, it uses a - divide and conquer algorithm. - - The divide and conquer algorithm makes very mild assumptions about - floating point arithmetic. It will work on machines with a guard - digit in add/subtract, or on those binary machines without guard - digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or - Cray-2. It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. - - Because of large use of BLAS of level 3, DSYEVD needs N**2 more - workspace than DSYEVX. - - Arguments - ========= - - JOBZ (input) CHARACTER*1 - = 'N': Compute eigenvalues only; - = 'V': Compute eigenvalues and eigenvectors. - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA, N) - On entry, the symmetric matrix A. If UPLO = 'U', the - leading N-by-N upper triangular part of A contains the - upper triangular part of the matrix A. If UPLO = 'L', - the leading N-by-N lower triangular part of A contains - the lower triangular part of the matrix A. - On exit, if JOBZ = 'V', then if INFO = 0, A contains the - orthonormal eigenvectors of the matrix A. - If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') - or the upper triangle (if UPLO='U') of A, including the - diagonal, is destroyed. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - W (output) DOUBLE PRECISION array, dimension (N) - If INFO = 0, the eigenvalues in ascending order. - - WORK (workspace/output) DOUBLE PRECISION array, - dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If N <= 1, LWORK must be at least 1. - If JOBZ = 'N' and N > 1, LWORK must be at least 2*N+1. - If JOBZ = 'V' and N > 1, LWORK must be at least - 1 + 6*N + 2*N**2. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - IWORK (workspace/output) INTEGER array, dimension (LIWORK) - On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. - - LIWORK (input) INTEGER - The dimension of the array IWORK. - If N <= 1, LIWORK must be at least 1. - If JOBZ = 'N' and N > 1, LIWORK must be at least 1. - If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. - - If LIWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the IWORK array, - returns this value as the first entry of the IWORK array, and - no error message related to LIWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the algorithm failed to converge; i - off-diagonal elements of an intermediate tridiagonal - form did not converge to zero. - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - Modified by Francoise Tisseur, University of Tennessee. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --w; - --work; - --iwork; - - /* Function Body */ - wantz = lsame_(jobz, "V"); - lower = lsame_(uplo, "L"); - lquery = *lwork == -1 || *liwork == -1; - - *info = 0; - if (*n <= 1) { - liwmin = 1; - lwmin = 1; - lopt = lwmin; - liopt = liwmin; - } else { - if (wantz) { - liwmin = *n * 5 + 3; -/* Computing 2nd power */ - i__1 = *n; - lwmin = *n * 6 + 1 + (i__1 * i__1 << 1); - } else { - liwmin = 1; - lwmin = (*n << 1) + 1; - } - lopt = lwmin; - liopt = liwmin; - } - if (! (wantz || lsame_(jobz, "N"))) { - *info = -1; - } else if (! (lower || lsame_(uplo, "U"))) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*lwork < lwmin && ! lquery) { - *info = -8; - } else if (*liwork < liwmin && ! lquery) { - *info = -10; - } - - if (*info == 0) { - work[1] = (doublereal) lopt; - iwork[1] = liopt; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("DSYEVD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - if (*n == 1) { - w[1] = a[a_dim1 + 1]; - if (wantz) { - a[a_dim1 + 1] = 1.; - } - return 0; - } - -/* Get machine constants. */ - - safmin = SAFEMINIMUM; - eps = PRECISION; - smlnum = safmin / eps; - bignum = 1. / smlnum; - rmin = sqrt(smlnum); - rmax = sqrt(bignum); - -/* Scale matrix to allowable range, if necessary. */ - - anrm = dlansy_("M", uplo, n, &a[a_offset], lda, &work[1]); - iscale = 0; - if (anrm > 0. && anrm < rmin) { - iscale = 1; - sigma = rmin / anrm; - } else if (anrm > rmax) { - iscale = 1; - sigma = rmax / anrm; - } - if (iscale == 1) { - dlascl_(uplo, &c__0, &c__0, &c_b2453, &sigma, n, n, &a[a_offset], lda, - info); - } - -/* Call DSYTRD to reduce symmetric matrix to tridiagonal form. */ - - inde = 1; - indtau = inde + *n; - indwrk = indtau + *n; - llwork = *lwork - indwrk + 1; - indwk2 = indwrk + *n * *n; - llwrk2 = *lwork - indwk2 + 1; - - dsytrd_(uplo, n, &a[a_offset], lda, &w[1], &work[inde], &work[indtau], & - work[indwrk], &llwork, &iinfo); - lopt = (integer) ((*n << 1) + work[indwrk]); - -/* - For eigenvalues only, call DSTERF. For eigenvectors, first call - DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the - tridiagonal matrix, then call DORMTR to multiply it by the - Householder transformations stored in A. -*/ - - if (! wantz) { - dsterf_(n, &w[1], &work[inde], info); - } else { - dstedc_("I", n, &w[1], &work[inde], &work[indwrk], n, &work[indwk2], & - llwrk2, &iwork[1], liwork, info); - dormtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ - indwrk], n, &work[indwk2], &llwrk2, &iinfo); - dlacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); -/* - Computing MAX - Computing 2nd power -*/ - i__3 = *n; - i__1 = lopt, i__2 = *n * 6 + 1 + (i__3 * i__3 << 1); - lopt = max(i__1,i__2); - } - -/* If matrix was scaled, then rescale eigenvalues appropriately. */ - - if (iscale == 1) { - d__1 = 1. / sigma; - dscal_(n, &d__1, &w[1], &c__1); - } - - work[1] = (doublereal) lopt; - iwork[1] = liopt; - - return 0; - -/* End of DSYEVD */ - -} /* dsyevd_ */ - -/* Subroutine */ int dsytd2_(char *uplo, integer *n, doublereal *a, integer * - lda, doublereal *d__, doublereal *e, doublereal *tau, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__; - extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, - integer *); - static doublereal taui; - extern /* Subroutine */ int dsyr2_(char *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *); - static doublereal alpha; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *); - static logical upper; - extern /* Subroutine */ int dsymv_(char *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *), dlarfg_(integer *, doublereal *, - doublereal *, integer *, doublereal *), xerbla_(char *, integer * - ); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal - form T by an orthogonal similarity transformation: Q' * A * Q = T. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the upper or lower triangular part of the - symmetric matrix A is stored: - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the symmetric matrix A. If UPLO = 'U', the leading - n-by-n upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading n-by-n lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - On exit, if UPLO = 'U', the diagonal and first superdiagonal - of A are overwritten by the corresponding elements of the - tridiagonal matrix T, and the elements above the first - superdiagonal, with the array TAU, represent the orthogonal - matrix Q as a product of elementary reflectors; if UPLO - = 'L', the diagonal and first subdiagonal of A are over- - written by the corresponding elements of the tridiagonal - matrix T, and the elements below the first subdiagonal, with - the array TAU, represent the orthogonal matrix Q as a product - of elementary reflectors. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - D (output) DOUBLE PRECISION array, dimension (N) - The diagonal elements of the tridiagonal matrix T: - D(i) = A(i,i). - - E (output) DOUBLE PRECISION array, dimension (N-1) - The off-diagonal elements of the tridiagonal matrix T: - E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. - - TAU (output) DOUBLE PRECISION array, dimension (N-1) - The scalar factors of the elementary reflectors (see Further - Details). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - If UPLO = 'U', the matrix Q is represented as a product of elementary - reflectors - - Q = H(n-1) . . . H(2) H(1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in - A(1:i-1,i+1), and tau in TAU(i). - - If UPLO = 'L', the matrix Q is represented as a product of elementary - reflectors - - Q = H(1) H(2) . . . H(n-1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), - and tau in TAU(i). - - The contents of A on exit are illustrated by the following examples - with n = 5: - - if UPLO = 'U': if UPLO = 'L': - - ( d e v2 v3 v4 ) ( d ) - ( d e v3 v4 ) ( e d ) - ( d e v4 ) ( v1 e d ) - ( d e ) ( v1 v2 e d ) - ( d ) ( v1 v2 v3 e d ) - - where d and e denote diagonal and off-diagonal elements of T, and vi - denotes an element of the vector defining H(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tau; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DSYTD2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n <= 0) { - return 0; - } - - if (upper) { - -/* Reduce the upper triangle of A */ - - for (i__ = *n - 1; i__ >= 1; --i__) { - -/* - Generate elementary reflector H(i) = I - tau * v * v' - to annihilate A(1:i-1,i+1) -*/ - - dlarfg_(&i__, &a[i__ + (i__ + 1) * a_dim1], &a[(i__ + 1) * a_dim1 - + 1], &c__1, &taui); - e[i__] = a[i__ + (i__ + 1) * a_dim1]; - - if (taui != 0.) { - -/* Apply H(i) from both sides to A(1:i,1:i) */ - - a[i__ + (i__ + 1) * a_dim1] = 1.; - -/* Compute x := tau * A * v storing x in TAU(1:i) */ - - dsymv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * - a_dim1 + 1], &c__1, &c_b2467, &tau[1], &c__1); - -/* Compute w := x - 1/2 * tau * (x'*v) * v */ - - alpha = taui * -.5 * ddot_(&i__, &tau[1], &c__1, &a[(i__ + 1) - * a_dim1 + 1], &c__1); - daxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[ - 1], &c__1); - -/* - Apply the transformation as a rank-2 update: - A := A - v * w' - w * v' -*/ - - dsyr2_(uplo, &i__, &c_b2589, &a[(i__ + 1) * a_dim1 + 1], & - c__1, &tau[1], &c__1, &a[a_offset], lda); - - a[i__ + (i__ + 1) * a_dim1] = e[i__]; - } - d__[i__ + 1] = a[i__ + 1 + (i__ + 1) * a_dim1]; - tau[i__] = taui; -/* L10: */ - } - d__[1] = a[a_dim1 + 1]; - } else { - -/* Reduce the lower triangle of A */ - - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* - Generate elementary reflector H(i) = I - tau * v * v' - to annihilate A(i+2:n,i) -*/ - - i__2 = *n - i__; -/* Computing MIN */ - i__3 = i__ + 2; - dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ * - a_dim1], &c__1, &taui); - e[i__] = a[i__ + 1 + i__ * a_dim1]; - - if (taui != 0.) { - -/* Apply H(i) from both sides to A(i+1:n,i+1:n) */ - - a[i__ + 1 + i__ * a_dim1] = 1.; - -/* Compute x := tau * A * v storing y in TAU(i:n-1) */ - - i__2 = *n - i__; - dsymv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], - lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b2467, & - tau[i__], &c__1); - -/* Compute w := x - 1/2 * tau * (x'*v) * v */ - - i__2 = *n - i__; - alpha = taui * -.5 * ddot_(&i__2, &tau[i__], &c__1, &a[i__ + - 1 + i__ * a_dim1], &c__1); - i__2 = *n - i__; - daxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ - i__], &c__1); - -/* - Apply the transformation as a rank-2 update: - A := A - v * w' - w * v' -*/ - - i__2 = *n - i__; - dsyr2_(uplo, &i__2, &c_b2589, &a[i__ + 1 + i__ * a_dim1], & - c__1, &tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * - a_dim1], lda); - - a[i__ + 1 + i__ * a_dim1] = e[i__]; - } - d__[i__] = a[i__ + i__ * a_dim1]; - tau[i__] = taui; -/* L20: */ - } - d__[*n] = a[*n + *n * a_dim1]; - } - - return 0; - -/* End of DSYTD2 */ - -} /* dsytd2_ */ - -/* Subroutine */ int dsytrd_(char *uplo, integer *n, doublereal *a, integer * - lda, doublereal *d__, doublereal *e, doublereal *tau, doublereal * - work, integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j, nb, kk, nx, iws; - extern logical lsame_(char *, char *); - static integer nbmin, iinfo; - static logical upper; - extern /* Subroutine */ int dsytd2_(char *, integer *, doublereal *, - integer *, doublereal *, doublereal *, doublereal *, integer *), dsyr2k_(char *, char *, integer *, integer *, doublereal - *, doublereal *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *), dlatrd_(char *, - integer *, integer *, doublereal *, integer *, doublereal *, - doublereal *, doublereal *, integer *), xerbla_(char *, - integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DSYTRD reduces a real symmetric matrix A to real symmetric - tridiagonal form T by an orthogonal similarity transformation: - Q**T * A * Q = T. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the symmetric matrix A. If UPLO = 'U', the leading - N-by-N upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading N-by-N lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - On exit, if UPLO = 'U', the diagonal and first superdiagonal - of A are overwritten by the corresponding elements of the - tridiagonal matrix T, and the elements above the first - superdiagonal, with the array TAU, represent the orthogonal - matrix Q as a product of elementary reflectors; if UPLO - = 'L', the diagonal and first subdiagonal of A are over- - written by the corresponding elements of the tridiagonal - matrix T, and the elements below the first subdiagonal, with - the array TAU, represent the orthogonal matrix Q as a product - of elementary reflectors. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - D (output) DOUBLE PRECISION array, dimension (N) - The diagonal elements of the tridiagonal matrix T: - D(i) = A(i,i). - - E (output) DOUBLE PRECISION array, dimension (N-1) - The off-diagonal elements of the tridiagonal matrix T: - E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. - - TAU (output) DOUBLE PRECISION array, dimension (N-1) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= 1. - For optimum performance LWORK >= N*NB, where NB is the - optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - If UPLO = 'U', the matrix Q is represented as a product of elementary - reflectors - - Q = H(n-1) . . . H(2) H(1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in - A(1:i-1,i+1), and tau in TAU(i). - - If UPLO = 'L', the matrix Q is represented as a product of elementary - reflectors - - Q = H(1) H(2) . . . H(n-1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), - and tau in TAU(i). - - The contents of A on exit are illustrated by the following examples - with n = 5: - - if UPLO = 'U': if UPLO = 'L': - - ( d e v2 v3 v4 ) ( d ) - ( d e v3 v4 ) ( e d ) - ( d e v4 ) ( v1 e d ) - ( d e ) ( v1 v2 e d ) - ( d ) ( v1 v2 v3 e d ) - - where d and e denote diagonal and off-diagonal elements of T, and vi - denotes an element of the vector defining H(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tau; - --work; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - lquery = *lwork == -1; - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } else if (*lwork < 1 && ! lquery) { - *info = -9; - } - - if (*info == 0) { - -/* Determine the block size. */ - - nb = ilaenv_(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, - (ftnlen)1); - lwkopt = *n * nb; - work[1] = (doublereal) lwkopt; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("DSYTRD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - work[1] = 1.; - return 0; - } - - nx = *n; - iws = 1; - if (nb > 1 && nb < *n) { - -/* - Determine when to cross over from blocked to unblocked code - (last block is always handled by unblocked code). - - Computing MAX -*/ - i__1 = nb, i__2 = ilaenv_(&c__3, "DSYTRD", uplo, n, &c_n1, &c_n1, & - c_n1, (ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < *n) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *n; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: determine the - minimum value of NB, and reduce NB or force use of - unblocked code by setting NX = N. - - Computing MAX -*/ - i__1 = *lwork / ldwork; - nb = max(i__1,1); - nbmin = ilaenv_(&c__2, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, - (ftnlen)6, (ftnlen)1); - if (nb < nbmin) { - nx = *n; - } - } - } else { - nx = *n; - } - } else { - nb = 1; - } - - if (upper) { - -/* - Reduce the upper triangle of A. - Columns 1:kk are handled by the unblocked method. -*/ - - kk = *n - (*n - nx + nb - 1) / nb * nb; - i__1 = kk + 1; - i__2 = -nb; - for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += - i__2) { - -/* - Reduce columns i:i+nb-1 to tridiagonal form and form the - matrix W which is needed to update the unreduced part of - the matrix -*/ - - i__3 = i__ + nb - 1; - dlatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & - work[1], &ldwork); - -/* - Update the unreduced submatrix A(1:i-1,1:i-1), using an - update of the form: A := A - V*W' - W*V' -*/ - - i__3 = i__ - 1; - dsyr2k_(uplo, "No transpose", &i__3, &nb, &c_b2589, &a[i__ * - a_dim1 + 1], lda, &work[1], &ldwork, &c_b2453, &a[ - a_offset], lda); - -/* - Copy superdiagonal elements back into A, and diagonal - elements into D -*/ - - i__3 = i__ + nb - 1; - for (j = i__; j <= i__3; ++j) { - a[j - 1 + j * a_dim1] = e[j - 1]; - d__[j] = a[j + j * a_dim1]; -/* L10: */ - } -/* L20: */ - } - -/* Use unblocked code to reduce the last or only block */ - - dsytd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo); - } else { - -/* Reduce the lower triangle of A */ - - i__2 = *n - nx; - i__1 = nb; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { - -/* - Reduce columns i:i+nb-1 to tridiagonal form and form the - matrix W which is needed to update the unreduced part of - the matrix -*/ - - i__3 = *n - i__ + 1; - dlatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], & - tau[i__], &work[1], &ldwork); - -/* - Update the unreduced submatrix A(i+ib:n,i+ib:n), using - an update of the form: A := A - V*W' - W*V' -*/ - - i__3 = *n - i__ - nb + 1; - dsyr2k_(uplo, "No transpose", &i__3, &nb, &c_b2589, &a[i__ + nb + - i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b2453, &a[ - i__ + nb + (i__ + nb) * a_dim1], lda); - -/* - Copy subdiagonal elements back into A, and diagonal - elements into D -*/ - - i__3 = i__ + nb - 1; - for (j = i__; j <= i__3; ++j) { - a[j + 1 + j * a_dim1] = e[j]; - d__[j] = a[j + j * a_dim1]; -/* L30: */ - } -/* L40: */ - } - -/* Use unblocked code to reduce the last or only block */ - - i__1 = *n - i__ + 1; - dsytd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], - &tau[i__], &iinfo); - } - - work[1] = (doublereal) lwkopt; - return 0; - -/* End of DSYTRD */ - -} /* dsytrd_ */ - -/* Subroutine */ int dtrevc_(char *side, char *howmny, logical *select, - integer *n, doublereal *t, integer *ldt, doublereal *vl, integer * - ldvl, doublereal *vr, integer *ldvr, integer *mm, integer *m, - doublereal *work, integer *info) -{ - /* System generated locals */ - integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, - i__2, i__3; - doublereal d__1, d__2, d__3, d__4; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__, j, k; - static doublereal x[4] /* was [2][2] */; - static integer j1, j2, n2, ii, ki, ip, is; - static doublereal wi, wr, rec, ulp, beta, emax; - static logical pair; - extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, - integer *); - static logical allv; - static integer ierr; - static doublereal unfl, ovfl, smin; - static logical over; - static doublereal vmax; - static integer jnxt; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - static doublereal scale; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *); - static doublereal remax; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static logical leftv, bothv; - extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *); - static doublereal vcrit; - static logical somev; - static doublereal xnorm; - extern /* Subroutine */ int dlaln2_(logical *, integer *, integer *, - doublereal *, doublereal *, doublereal *, integer *, doublereal *, - doublereal *, doublereal *, integer *, doublereal *, doublereal * - , doublereal *, integer *, doublereal *, doublereal *, integer *), - dlabad_(doublereal *, doublereal *); - - extern integer idamax_(integer *, doublereal *, integer *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static doublereal bignum; - static logical rightv; - static doublereal smlnum; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DTREVC computes some or all of the right and/or left eigenvectors of - a real upper quasi-triangular matrix T. - - The right eigenvector x and the left eigenvector y of T corresponding - to an eigenvalue w are defined by: - - T*x = w*x, y'*T = w*y' - - where y' denotes the conjugate transpose of the vector y. - - If all eigenvectors are requested, the routine may either return the - matrices X and/or Y of right or left eigenvectors of T, or the - products Q*X and/or Q*Y, where Q is an input orthogonal - matrix. If T was obtained from the real-Schur factorization of an - original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of - right or left eigenvectors of A. - - T must be in Schur canonical form (as returned by DHSEQR), that is, - block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each - 2-by-2 diagonal block has its diagonal elements equal and its - off-diagonal elements of opposite sign. Corresponding to each 2-by-2 - diagonal block is a complex conjugate pair of eigenvalues and - eigenvectors; only one eigenvector of the pair is computed, namely - the one corresponding to the eigenvalue with positive imaginary part. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'R': compute right eigenvectors only; - = 'L': compute left eigenvectors only; - = 'B': compute both right and left eigenvectors. - - HOWMNY (input) CHARACTER*1 - = 'A': compute all right and/or left eigenvectors; - = 'B': compute all right and/or left eigenvectors, - and backtransform them using the input matrices - supplied in VR and/or VL; - = 'S': compute selected right and/or left eigenvectors, - specified by the logical array SELECT. - - SELECT (input/output) LOGICAL array, dimension (N) - If HOWMNY = 'S', SELECT specifies the eigenvectors to be - computed. - If HOWMNY = 'A' or 'B', SELECT is not referenced. - To select the real eigenvector corresponding to a real - eigenvalue w(j), SELECT(j) must be set to .TRUE.. To select - the complex eigenvector corresponding to a complex conjugate - pair w(j) and w(j+1), either SELECT(j) or SELECT(j+1) must be - set to .TRUE.; then on exit SELECT(j) is .TRUE. and - SELECT(j+1) is .FALSE.. - - N (input) INTEGER - The order of the matrix T. N >= 0. - - T (input) DOUBLE PRECISION array, dimension (LDT,N) - The upper quasi-triangular matrix T in Schur canonical form. - - LDT (input) INTEGER - The leading dimension of the array T. LDT >= max(1,N). - - VL (input/output) DOUBLE PRECISION array, dimension (LDVL,MM) - On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must - contain an N-by-N matrix Q (usually the orthogonal matrix Q - of Schur vectors returned by DHSEQR). - On exit, if SIDE = 'L' or 'B', VL contains: - if HOWMNY = 'A', the matrix Y of left eigenvectors of T; - VL has the same quasi-lower triangular form - as T'. If T(i,i) is a real eigenvalue, then - the i-th column VL(i) of VL is its - corresponding eigenvector. If T(i:i+1,i:i+1) - is a 2-by-2 block whose eigenvalues are - complex-conjugate eigenvalues of T, then - VL(i)+sqrt(-1)*VL(i+1) is the complex - eigenvector corresponding to the eigenvalue - with positive real part. - if HOWMNY = 'B', the matrix Q*Y; - if HOWMNY = 'S', the left eigenvectors of T specified by - SELECT, stored consecutively in the columns - of VL, in the same order as their - eigenvalues. - A complex eigenvector corresponding to a complex eigenvalue - is stored in two consecutive columns, the first holding the - real part, and the second the imaginary part. - If SIDE = 'R', VL is not referenced. - - LDVL (input) INTEGER - The leading dimension of the array VL. LDVL >= max(1,N) if - SIDE = 'L' or 'B'; LDVL >= 1 otherwise. - - VR (input/output) DOUBLE PRECISION array, dimension (LDVR,MM) - On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must - contain an N-by-N matrix Q (usually the orthogonal matrix Q - of Schur vectors returned by DHSEQR). - On exit, if SIDE = 'R' or 'B', VR contains: - if HOWMNY = 'A', the matrix X of right eigenvectors of T; - VR has the same quasi-upper triangular form - as T. If T(i,i) is a real eigenvalue, then - the i-th column VR(i) of VR is its - corresponding eigenvector. If T(i:i+1,i:i+1) - is a 2-by-2 block whose eigenvalues are - complex-conjugate eigenvalues of T, then - VR(i)+sqrt(-1)*VR(i+1) is the complex - eigenvector corresponding to the eigenvalue - with positive real part. - if HOWMNY = 'B', the matrix Q*X; - if HOWMNY = 'S', the right eigenvectors of T specified by - SELECT, stored consecutively in the columns - of VR, in the same order as their - eigenvalues. - A complex eigenvector corresponding to a complex eigenvalue - is stored in two consecutive columns, the first holding the - real part and the second the imaginary part. - If SIDE = 'L', VR is not referenced. - - LDVR (input) INTEGER - The leading dimension of the array VR. LDVR >= max(1,N) if - SIDE = 'R' or 'B'; LDVR >= 1 otherwise. - - MM (input) INTEGER - The number of columns in the arrays VL and/or VR. MM >= M. - - M (output) INTEGER - The number of columns in the arrays VL and/or VR actually - used to store the eigenvectors. - If HOWMNY = 'A' or 'B', M is set to N. - Each selected real eigenvector occupies one column and each - selected complex eigenvector occupies two columns. - - WORK (workspace) DOUBLE PRECISION array, dimension (3*N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The algorithm used in this program is basically backward (forward) - substitution, with scaling to make the the code robust against - possible overflow. - - Each eigenvector is normalized so that the element of largest - magnitude has magnitude 1; here the magnitude of a complex number - (x,y) is taken to be |x| + |y|. - - ===================================================================== - - - Decode and test the input parameters -*/ - - /* Parameter adjustments */ - --select; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - vl_dim1 = *ldvl; - vl_offset = 1 + vl_dim1; - vl -= vl_offset; - vr_dim1 = *ldvr; - vr_offset = 1 + vr_dim1; - vr -= vr_offset; - --work; - - /* Function Body */ - bothv = lsame_(side, "B"); - rightv = lsame_(side, "R") || bothv; - leftv = lsame_(side, "L") || bothv; - - allv = lsame_(howmny, "A"); - over = lsame_(howmny, "B"); - somev = lsame_(howmny, "S"); - - *info = 0; - if (! rightv && ! leftv) { - *info = -1; - } else if (! allv && ! over && ! somev) { - *info = -2; - } else if (*n < 0) { - *info = -4; - } else if (*ldt < max(1,*n)) { - *info = -6; - } else if (*ldvl < 1 || leftv && *ldvl < *n) { - *info = -8; - } else if (*ldvr < 1 || rightv && *ldvr < *n) { - *info = -10; - } else { - -/* - Set M to the number of columns required to store the selected - eigenvectors, standardize the array SELECT if necessary, and - test MM. -*/ - - if (somev) { - *m = 0; - pair = FALSE_; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (pair) { - pair = FALSE_; - select[j] = FALSE_; - } else { - if (j < *n) { - if (t[j + 1 + j * t_dim1] == 0.) { - if (select[j]) { - ++(*m); - } - } else { - pair = TRUE_; - if (select[j] || select[j + 1]) { - select[j] = TRUE_; - *m += 2; - } - } - } else { - if (select[*n]) { - ++(*m); - } - } - } -/* L10: */ - } - } else { - *m = *n; - } - - if (*mm < *m) { - *info = -11; - } - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DTREVC", &i__1); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - -/* Set the constants to control overflow. */ - - unfl = SAFEMINIMUM; - ovfl = 1. / unfl; - dlabad_(&unfl, &ovfl); - ulp = PRECISION; - smlnum = unfl * (*n / ulp); - bignum = (1. - ulp) / smlnum; - -/* - Compute 1-norm of each column of strictly upper triangular - part of T to control overflow in triangular solver. -*/ - - work[1] = 0.; - i__1 = *n; - for (j = 2; j <= i__1; ++j) { - work[j] = 0.; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - work[j] += (d__1 = t[i__ + j * t_dim1], abs(d__1)); -/* L20: */ - } -/* L30: */ - } - -/* - Index IP is used to specify the real or complex eigenvalue: - IP = 0, real eigenvalue, - 1, first of conjugate complex pair: (wr,wi) - -1, second of conjugate complex pair: (wr,wi) -*/ - - n2 = *n << 1; - - if (rightv) { - -/* Compute right eigenvectors. */ - - ip = 0; - is = *m; - for (ki = *n; ki >= 1; --ki) { - - if (ip == 1) { - goto L130; - } - if (ki == 1) { - goto L40; - } - if (t[ki + (ki - 1) * t_dim1] == 0.) { - goto L40; - } - ip = -1; - -L40: - if (somev) { - if (ip == 0) { - if (! select[ki]) { - goto L130; - } - } else { - if (! select[ki - 1]) { - goto L130; - } - } - } - -/* Compute the KI-th eigenvalue (WR,WI). */ - - wr = t[ki + ki * t_dim1]; - wi = 0.; - if (ip != 0) { - wi = sqrt((d__1 = t[ki + (ki - 1) * t_dim1], abs(d__1))) * - sqrt((d__2 = t[ki - 1 + ki * t_dim1], abs(d__2))); - } -/* Computing MAX */ - d__1 = ulp * (abs(wr) + abs(wi)); - smin = max(d__1,smlnum); - - if (ip == 0) { - -/* Real right eigenvector */ - - work[ki + *n] = 1.; - -/* Form right-hand side */ - - i__1 = ki - 1; - for (k = 1; k <= i__1; ++k) { - work[k + *n] = -t[k + ki * t_dim1]; -/* L50: */ - } - -/* - Solve the upper quasi-triangular system: - (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK. -*/ - - jnxt = ki - 1; - for (j = ki - 1; j >= 1; --j) { - if (j > jnxt) { - goto L60; - } - j1 = j; - j2 = j; - jnxt = j - 1; - if (j > 1) { - if (t[j + (j - 1) * t_dim1] != 0.) { - j1 = j - 1; - jnxt = j - 2; - } - } - - if (j1 == j2) { - -/* 1-by-1 diagonal block */ - - dlaln2_(&c_false, &c__1, &c__1, &smin, &c_b2453, &t[j - + j * t_dim1], ldt, &c_b2453, &c_b2453, &work[ - j + *n], n, &wr, &c_b2467, x, &c__2, &scale, & - xnorm, &ierr); - -/* - Scale X(1,1) to avoid overflow when updating - the right-hand side. -*/ - - if (xnorm > 1.) { - if (work[j] > bignum / xnorm) { - x[0] /= xnorm; - scale /= xnorm; - } - } - -/* Scale if necessary */ - - if (scale != 1.) { - dscal_(&ki, &scale, &work[*n + 1], &c__1); - } - work[j + *n] = x[0]; - -/* Update right-hand side */ - - i__1 = j - 1; - d__1 = -x[0]; - daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ - *n + 1], &c__1); - - } else { - -/* 2-by-2 diagonal block */ - - dlaln2_(&c_false, &c__2, &c__1, &smin, &c_b2453, &t[j - - 1 + (j - 1) * t_dim1], ldt, &c_b2453, & - c_b2453, &work[j - 1 + *n], n, &wr, &c_b2467, - x, &c__2, &scale, &xnorm, &ierr); - -/* - Scale X(1,1) and X(2,1) to avoid overflow when - updating the right-hand side. -*/ - - if (xnorm > 1.) { -/* Computing MAX */ - d__1 = work[j - 1], d__2 = work[j]; - beta = max(d__1,d__2); - if (beta > bignum / xnorm) { - x[0] /= xnorm; - x[1] /= xnorm; - scale /= xnorm; - } - } - -/* Scale if necessary */ - - if (scale != 1.) { - dscal_(&ki, &scale, &work[*n + 1], &c__1); - } - work[j - 1 + *n] = x[0]; - work[j + *n] = x[1]; - -/* Update right-hand side */ - - i__1 = j - 2; - d__1 = -x[0]; - daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, - &work[*n + 1], &c__1); - i__1 = j - 2; - d__1 = -x[1]; - daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ - *n + 1], &c__1); - } -L60: - ; - } - -/* Copy the vector x or Q*x to VR and normalize. */ - - if (! over) { - dcopy_(&ki, &work[*n + 1], &c__1, &vr[is * vr_dim1 + 1], & - c__1); - - ii = idamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); - remax = 1. / (d__1 = vr[ii + is * vr_dim1], abs(d__1)); - dscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); - - i__1 = *n; - for (k = ki + 1; k <= i__1; ++k) { - vr[k + is * vr_dim1] = 0.; -/* L70: */ - } - } else { - if (ki > 1) { - i__1 = ki - 1; - dgemv_("N", n, &i__1, &c_b2453, &vr[vr_offset], ldvr, - &work[*n + 1], &c__1, &work[ki + *n], &vr[ki * - vr_dim1 + 1], &c__1); - } - - ii = idamax_(n, &vr[ki * vr_dim1 + 1], &c__1); - remax = 1. / (d__1 = vr[ii + ki * vr_dim1], abs(d__1)); - dscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); - } - - } else { - -/* - Complex right eigenvector. - - Initial solve - [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0. - [ (T(KI,KI-1) T(KI,KI) ) ] -*/ - - if ((d__1 = t[ki - 1 + ki * t_dim1], abs(d__1)) >= (d__2 = t[ - ki + (ki - 1) * t_dim1], abs(d__2))) { - work[ki - 1 + *n] = 1.; - work[ki + n2] = wi / t[ki - 1 + ki * t_dim1]; - } else { - work[ki - 1 + *n] = -wi / t[ki + (ki - 1) * t_dim1]; - work[ki + n2] = 1.; - } - work[ki + *n] = 0.; - work[ki - 1 + n2] = 0.; - -/* Form right-hand side */ - - i__1 = ki - 2; - for (k = 1; k <= i__1; ++k) { - work[k + *n] = -work[ki - 1 + *n] * t[k + (ki - 1) * - t_dim1]; - work[k + n2] = -work[ki + n2] * t[k + ki * t_dim1]; -/* L80: */ - } - -/* - Solve upper quasi-triangular system: - (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2) -*/ - - jnxt = ki - 2; - for (j = ki - 2; j >= 1; --j) { - if (j > jnxt) { - goto L90; - } - j1 = j; - j2 = j; - jnxt = j - 1; - if (j > 1) { - if (t[j + (j - 1) * t_dim1] != 0.) { - j1 = j - 1; - jnxt = j - 2; - } - } - - if (j1 == j2) { - -/* 1-by-1 diagonal block */ - - dlaln2_(&c_false, &c__1, &c__2, &smin, &c_b2453, &t[j - + j * t_dim1], ldt, &c_b2453, &c_b2453, &work[ - j + *n], n, &wr, &wi, x, &c__2, &scale, & - xnorm, &ierr); - -/* - Scale X(1,1) and X(1,2) to avoid overflow when - updating the right-hand side. -*/ - - if (xnorm > 1.) { - if (work[j] > bignum / xnorm) { - x[0] /= xnorm; - x[2] /= xnorm; - scale /= xnorm; - } - } - -/* Scale if necessary */ - - if (scale != 1.) { - dscal_(&ki, &scale, &work[*n + 1], &c__1); - dscal_(&ki, &scale, &work[n2 + 1], &c__1); - } - work[j + *n] = x[0]; - work[j + n2] = x[2]; - -/* Update the right-hand side */ - - i__1 = j - 1; - d__1 = -x[0]; - daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ - *n + 1], &c__1); - i__1 = j - 1; - d__1 = -x[2]; - daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ - n2 + 1], &c__1); - - } else { - -/* 2-by-2 diagonal block */ - - dlaln2_(&c_false, &c__2, &c__2, &smin, &c_b2453, &t[j - - 1 + (j - 1) * t_dim1], ldt, &c_b2453, & - c_b2453, &work[j - 1 + *n], n, &wr, &wi, x, & - c__2, &scale, &xnorm, &ierr); - -/* - Scale X to avoid overflow when updating - the right-hand side. -*/ - - if (xnorm > 1.) { -/* Computing MAX */ - d__1 = work[j - 1], d__2 = work[j]; - beta = max(d__1,d__2); - if (beta > bignum / xnorm) { - rec = 1. / xnorm; - x[0] *= rec; - x[2] *= rec; - x[1] *= rec; - x[3] *= rec; - scale *= rec; - } - } - -/* Scale if necessary */ - - if (scale != 1.) { - dscal_(&ki, &scale, &work[*n + 1], &c__1); - dscal_(&ki, &scale, &work[n2 + 1], &c__1); - } - work[j - 1 + *n] = x[0]; - work[j + *n] = x[1]; - work[j - 1 + n2] = x[2]; - work[j + n2] = x[3]; - -/* Update the right-hand side */ - - i__1 = j - 2; - d__1 = -x[0]; - daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, - &work[*n + 1], &c__1); - i__1 = j - 2; - d__1 = -x[1]; - daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ - *n + 1], &c__1); - i__1 = j - 2; - d__1 = -x[2]; - daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, - &work[n2 + 1], &c__1); - i__1 = j - 2; - d__1 = -x[3]; - daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ - n2 + 1], &c__1); - } -L90: - ; - } - -/* Copy the vector x or Q*x to VR and normalize. */ - - if (! over) { - dcopy_(&ki, &work[*n + 1], &c__1, &vr[(is - 1) * vr_dim1 - + 1], &c__1); - dcopy_(&ki, &work[n2 + 1], &c__1, &vr[is * vr_dim1 + 1], & - c__1); - - emax = 0.; - i__1 = ki; - for (k = 1; k <= i__1; ++k) { -/* Computing MAX */ - d__3 = emax, d__4 = (d__1 = vr[k + (is - 1) * vr_dim1] - , abs(d__1)) + (d__2 = vr[k + is * vr_dim1], - abs(d__2)); - emax = max(d__3,d__4); -/* L100: */ - } - - remax = 1. / emax; - dscal_(&ki, &remax, &vr[(is - 1) * vr_dim1 + 1], &c__1); - dscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); - - i__1 = *n; - for (k = ki + 1; k <= i__1; ++k) { - vr[k + (is - 1) * vr_dim1] = 0.; - vr[k + is * vr_dim1] = 0.; -/* L110: */ - } - - } else { - - if (ki > 2) { - i__1 = ki - 2; - dgemv_("N", n, &i__1, &c_b2453, &vr[vr_offset], ldvr, - &work[*n + 1], &c__1, &work[ki - 1 + *n], &vr[ - (ki - 1) * vr_dim1 + 1], &c__1); - i__1 = ki - 2; - dgemv_("N", n, &i__1, &c_b2453, &vr[vr_offset], ldvr, - &work[n2 + 1], &c__1, &work[ki + n2], &vr[ki * - vr_dim1 + 1], &c__1); - } else { - dscal_(n, &work[ki - 1 + *n], &vr[(ki - 1) * vr_dim1 - + 1], &c__1); - dscal_(n, &work[ki + n2], &vr[ki * vr_dim1 + 1], & - c__1); - } - - emax = 0.; - i__1 = *n; - for (k = 1; k <= i__1; ++k) { -/* Computing MAX */ - d__3 = emax, d__4 = (d__1 = vr[k + (ki - 1) * vr_dim1] - , abs(d__1)) + (d__2 = vr[k + ki * vr_dim1], - abs(d__2)); - emax = max(d__3,d__4); -/* L120: */ - } - remax = 1. / emax; - dscal_(n, &remax, &vr[(ki - 1) * vr_dim1 + 1], &c__1); - dscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); - } - } - - --is; - if (ip != 0) { - --is; - } -L130: - if (ip == 1) { - ip = 0; - } - if (ip == -1) { - ip = 1; - } -/* L140: */ - } - } - - if (leftv) { - -/* Compute left eigenvectors. */ - - ip = 0; - is = 1; - i__1 = *n; - for (ki = 1; ki <= i__1; ++ki) { - - if (ip == -1) { - goto L250; - } - if (ki == *n) { - goto L150; - } - if (t[ki + 1 + ki * t_dim1] == 0.) { - goto L150; - } - ip = 1; - -L150: - if (somev) { - if (! select[ki]) { - goto L250; - } - } - -/* Compute the KI-th eigenvalue (WR,WI). */ - - wr = t[ki + ki * t_dim1]; - wi = 0.; - if (ip != 0) { - wi = sqrt((d__1 = t[ki + (ki + 1) * t_dim1], abs(d__1))) * - sqrt((d__2 = t[ki + 1 + ki * t_dim1], abs(d__2))); - } -/* Computing MAX */ - d__1 = ulp * (abs(wr) + abs(wi)); - smin = max(d__1,smlnum); - - if (ip == 0) { - -/* Real left eigenvector. */ - - work[ki + *n] = 1.; - -/* Form right-hand side */ - - i__2 = *n; - for (k = ki + 1; k <= i__2; ++k) { - work[k + *n] = -t[ki + k * t_dim1]; -/* L160: */ - } - -/* - Solve the quasi-triangular system: - (T(KI+1:N,KI+1:N) - WR)'*X = SCALE*WORK -*/ - - vmax = 1.; - vcrit = bignum; - - jnxt = ki + 1; - i__2 = *n; - for (j = ki + 1; j <= i__2; ++j) { - if (j < jnxt) { - goto L170; - } - j1 = j; - j2 = j; - jnxt = j + 1; - if (j < *n) { - if (t[j + 1 + j * t_dim1] != 0.) { - j2 = j + 1; - jnxt = j + 2; - } - } - - if (j1 == j2) { - -/* - 1-by-1 diagonal block - - Scale if necessary to avoid overflow when forming - the right-hand side. -*/ - - if (work[j] > vcrit) { - rec = 1. / vmax; - i__3 = *n - ki + 1; - dscal_(&i__3, &rec, &work[ki + *n], &c__1); - vmax = 1.; - vcrit = bignum; - } - - i__3 = j - ki - 1; - work[j + *n] -= ddot_(&i__3, &t[ki + 1 + j * t_dim1], - &c__1, &work[ki + 1 + *n], &c__1); - -/* Solve (T(J,J)-WR)'*X = WORK */ - - dlaln2_(&c_false, &c__1, &c__1, &smin, &c_b2453, &t[j - + j * t_dim1], ldt, &c_b2453, &c_b2453, &work[ - j + *n], n, &wr, &c_b2467, x, &c__2, &scale, & - xnorm, &ierr); - -/* Scale if necessary */ - - if (scale != 1.) { - i__3 = *n - ki + 1; - dscal_(&i__3, &scale, &work[ki + *n], &c__1); - } - work[j + *n] = x[0]; -/* Computing MAX */ - d__2 = (d__1 = work[j + *n], abs(d__1)); - vmax = max(d__2,vmax); - vcrit = bignum / vmax; - - } else { - -/* - 2-by-2 diagonal block - - Scale if necessary to avoid overflow when forming - the right-hand side. - - Computing MAX -*/ - d__1 = work[j], d__2 = work[j + 1]; - beta = max(d__1,d__2); - if (beta > vcrit) { - rec = 1. / vmax; - i__3 = *n - ki + 1; - dscal_(&i__3, &rec, &work[ki + *n], &c__1); - vmax = 1.; - vcrit = bignum; - } - - i__3 = j - ki - 1; - work[j + *n] -= ddot_(&i__3, &t[ki + 1 + j * t_dim1], - &c__1, &work[ki + 1 + *n], &c__1); - - i__3 = j - ki - 1; - work[j + 1 + *n] -= ddot_(&i__3, &t[ki + 1 + (j + 1) * - t_dim1], &c__1, &work[ki + 1 + *n], &c__1); - -/* - Solve - [T(J,J)-WR T(J,J+1) ]'* X = SCALE*( WORK1 ) - [T(J+1,J) T(J+1,J+1)-WR] ( WORK2 ) -*/ - - dlaln2_(&c_true, &c__2, &c__1, &smin, &c_b2453, &t[j - + j * t_dim1], ldt, &c_b2453, &c_b2453, &work[ - j + *n], n, &wr, &c_b2467, x, &c__2, &scale, & - xnorm, &ierr); - -/* Scale if necessary */ - - if (scale != 1.) { - i__3 = *n - ki + 1; - dscal_(&i__3, &scale, &work[ki + *n], &c__1); - } - work[j + *n] = x[0]; - work[j + 1 + *n] = x[1]; - -/* Computing MAX */ - d__3 = (d__1 = work[j + *n], abs(d__1)), d__4 = (d__2 - = work[j + 1 + *n], abs(d__2)), d__3 = max( - d__3,d__4); - vmax = max(d__3,vmax); - vcrit = bignum / vmax; - - } -L170: - ; - } - -/* Copy the vector x or Q*x to VL and normalize. */ - - if (! over) { - i__2 = *n - ki + 1; - dcopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * - vl_dim1], &c__1); - - i__2 = *n - ki + 1; - ii = idamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - - 1; - remax = 1. / (d__1 = vl[ii + is * vl_dim1], abs(d__1)); - i__2 = *n - ki + 1; - dscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); - - i__2 = ki - 1; - for (k = 1; k <= i__2; ++k) { - vl[k + is * vl_dim1] = 0.; -/* L180: */ - } - - } else { - - if (ki < *n) { - i__2 = *n - ki; - dgemv_("N", n, &i__2, &c_b2453, &vl[(ki + 1) * - vl_dim1 + 1], ldvl, &work[ki + 1 + *n], &c__1, - &work[ki + *n], &vl[ki * vl_dim1 + 1], &c__1); - } - - ii = idamax_(n, &vl[ki * vl_dim1 + 1], &c__1); - remax = 1. / (d__1 = vl[ii + ki * vl_dim1], abs(d__1)); - dscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); - - } - - } else { - -/* - Complex left eigenvector. - - Initial solve: - ((T(KI,KI) T(KI,KI+1) )' - (WR - I* WI))*X = 0. - ((T(KI+1,KI) T(KI+1,KI+1)) ) -*/ - - if ((d__1 = t[ki + (ki + 1) * t_dim1], abs(d__1)) >= (d__2 = - t[ki + 1 + ki * t_dim1], abs(d__2))) { - work[ki + *n] = wi / t[ki + (ki + 1) * t_dim1]; - work[ki + 1 + n2] = 1.; - } else { - work[ki + *n] = 1.; - work[ki + 1 + n2] = -wi / t[ki + 1 + ki * t_dim1]; - } - work[ki + 1 + *n] = 0.; - work[ki + n2] = 0.; - -/* Form right-hand side */ - - i__2 = *n; - for (k = ki + 2; k <= i__2; ++k) { - work[k + *n] = -work[ki + *n] * t[ki + k * t_dim1]; - work[k + n2] = -work[ki + 1 + n2] * t[ki + 1 + k * t_dim1] - ; -/* L190: */ - } - -/* - Solve complex quasi-triangular system: - ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2 -*/ - - vmax = 1.; - vcrit = bignum; - - jnxt = ki + 2; - i__2 = *n; - for (j = ki + 2; j <= i__2; ++j) { - if (j < jnxt) { - goto L200; - } - j1 = j; - j2 = j; - jnxt = j + 1; - if (j < *n) { - if (t[j + 1 + j * t_dim1] != 0.) { - j2 = j + 1; - jnxt = j + 2; - } - } - - if (j1 == j2) { - -/* - 1-by-1 diagonal block - - Scale if necessary to avoid overflow when - forming the right-hand side elements. -*/ - - if (work[j] > vcrit) { - rec = 1. / vmax; - i__3 = *n - ki + 1; - dscal_(&i__3, &rec, &work[ki + *n], &c__1); - i__3 = *n - ki + 1; - dscal_(&i__3, &rec, &work[ki + n2], &c__1); - vmax = 1.; - vcrit = bignum; - } - - i__3 = j - ki - 2; - work[j + *n] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], - &c__1, &work[ki + 2 + *n], &c__1); - i__3 = j - ki - 2; - work[j + n2] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], - &c__1, &work[ki + 2 + n2], &c__1); - -/* Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2 */ - - d__1 = -wi; - dlaln2_(&c_false, &c__1, &c__2, &smin, &c_b2453, &t[j - + j * t_dim1], ldt, &c_b2453, &c_b2453, &work[ - j + *n], n, &wr, &d__1, x, &c__2, &scale, & - xnorm, &ierr); - -/* Scale if necessary */ - - if (scale != 1.) { - i__3 = *n - ki + 1; - dscal_(&i__3, &scale, &work[ki + *n], &c__1); - i__3 = *n - ki + 1; - dscal_(&i__3, &scale, &work[ki + n2], &c__1); - } - work[j + *n] = x[0]; - work[j + n2] = x[2]; -/* Computing MAX */ - d__3 = (d__1 = work[j + *n], abs(d__1)), d__4 = (d__2 - = work[j + n2], abs(d__2)), d__3 = max(d__3, - d__4); - vmax = max(d__3,vmax); - vcrit = bignum / vmax; - - } else { - -/* - 2-by-2 diagonal block - - Scale if necessary to avoid overflow when forming - the right-hand side elements. - - Computing MAX -*/ - d__1 = work[j], d__2 = work[j + 1]; - beta = max(d__1,d__2); - if (beta > vcrit) { - rec = 1. / vmax; - i__3 = *n - ki + 1; - dscal_(&i__3, &rec, &work[ki + *n], &c__1); - i__3 = *n - ki + 1; - dscal_(&i__3, &rec, &work[ki + n2], &c__1); - vmax = 1.; - vcrit = bignum; - } - - i__3 = j - ki - 2; - work[j + *n] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], - &c__1, &work[ki + 2 + *n], &c__1); - - i__3 = j - ki - 2; - work[j + n2] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], - &c__1, &work[ki + 2 + n2], &c__1); - - i__3 = j - ki - 2; - work[j + 1 + *n] -= ddot_(&i__3, &t[ki + 2 + (j + 1) * - t_dim1], &c__1, &work[ki + 2 + *n], &c__1); - - i__3 = j - ki - 2; - work[j + 1 + n2] -= ddot_(&i__3, &t[ki + 2 + (j + 1) * - t_dim1], &c__1, &work[ki + 2 + n2], &c__1); - -/* - Solve 2-by-2 complex linear equation - ([T(j,j) T(j,j+1) ]'-(wr-i*wi)*I)*X = SCALE*B - ([T(j+1,j) T(j+1,j+1)] ) -*/ - - d__1 = -wi; - dlaln2_(&c_true, &c__2, &c__2, &smin, &c_b2453, &t[j - + j * t_dim1], ldt, &c_b2453, &c_b2453, &work[ - j + *n], n, &wr, &d__1, x, &c__2, &scale, & - xnorm, &ierr); - -/* Scale if necessary */ - - if (scale != 1.) { - i__3 = *n - ki + 1; - dscal_(&i__3, &scale, &work[ki + *n], &c__1); - i__3 = *n - ki + 1; - dscal_(&i__3, &scale, &work[ki + n2], &c__1); - } - work[j + *n] = x[0]; - work[j + n2] = x[2]; - work[j + 1 + *n] = x[1]; - work[j + 1 + n2] = x[3]; -/* Computing MAX */ - d__1 = abs(x[0]), d__2 = abs(x[2]), d__1 = max(d__1, - d__2), d__2 = abs(x[1]), d__1 = max(d__1,d__2) - , d__2 = abs(x[3]), d__1 = max(d__1,d__2); - vmax = max(d__1,vmax); - vcrit = bignum / vmax; - - } -L200: - ; - } - -/* - Copy the vector x or Q*x to VL and normalize. - - L210: -*/ - if (! over) { - i__2 = *n - ki + 1; - dcopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * - vl_dim1], &c__1); - i__2 = *n - ki + 1; - dcopy_(&i__2, &work[ki + n2], &c__1, &vl[ki + (is + 1) * - vl_dim1], &c__1); - - emax = 0.; - i__2 = *n; - for (k = ki; k <= i__2; ++k) { -/* Computing MAX */ - d__3 = emax, d__4 = (d__1 = vl[k + is * vl_dim1], abs( - d__1)) + (d__2 = vl[k + (is + 1) * vl_dim1], - abs(d__2)); - emax = max(d__3,d__4); -/* L220: */ - } - remax = 1. / emax; - i__2 = *n - ki + 1; - dscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); - i__2 = *n - ki + 1; - dscal_(&i__2, &remax, &vl[ki + (is + 1) * vl_dim1], &c__1) - ; - - i__2 = ki - 1; - for (k = 1; k <= i__2; ++k) { - vl[k + is * vl_dim1] = 0.; - vl[k + (is + 1) * vl_dim1] = 0.; -/* L230: */ - } - } else { - if (ki < *n - 1) { - i__2 = *n - ki - 1; - dgemv_("N", n, &i__2, &c_b2453, &vl[(ki + 2) * - vl_dim1 + 1], ldvl, &work[ki + 2 + *n], &c__1, - &work[ki + *n], &vl[ki * vl_dim1 + 1], &c__1); - i__2 = *n - ki - 1; - dgemv_("N", n, &i__2, &c_b2453, &vl[(ki + 2) * - vl_dim1 + 1], ldvl, &work[ki + 2 + n2], &c__1, - &work[ki + 1 + n2], &vl[(ki + 1) * vl_dim1 + - 1], &c__1); - } else { - dscal_(n, &work[ki + *n], &vl[ki * vl_dim1 + 1], & - c__1); - dscal_(n, &work[ki + 1 + n2], &vl[(ki + 1) * vl_dim1 - + 1], &c__1); - } - - emax = 0.; - i__2 = *n; - for (k = 1; k <= i__2; ++k) { -/* Computing MAX */ - d__3 = emax, d__4 = (d__1 = vl[k + ki * vl_dim1], abs( - d__1)) + (d__2 = vl[k + (ki + 1) * vl_dim1], - abs(d__2)); - emax = max(d__3,d__4); -/* L240: */ - } - remax = 1. / emax; - dscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); - dscal_(n, &remax, &vl[(ki + 1) * vl_dim1 + 1], &c__1); - - } - - } - - ++is; - if (ip != 0) { - ++is; - } -L250: - if (ip == -1) { - ip = 0; - } - if (ip == 1) { - ip = -1; - } - -/* L260: */ - } - - } - - return 0; - -/* End of DTREVC */ - -} /* dtrevc_ */ - -/* Subroutine */ int dtrti2_(char *uplo, char *diag, integer *n, doublereal * - a, integer *lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer j; - static doublereal ajj; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - static logical upper; - extern /* Subroutine */ int dtrmv_(char *, char *, char *, integer *, - doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *); - static logical nounit; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DTRTI2 computes the inverse of a real upper or lower triangular - matrix. - - This is the Level 2 BLAS version of the algorithm. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the matrix A is upper or lower triangular. - = 'U': Upper triangular - = 'L': Lower triangular - - DIAG (input) CHARACTER*1 - Specifies whether or not the matrix A is unit triangular. - = 'N': Non-unit triangular - = 'U': Unit triangular - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the triangular matrix A. If UPLO = 'U', the - leading n by n upper triangular part of the array A contains - the upper triangular matrix, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading n by n lower triangular part of the array A contains - the lower triangular matrix, and the strictly upper - triangular part of A is not referenced. If DIAG = 'U', the - diagonal elements of A are also not referenced and are - assumed to be 1. - - On exit, the (triangular) inverse of the original matrix, in - the same storage format. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - nounit = lsame_(diag, "N"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (! nounit && ! lsame_(diag, "U")) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DTRTI2", &i__1); - return 0; - } - - if (upper) { - -/* Compute inverse of upper triangular matrix. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (nounit) { - a[j + j * a_dim1] = 1. / a[j + j * a_dim1]; - ajj = -a[j + j * a_dim1]; - } else { - ajj = -1.; - } - -/* Compute elements 1:j-1 of j-th column. */ - - i__2 = j - 1; - dtrmv_("Upper", "No transpose", diag, &i__2, &a[a_offset], lda, & - a[j * a_dim1 + 1], &c__1); - i__2 = j - 1; - dscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1); -/* L10: */ - } - } else { - -/* Compute inverse of lower triangular matrix. */ - - for (j = *n; j >= 1; --j) { - if (nounit) { - a[j + j * a_dim1] = 1. / a[j + j * a_dim1]; - ajj = -a[j + j * a_dim1]; - } else { - ajj = -1.; - } - if (j < *n) { - -/* Compute elements j+1:n of j-th column. */ - - i__1 = *n - j; - dtrmv_("Lower", "No transpose", diag, &i__1, &a[j + 1 + (j + - 1) * a_dim1], lda, &a[j + 1 + j * a_dim1], &c__1); - i__1 = *n - j; - dscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1); - } -/* L20: */ - } - } - - return 0; - -/* End of DTRTI2 */ - -} /* dtrti2_ */ - -/* Subroutine */ int dtrtri_(char *uplo, char *diag, integer *n, doublereal * - a, integer *lda, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, i__1, i__2[2], i__3, i__4, i__5; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer j, jb, nb, nn; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, - integer *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *), dtrsm_( - char *, char *, char *, char *, integer *, integer *, doublereal * - , doublereal *, integer *, doublereal *, integer *); - static logical upper; - extern /* Subroutine */ int dtrti2_(char *, char *, integer *, doublereal - *, integer *, integer *), xerbla_(char *, integer - *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static logical nounit; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - DTRTRI computes the inverse of a real upper or lower triangular - matrix A. - - This is the Level 3 BLAS version of the algorithm. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': A is upper triangular; - = 'L': A is lower triangular. - - DIAG (input) CHARACTER*1 - = 'N': A is non-unit triangular; - = 'U': A is unit triangular. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the triangular matrix A. If UPLO = 'U', the - leading N-by-N upper triangular part of the array A contains - the upper triangular matrix, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading N-by-N lower triangular part of the array A contains - the lower triangular matrix, and the strictly upper - triangular part of A is not referenced. If DIAG = 'U', the - diagonal elements of A are also not referenced and are - assumed to be 1. - On exit, the (triangular) inverse of the original matrix, in - the same storage format. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, A(i,i) is exactly zero. The triangular - matrix is singular and its inverse can not be computed. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - nounit = lsame_(diag, "N"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (! nounit && ! lsame_(diag, "U")) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DTRTRI", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Check for singularity if non-unit. */ - - if (nounit) { - i__1 = *n; - for (*info = 1; *info <= i__1; ++(*info)) { - if (a[*info + *info * a_dim1] == 0.) { - return 0; - } -/* L10: */ - } - *info = 0; - } - -/* - Determine the block size for this environment. - - Writing concatenation -*/ - i__2[0] = 1, a__1[0] = uplo; - i__2[1] = 1, a__1[1] = diag; - s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2); - nb = ilaenv_(&c__1, "DTRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)2); - if (nb <= 1 || nb >= *n) { - -/* Use unblocked code */ - - dtrti2_(uplo, diag, n, &a[a_offset], lda, info); - } else { - -/* Use blocked code */ - - if (upper) { - -/* Compute inverse of upper triangular matrix */ - - i__1 = *n; - i__3 = nb; - for (j = 1; i__3 < 0 ? j >= i__1 : j <= i__1; j += i__3) { -/* Computing MIN */ - i__4 = nb, i__5 = *n - j + 1; - jb = min(i__4,i__5); - -/* Compute rows 1:j-1 of current block column */ - - i__4 = j - 1; - dtrmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, & - c_b2453, &a[a_offset], lda, &a[j * a_dim1 + 1], lda); - i__4 = j - 1; - dtrsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, & - c_b2589, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], - lda); - -/* Compute inverse of current diagonal block */ - - dtrti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info); -/* L20: */ - } - } else { - -/* Compute inverse of lower triangular matrix */ - - nn = (*n - 1) / nb * nb + 1; - i__3 = -nb; - for (j = nn; i__3 < 0 ? j >= 1 : j <= 1; j += i__3) { -/* Computing MIN */ - i__1 = nb, i__4 = *n - j + 1; - jb = min(i__1,i__4); - if (j + jb <= *n) { - -/* Compute rows j+jb:n of current block column */ - - i__1 = *n - j - jb + 1; - dtrmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, - &c_b2453, &a[j + jb + (j + jb) * a_dim1], lda, &a[ - j + jb + j * a_dim1], lda); - i__1 = *n - j - jb + 1; - dtrsm_("Right", "Lower", "No transpose", diag, &i__1, &jb, - &c_b2589, &a[j + j * a_dim1], lda, &a[j + jb + j - * a_dim1], lda); - } - -/* Compute inverse of current diagonal block */ - - dtrti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info); -/* L30: */ - } - } - } - - return 0; - -/* End of DTRTRI */ - -} /* dtrtri_ */ - -integer ieeeck_(integer *ispec, real *zero, real *one) -{ - /* System generated locals */ - integer ret_val; - - /* Local variables */ - static real nan1, nan2, nan3, nan4, nan5, nan6, neginf, posinf, negzro, - newzro; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1998 - - - Purpose - ======= - - IEEECK is called from the ILAENV to verify that Infinity and - possibly NaN arithmetic is safe (i.e. will not trap). - - Arguments - ========= - - ISPEC (input) INTEGER - Specifies whether to test just for inifinity arithmetic - or whether to test for infinity and NaN arithmetic. - = 0: Verify infinity arithmetic only. - = 1: Verify infinity and NaN arithmetic. - - ZERO (input) REAL - Must contain the value 0.0 - This is passed to prevent the compiler from optimizing - away this code. - - ONE (input) REAL - Must contain the value 1.0 - This is passed to prevent the compiler from optimizing - away this code. - - RETURN VALUE: INTEGER - = 0: Arithmetic failed to produce the correct answers - = 1: Arithmetic produced the correct answers -*/ - - ret_val = 1; - - posinf = *one / *zero; - if (posinf <= *one) { - ret_val = 0; - return ret_val; - } - - neginf = -(*one) / *zero; - if (neginf >= *zero) { - ret_val = 0; - return ret_val; - } - - negzro = *one / (neginf + *one); - if (negzro != *zero) { - ret_val = 0; - return ret_val; - } - - neginf = *one / negzro; - if (neginf >= *zero) { - ret_val = 0; - return ret_val; - } - - newzro = negzro + *zero; - if (newzro != *zero) { - ret_val = 0; - return ret_val; - } - - posinf = *one / newzro; - if (posinf <= *one) { - ret_val = 0; - return ret_val; - } - - neginf *= posinf; - if (neginf >= *zero) { - ret_val = 0; - return ret_val; - } - - posinf *= posinf; - if (posinf <= *one) { - ret_val = 0; - return ret_val; - } - - -/* Return if we were only asked to check infinity arithmetic */ - - if (*ispec == 0) { - return ret_val; - } - - nan1 = posinf + neginf; - - nan2 = posinf / neginf; - - nan3 = posinf / posinf; - - nan4 = posinf * *zero; - - nan5 = neginf * negzro; - - nan6 = nan5 * 0.f; - - if (nan1 == nan1) { - ret_val = 0; - return ret_val; - } - - if (nan2 == nan2) { - ret_val = 0; - return ret_val; - } - - if (nan3 == nan3) { - ret_val = 0; - return ret_val; - } - - if (nan4 == nan4) { - ret_val = 0; - return ret_val; - } - - if (nan5 == nan5) { - ret_val = 0; - return ret_val; - } - - if (nan6 == nan6) { - ret_val = 0; - return ret_val; - } - - return ret_val; -} /* ieeeck_ */ - -integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, - integer *n2, integer *n3, integer *n4, ftnlen name_len, ftnlen - opts_len) -{ - /* System generated locals */ - integer ret_val; - - /* Builtin functions */ - /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); - integer s_cmp(char *, char *, ftnlen, ftnlen); - - /* Local variables */ - static integer i__; - static char c1[1], c2[2], c3[3], c4[2]; - static integer ic, nb, iz, nx; - static logical cname, sname; - static integer nbmin; - extern integer ieeeck_(integer *, real *, real *); - static char subnam[6]; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ILAENV is called from the LAPACK routines to choose problem-dependent - parameters for the local environment. See ISPEC for a description of - the parameters. - - This version provides a set of parameters which should give good, - but not optimal, performance on many of the currently available - computers. Users are encouraged to modify this subroutine to set - the tuning parameters for their particular machine using the option - and problem size information in the arguments. - - This routine will not function correctly if it is converted to all - lower case. Converting it to all upper case is allowed. - - Arguments - ========= - - ISPEC (input) INTEGER - Specifies the parameter to be returned as the value of - ILAENV. - = 1: the optimal blocksize; if this value is 1, an unblocked - algorithm will give the best performance. - = 2: the minimum block size for which the block routine - should be used; if the usable block size is less than - this value, an unblocked routine should be used. - = 3: the crossover point (in a block routine, for N less - than this value, an unblocked routine should be used) - = 4: the number of shifts, used in the nonsymmetric - eigenvalue routines - = 5: the minimum column dimension for blocking to be used; - rectangular blocks must have dimension at least k by m, - where k is given by ILAENV(2,...) and m by ILAENV(5,...) - = 6: the crossover point for the SVD (when reducing an m by n - matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds - this value, a QR factorization is used first to reduce - the matrix to a triangular form.) - = 7: the number of processors - = 8: the crossover point for the multishift QR and QZ methods - for nonsymmetric eigenvalue problems. - = 9: maximum size of the subproblems at the bottom of the - computation tree in the divide-and-conquer algorithm - (used by xGELSD and xGESDD) - =10: ieee NaN arithmetic can be trusted not to trap - =11: infinity arithmetic can be trusted not to trap - - NAME (input) CHARACTER*(*) - The name of the calling subroutine, in either upper case or - lower case. - - OPTS (input) CHARACTER*(*) - The character options to the subroutine NAME, concatenated - into a single character string. For example, UPLO = 'U', - TRANS = 'T', and DIAG = 'N' for a triangular routine would - be specified as OPTS = 'UTN'. - - N1 (input) INTEGER - N2 (input) INTEGER - N3 (input) INTEGER - N4 (input) INTEGER - Problem dimensions for the subroutine NAME; these may not all - be required. - - (ILAENV) (output) INTEGER - >= 0: the value of the parameter specified by ISPEC - < 0: if ILAENV = -k, the k-th argument had an illegal value. - - Further Details - =============== - - The following conventions have been used when calling ILAENV from the - LAPACK routines: - 1) OPTS is a concatenation of all of the character options to - subroutine NAME, in the same order that they appear in the - argument list for NAME, even if they are not used in determining - the value of the parameter specified by ISPEC. - 2) The problem dimensions N1, N2, N3, N4 are specified in the order - that they appear in the argument list for NAME. N1 is used - first, N2 second, and so on, and unused problem dimensions are - passed a value of -1. - 3) The parameter value returned by ILAENV is checked for validity in - the calling subroutine. For example, ILAENV is used to retrieve - the optimal blocksize for STRTRI as follows: - - NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) - IF( NB.LE.1 ) NB = MAX( 1, N ) - - ===================================================================== -*/ - - - switch (*ispec) { - case 1: goto L100; - case 2: goto L100; - case 3: goto L100; - case 4: goto L400; - case 5: goto L500; - case 6: goto L600; - case 7: goto L700; - case 8: goto L800; - case 9: goto L900; - case 10: goto L1000; - case 11: goto L1100; - } - -/* Invalid value for ISPEC */ - - ret_val = -1; - return ret_val; - -L100: - -/* Convert NAME to upper case if the first character is lower case. */ - - ret_val = 1; - s_copy(subnam, name__, (ftnlen)6, name_len); - ic = *(unsigned char *)subnam; - iz = 'Z'; - if (iz == 90 || iz == 122) { - -/* ASCII character set */ - - if (ic >= 97 && ic <= 122) { - *(unsigned char *)subnam = (char) (ic - 32); - for (i__ = 2; i__ <= 6; ++i__) { - ic = *(unsigned char *)&subnam[i__ - 1]; - if (ic >= 97 && ic <= 122) { - *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); - } -/* L10: */ - } - } - - } else if (iz == 233 || iz == 169) { - -/* EBCDIC character set */ - - if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= 162 && - ic <= 169) { - *(unsigned char *)subnam = (char) (ic + 64); - for (i__ = 2; i__ <= 6; ++i__) { - ic = *(unsigned char *)&subnam[i__ - 1]; - if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= - 162 && ic <= 169) { - *(unsigned char *)&subnam[i__ - 1] = (char) (ic + 64); - } -/* L20: */ - } - } - - } else if (iz == 218 || iz == 250) { - -/* Prime machines: ASCII+128 */ - - if (ic >= 225 && ic <= 250) { - *(unsigned char *)subnam = (char) (ic - 32); - for (i__ = 2; i__ <= 6; ++i__) { - ic = *(unsigned char *)&subnam[i__ - 1]; - if (ic >= 225 && ic <= 250) { - *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); - } -/* L30: */ - } - } - } - - *(unsigned char *)c1 = *(unsigned char *)subnam; - sname = *(unsigned char *)c1 == 'S' || *(unsigned char *)c1 == 'D'; - cname = *(unsigned char *)c1 == 'C' || *(unsigned char *)c1 == 'Z'; - if (! (cname || sname)) { - return ret_val; - } - s_copy(c2, subnam + 1, (ftnlen)2, (ftnlen)2); - s_copy(c3, subnam + 3, (ftnlen)3, (ftnlen)3); - s_copy(c4, c3 + 1, (ftnlen)2, (ftnlen)2); - - switch (*ispec) { - case 1: goto L110; - case 2: goto L200; - case 3: goto L300; - } - -L110: - -/* - ISPEC = 1: block size - - In these examples, separate code is provided for setting NB for - real and complex. We assume that NB will take the same value in - single or double precision. -*/ - - nb = 1; - - if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nb = 64; - } else { - nb = 64; - } - } else if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, - "RQF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen) - 3, (ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) - == 0) { - if (sname) { - nb = 32; - } else { - nb = 32; - } - } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nb = 32; - } else { - nb = 32; - } - } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nb = 32; - } else { - nb = 32; - } - } else if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nb = 64; - } else { - nb = 64; - } - } - } else if (s_cmp(c2, "PO", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nb = 64; - } else { - nb = 64; - } - } - } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nb = 64; - } else { - nb = 64; - } - } else if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { - nb = 32; - } else if (sname && s_cmp(c3, "GST", (ftnlen)3, (ftnlen)3) == 0) { - nb = 64; - } - } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { - nb = 64; - } else if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { - nb = 32; - } else if (s_cmp(c3, "GST", (ftnlen)3, (ftnlen)3) == 0) { - nb = 64; - } - } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { - if (*(unsigned char *)c3 == 'G') { - if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)2, (ftnlen)2) == 0) { - nb = 32; - } - } else if (*(unsigned char *)c3 == 'M') { - if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)2, (ftnlen)2) == 0) { - nb = 32; - } - } - } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { - if (*(unsigned char *)c3 == 'G') { - if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)2, (ftnlen)2) == 0) { - nb = 32; - } - } else if (*(unsigned char *)c3 == 'M') { - if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)2, (ftnlen)2) == 0) { - nb = 32; - } - } - } else if (s_cmp(c2, "GB", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - if (*n4 <= 64) { - nb = 1; - } else { - nb = 32; - } - } else { - if (*n4 <= 64) { - nb = 1; - } else { - nb = 32; - } - } - } - } else if (s_cmp(c2, "PB", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - if (*n2 <= 64) { - nb = 1; - } else { - nb = 32; - } - } else { - if (*n2 <= 64) { - nb = 1; - } else { - nb = 32; - } - } - } - } else if (s_cmp(c2, "TR", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nb = 64; - } else { - nb = 64; - } - } - } else if (s_cmp(c2, "LA", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "UUM", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nb = 64; - } else { - nb = 64; - } - } - } else if (sname && s_cmp(c2, "ST", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "EBZ", (ftnlen)3, (ftnlen)3) == 0) { - nb = 1; - } - } - ret_val = nb; - return ret_val; - -L200: - -/* ISPEC = 2: minimum block size */ - - nbmin = 2; - if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( - ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)3, ( - ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) == 0) - { - if (sname) { - nbmin = 2; - } else { - nbmin = 2; - } - } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nbmin = 2; - } else { - nbmin = 2; - } - } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nbmin = 2; - } else { - nbmin = 2; - } - } else if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nbmin = 2; - } else { - nbmin = 2; - } - } - } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nbmin = 8; - } else { - nbmin = 8; - } - } else if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { - nbmin = 2; - } - } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { - nbmin = 2; - } - } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { - if (*(unsigned char *)c3 == 'G') { - if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)2, (ftnlen)2) == 0) { - nbmin = 2; - } - } else if (*(unsigned char *)c3 == 'M') { - if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)2, (ftnlen)2) == 0) { - nbmin = 2; - } - } - } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { - if (*(unsigned char *)c3 == 'G') { - if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)2, (ftnlen)2) == 0) { - nbmin = 2; - } - } else if (*(unsigned char *)c3 == 'M') { - if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)2, (ftnlen)2) == 0) { - nbmin = 2; - } - } - } - ret_val = nbmin; - return ret_val; - -L300: - -/* ISPEC = 3: crossover point */ - - nx = 0; - if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( - ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)3, ( - ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) == 0) - { - if (sname) { - nx = 128; - } else { - nx = 128; - } - } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nx = 128; - } else { - nx = 128; - } - } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nx = 128; - } else { - nx = 128; - } - } - } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { - if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { - nx = 32; - } - } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { - nx = 32; - } - } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { - if (*(unsigned char *)c3 == 'G') { - if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)2, (ftnlen)2) == 0) { - nx = 128; - } - } - } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { - if (*(unsigned char *)c3 == 'G') { - if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)2, (ftnlen)2) == 0) { - nx = 128; - } - } - } - ret_val = nx; - return ret_val; - -L400: - -/* ISPEC = 4: number of shifts (used by xHSEQR) */ - - ret_val = 6; - return ret_val; - -L500: - -/* ISPEC = 5: minimum column dimension (not used) */ - - ret_val = 2; - return ret_val; - -L600: - -/* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) */ - - ret_val = (integer) ((real) min(*n1,*n2) * 1.6f); - return ret_val; - -L700: - -/* ISPEC = 7: number of processors (not used) */ - - ret_val = 1; - return ret_val; - -L800: - -/* ISPEC = 8: crossover point for multishift (used by xHSEQR) */ - - ret_val = 50; - return ret_val; - -L900: - -/* - ISPEC = 9: maximum size of the subproblems at the bottom of the - computation tree in the divide-and-conquer algorithm - (used by xGELSD and xGESDD) -*/ - - ret_val = 25; - return ret_val; - -L1000: - -/* - ISPEC = 10: ieee NaN arithmetic can be trusted not to trap - - ILAENV = 0 -*/ - ret_val = 1; - if (ret_val == 1) { - ret_val = ieeeck_(&c__0, &c_b1101, &c_b871); - } - return ret_val; - -L1100: - -/* - ISPEC = 11: infinity arithmetic can be trusted not to trap - - ILAENV = 0 -*/ - ret_val = 1; - if (ret_val == 1) { - ret_val = ieeeck_(&c__1, &c_b1101, &c_b871); - } - return ret_val; - -/* End of ILAENV */ - -} /* ilaenv_ */ - -/* Subroutine */ int sbdsdc_(char *uplo, char *compq, integer *n, real *d__, - real *e, real *u, integer *ldu, real *vt, integer *ldvt, real *q, - integer *iq, real *work, integer *iwork, integer *info) -{ - /* System generated locals */ - integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; - real r__1; - - /* Builtin functions */ - double r_sign(real *, real *), log(doublereal); - - /* Local variables */ - static integer i__, j, k; - static real p, r__; - static integer z__, ic, ii, kk; - static real cs; - static integer is, iu; - static real sn; - static integer nm1; - static real eps; - static integer ivt, difl, difr, ierr, perm, mlvl, sqre; - extern logical lsame_(char *, char *); - static integer poles; - extern /* Subroutine */ int slasr_(char *, char *, char *, integer *, - integer *, real *, real *, real *, integer *); - static integer iuplo, nsize, start; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *), sswap_(integer *, real *, integer *, real *, integer * - ), slasd0_(integer *, integer *, real *, real *, real *, integer * - , real *, integer *, integer *, integer *, real *, integer *); - extern doublereal slamch_(char *); - extern /* Subroutine */ int slasda_(integer *, integer *, integer *, - integer *, real *, real *, real *, integer *, real *, integer *, - real *, real *, real *, real *, integer *, integer *, integer *, - integer *, real *, real *, real *, real *, integer *, integer *), - xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, - real *, integer *, integer *, real *, integer *, integer *); - static integer givcol; - extern /* Subroutine */ int slasdq_(char *, integer *, integer *, integer - *, integer *, integer *, real *, real *, real *, integer *, real * - , integer *, real *, integer *, real *, integer *); - static integer icompq; - extern /* Subroutine */ int slaset_(char *, integer *, integer *, real *, - real *, real *, integer *), slartg_(real *, real *, real * - , real *, real *); - static real orgnrm; - static integer givnum; - extern doublereal slanst_(char *, integer *, real *, real *); - static integer givptr, qstart, smlsiz, wstart, smlszp; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - December 1, 1999 - - - Purpose - ======= - - SBDSDC computes the singular value decomposition (SVD) of a real - N-by-N (upper or lower) bidiagonal matrix B: B = U * S * VT, - using a divide and conquer method, where S is a diagonal matrix - with non-negative diagonal elements (the singular values of B), and - U and VT are orthogonal matrices of left and right singular vectors, - respectively. SBDSDC can be used to compute all singular values, - and optionally, singular vectors or singular vectors in compact form. - - This code makes very mild assumptions about floating point - arithmetic. It will work on machines with a guard digit in - add/subtract, or on those binary machines without guard digits - which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. - It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. See SLASD3 for details. - - The code currently call SLASDQ if singular values only are desired. - However, it can be slightly modified to compute singular values - using the divide and conquer method. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': B is upper bidiagonal. - = 'L': B is lower bidiagonal. - - COMPQ (input) CHARACTER*1 - Specifies whether singular vectors are to be computed - as follows: - = 'N': Compute singular values only; - = 'P': Compute singular values and compute singular - vectors in compact form; - = 'I': Compute singular values and singular vectors. - - N (input) INTEGER - The order of the matrix B. N >= 0. - - D (input/output) REAL array, dimension (N) - On entry, the n diagonal elements of the bidiagonal matrix B. - On exit, if INFO=0, the singular values of B. - - E (input/output) REAL array, dimension (N) - On entry, the elements of E contain the offdiagonal - elements of the bidiagonal matrix whose SVD is desired. - On exit, E has been destroyed. - - U (output) REAL array, dimension (LDU,N) - If COMPQ = 'I', then: - On exit, if INFO = 0, U contains the left singular vectors - of the bidiagonal matrix. - For other values of COMPQ, U is not referenced. - - LDU (input) INTEGER - The leading dimension of the array U. LDU >= 1. - If singular vectors are desired, then LDU >= max( 1, N ). - - VT (output) REAL array, dimension (LDVT,N) - If COMPQ = 'I', then: - On exit, if INFO = 0, VT' contains the right singular - vectors of the bidiagonal matrix. - For other values of COMPQ, VT is not referenced. - - LDVT (input) INTEGER - The leading dimension of the array VT. LDVT >= 1. - If singular vectors are desired, then LDVT >= max( 1, N ). - - Q (output) REAL array, dimension (LDQ) - If COMPQ = 'P', then: - On exit, if INFO = 0, Q and IQ contain the left - and right singular vectors in a compact form, - requiring O(N log N) space instead of 2*N**2. - In particular, Q contains all the REAL data in - LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1)))) - words of memory, where SMLSIZ is returned by ILAENV and - is equal to the maximum size of the subproblems at the - bottom of the computation tree (usually about 25). - For other values of COMPQ, Q is not referenced. - - IQ (output) INTEGER array, dimension (LDIQ) - If COMPQ = 'P', then: - On exit, if INFO = 0, Q and IQ contain the left - and right singular vectors in a compact form, - requiring O(N log N) space instead of 2*N**2. - In particular, IQ contains all INTEGER data in - LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1)))) - words of memory, where SMLSIZ is returned by ILAENV and - is equal to the maximum size of the subproblems at the - bottom of the computation tree (usually about 25). - For other values of COMPQ, IQ is not referenced. - - WORK (workspace) REAL array, dimension (LWORK) - If COMPQ = 'N' then LWORK >= (4 * N). - If COMPQ = 'P' then LWORK >= (6 * N). - If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N). - - IWORK (workspace) INTEGER array, dimension (8*N) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an singular value. - The update process of divide and conquer failed. - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - --q; - --iq; - --work; - --iwork; - - /* Function Body */ - *info = 0; - - iuplo = 0; - if (lsame_(uplo, "U")) { - iuplo = 1; - } - if (lsame_(uplo, "L")) { - iuplo = 2; - } - if (lsame_(compq, "N")) { - icompq = 0; - } else if (lsame_(compq, "P")) { - icompq = 1; - } else if (lsame_(compq, "I")) { - icompq = 2; - } else { - icompq = -1; - } - if (iuplo == 0) { - *info = -1; - } else if (icompq < 0) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*ldu < 1 || icompq == 2 && *ldu < *n) { - *info = -7; - } else if (*ldvt < 1 || icompq == 2 && *ldvt < *n) { - *info = -9; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SBDSDC", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - smlsiz = ilaenv_(&c__9, "SBDSDC", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); - if (*n == 1) { - if (icompq == 1) { - q[1] = r_sign(&c_b871, &d__[1]); - q[smlsiz * *n + 1] = 1.f; - } else if (icompq == 2) { - u[u_dim1 + 1] = r_sign(&c_b871, &d__[1]); - vt[vt_dim1 + 1] = 1.f; - } - d__[1] = dabs(d__[1]); - return 0; - } - nm1 = *n - 1; - -/* - If matrix lower bidiagonal, rotate to be upper bidiagonal - by applying Givens rotations on the left -*/ - - wstart = 1; - qstart = 3; - if (icompq == 1) { - scopy_(n, &d__[1], &c__1, &q[1], &c__1); - i__1 = *n - 1; - scopy_(&i__1, &e[1], &c__1, &q[*n + 1], &c__1); - } - if (iuplo == 2) { - qstart = 5; - wstart = (*n << 1) - 1; - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); - d__[i__] = r__; - e[i__] = sn * d__[i__ + 1]; - d__[i__ + 1] = cs * d__[i__ + 1]; - if (icompq == 1) { - q[i__ + (*n << 1)] = cs; - q[i__ + *n * 3] = sn; - } else if (icompq == 2) { - work[i__] = cs; - work[nm1 + i__] = -sn; - } -/* L10: */ - } - } - -/* If ICOMPQ = 0, use SLASDQ to compute the singular values. */ - - if (icompq == 0) { - slasdq_("U", &c__0, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ - vt_offset], ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ - wstart], info); - goto L40; - } - -/* - If N is smaller than the minimum divide size SMLSIZ, then solve - the problem with another solver. -*/ - - if (*n <= smlsiz) { - if (icompq == 2) { - slaset_("A", n, n, &c_b1101, &c_b871, &u[u_offset], ldu); - slaset_("A", n, n, &c_b1101, &c_b871, &vt[vt_offset], ldvt); - slasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &vt[vt_offset] - , ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ - wstart], info); - } else if (icompq == 1) { - iu = 1; - ivt = iu + *n; - slaset_("A", n, n, &c_b1101, &c_b871, &q[iu + (qstart - 1) * *n], - n); - slaset_("A", n, n, &c_b1101, &c_b871, &q[ivt + (qstart - 1) * *n], - n); - slasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &q[ivt + ( - qstart - 1) * *n], n, &q[iu + (qstart - 1) * *n], n, &q[ - iu + (qstart - 1) * *n], n, &work[wstart], info); - } - goto L40; - } - - if (icompq == 2) { - slaset_("A", n, n, &c_b1101, &c_b871, &u[u_offset], ldu); - slaset_("A", n, n, &c_b1101, &c_b871, &vt[vt_offset], ldvt) - ; - } - -/* Scale. */ - - orgnrm = slanst_("M", n, &d__[1], &e[1]); - if (orgnrm == 0.f) { - return 0; - } - slascl_("G", &c__0, &c__0, &orgnrm, &c_b871, n, &c__1, &d__[1], n, &ierr); - slascl_("G", &c__0, &c__0, &orgnrm, &c_b871, &nm1, &c__1, &e[1], &nm1, & - ierr); - - eps = slamch_("Epsilon"); - - mlvl = (integer) (log((real) (*n) / (real) (smlsiz + 1)) / log(2.f)) + 1; - smlszp = smlsiz + 1; - - if (icompq == 1) { - iu = 1; - ivt = smlsiz + 1; - difl = ivt + smlszp; - difr = difl + mlvl; - z__ = difr + (mlvl << 1); - ic = z__ + mlvl; - is = ic + 1; - poles = is + 1; - givnum = poles + (mlvl << 1); - - k = 1; - givptr = 2; - perm = 3; - givcol = perm + mlvl; - } - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if ((r__1 = d__[i__], dabs(r__1)) < eps) { - d__[i__] = r_sign(&eps, &d__[i__]); - } -/* L20: */ - } - - start = 1; - sqre = 0; - - i__1 = nm1; - for (i__ = 1; i__ <= i__1; ++i__) { - if ((r__1 = e[i__], dabs(r__1)) < eps || i__ == nm1) { - -/* - Subproblem found. First determine its size and then - apply divide and conquer on it. -*/ - - if (i__ < nm1) { - -/* A subproblem with E(I) small for I < NM1. */ - - nsize = i__ - start + 1; - } else if ((r__1 = e[i__], dabs(r__1)) >= eps) { - -/* A subproblem with E(NM1) not too small but I = NM1. */ - - nsize = *n - start + 1; - } else { - -/* - A subproblem with E(NM1) small. This implies an - 1-by-1 subproblem at D(N). Solve this 1-by-1 problem - first. -*/ - - nsize = i__ - start + 1; - if (icompq == 2) { - u[*n + *n * u_dim1] = r_sign(&c_b871, &d__[*n]); - vt[*n + *n * vt_dim1] = 1.f; - } else if (icompq == 1) { - q[*n + (qstart - 1) * *n] = r_sign(&c_b871, &d__[*n]); - q[*n + (smlsiz + qstart - 1) * *n] = 1.f; - } - d__[*n] = (r__1 = d__[*n], dabs(r__1)); - } - if (icompq == 2) { - slasd0_(&nsize, &sqre, &d__[start], &e[start], &u[start + - start * u_dim1], ldu, &vt[start + start * vt_dim1], - ldvt, &smlsiz, &iwork[1], &work[wstart], info); - } else { - slasda_(&icompq, &smlsiz, &nsize, &sqre, &d__[start], &e[ - start], &q[start + (iu + qstart - 2) * *n], n, &q[ - start + (ivt + qstart - 2) * *n], &iq[start + k * *n], - &q[start + (difl + qstart - 2) * *n], &q[start + ( - difr + qstart - 2) * *n], &q[start + (z__ + qstart - - 2) * *n], &q[start + (poles + qstart - 2) * *n], &iq[ - start + givptr * *n], &iq[start + givcol * *n], n, & - iq[start + perm * *n], &q[start + (givnum + qstart - - 2) * *n], &q[start + (ic + qstart - 2) * *n], &q[ - start + (is + qstart - 2) * *n], &work[wstart], & - iwork[1], info); - if (*info != 0) { - return 0; - } - } - start = i__ + 1; - } -/* L30: */ - } - -/* Unscale */ - - slascl_("G", &c__0, &c__0, &c_b871, &orgnrm, n, &c__1, &d__[1], n, &ierr); -L40: - -/* Use Selection Sort to minimize swaps of singular vectors */ - - i__1 = *n; - for (ii = 2; ii <= i__1; ++ii) { - i__ = ii - 1; - kk = i__; - p = d__[i__]; - i__2 = *n; - for (j = ii; j <= i__2; ++j) { - if (d__[j] > p) { - kk = j; - p = d__[j]; - } -/* L50: */ - } - if (kk != i__) { - d__[kk] = d__[i__]; - d__[i__] = p; - if (icompq == 1) { - iq[i__] = kk; - } else if (icompq == 2) { - sswap_(n, &u[i__ * u_dim1 + 1], &c__1, &u[kk * u_dim1 + 1], & - c__1); - sswap_(n, &vt[i__ + vt_dim1], ldvt, &vt[kk + vt_dim1], ldvt); - } - } else if (icompq == 1) { - iq[i__] = i__; - } -/* L60: */ - } - -/* If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO */ - - if (icompq == 1) { - if (iuplo == 1) { - iq[*n] = 1; - } else { - iq[*n] = 0; - } - } - -/* - If B is lower bidiagonal, update U by those Givens rotations - which rotated B to be upper bidiagonal -*/ - - if (iuplo == 2 && icompq == 2) { - slasr_("L", "V", "B", n, n, &work[1], &work[*n], &u[u_offset], ldu); - } - - return 0; - -/* End of SBDSDC */ - -} /* sbdsdc_ */ - -/* Subroutine */ int sbdsqr_(char *uplo, integer *n, integer *ncvt, integer * - nru, integer *ncc, real *d__, real *e, real *vt, integer *ldvt, real * - u, integer *ldu, real *c__, integer *ldc, real *work, integer *info) -{ - /* System generated locals */ - integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, - i__2; - real r__1, r__2, r__3, r__4; - doublereal d__1; - - /* Builtin functions */ - double pow_dd(doublereal *, doublereal *), sqrt(doublereal), r_sign(real * - , real *); - - /* Local variables */ - static real f, g, h__; - static integer i__, j, m; - static real r__, cs; - static integer ll; - static real sn, mu; - static integer nm1, nm12, nm13, lll; - static real eps, sll, tol, abse; - static integer idir; - static real abss; - static integer oldm; - static real cosl; - static integer isub, iter; - static real unfl, sinl, cosr, smin, smax, sinr; - extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, - integer *, real *, real *), slas2_(real *, real *, real *, real *, - real *); - extern logical lsame_(char *, char *); - static real oldcs; - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); - static integer oldll; - static real shift, sigmn, oldsn; - static integer maxit; - static real sminl; - extern /* Subroutine */ int slasr_(char *, char *, char *, integer *, - integer *, real *, real *, real *, integer *); - static real sigmx; - static logical lower; - extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, - integer *), slasq1_(integer *, real *, real *, real *, integer *), - slasv2_(real *, real *, real *, real *, real *, real *, real *, - real *, real *); - extern doublereal slamch_(char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static real sminoa; - extern /* Subroutine */ int slartg_(real *, real *, real *, real *, real * - ); - static real thresh; - static logical rotate; - static real sminlo, tolmul; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - SBDSQR computes the singular value decomposition (SVD) of a real - N-by-N (upper or lower) bidiagonal matrix B: B = Q * S * P' (P' - denotes the transpose of P), where S is a diagonal matrix with - non-negative diagonal elements (the singular values of B), and Q - and P are orthogonal matrices. - - The routine computes S, and optionally computes U * Q, P' * VT, - or Q' * C, for given real input matrices U, VT, and C. - - See "Computing Small Singular Values of Bidiagonal Matrices With - Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, - LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, - no. 5, pp. 873-912, Sept 1990) and - "Accurate singular values and differential qd algorithms," by - B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics - Department, University of California at Berkeley, July 1992 - for a detailed description of the algorithm. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': B is upper bidiagonal; - = 'L': B is lower bidiagonal. - - N (input) INTEGER - The order of the matrix B. N >= 0. - - NCVT (input) INTEGER - The number of columns of the matrix VT. NCVT >= 0. - - NRU (input) INTEGER - The number of rows of the matrix U. NRU >= 0. - - NCC (input) INTEGER - The number of columns of the matrix C. NCC >= 0. - - D (input/output) REAL array, dimension (N) - On entry, the n diagonal elements of the bidiagonal matrix B. - On exit, if INFO=0, the singular values of B in decreasing - order. - - E (input/output) REAL array, dimension (N) - On entry, the elements of E contain the - offdiagonal elements of the bidiagonal matrix whose SVD - is desired. On normal exit (INFO = 0), E is destroyed. - If the algorithm does not converge (INFO > 0), D and E - will contain the diagonal and superdiagonal elements of a - bidiagonal matrix orthogonally equivalent to the one given - as input. E(N) is used for workspace. - - VT (input/output) REAL array, dimension (LDVT, NCVT) - On entry, an N-by-NCVT matrix VT. - On exit, VT is overwritten by P' * VT. - VT is not referenced if NCVT = 0. - - LDVT (input) INTEGER - The leading dimension of the array VT. - LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. - - U (input/output) REAL array, dimension (LDU, N) - On entry, an NRU-by-N matrix U. - On exit, U is overwritten by U * Q. - U is not referenced if NRU = 0. - - LDU (input) INTEGER - The leading dimension of the array U. LDU >= max(1,NRU). - - C (input/output) REAL array, dimension (LDC, NCC) - On entry, an N-by-NCC matrix C. - On exit, C is overwritten by Q' * C. - C is not referenced if NCC = 0. - - LDC (input) INTEGER - The leading dimension of the array C. - LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. - - WORK (workspace) REAL array, dimension (4*N) - - INFO (output) INTEGER - = 0: successful exit - < 0: If INFO = -i, the i-th argument had an illegal value - > 0: the algorithm did not converge; D and E contain the - elements of a bidiagonal matrix which is orthogonally - similar to the input matrix B; if INFO = i, i - elements of E have not converged to zero. - - Internal Parameters - =================== - - TOLMUL REAL, default = max(10,min(100,EPS**(-1/8))) - TOLMUL controls the convergence criterion of the QR loop. - If it is positive, TOLMUL*EPS is the desired relative - precision in the computed singular values. - If it is negative, abs(TOLMUL*EPS*sigma_max) is the - desired absolute accuracy in the computed singular - values (corresponds to relative accuracy - abs(TOLMUL*EPS) in the largest singular value. - abs(TOLMUL) should be between 1 and 1/EPS, and preferably - between 10 (for fast convergence) and .1/EPS - (for there to be some accuracy in the results). - Default is to lose at either one eighth or 2 of the - available decimal digits in each computed singular value - (whichever is smaller). - - MAXITR INTEGER, default = 6 - MAXITR controls the maximum number of passes of the - algorithm through its inner loop. The algorithms stops - (and so fails to converge) if the number of passes - through the inner loop exceeds MAXITR*N**2. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - lower = lsame_(uplo, "L"); - if (! lsame_(uplo, "U") && ! lower) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*ncvt < 0) { - *info = -3; - } else if (*nru < 0) { - *info = -4; - } else if (*ncc < 0) { - *info = -5; - } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { - *info = -9; - } else if (*ldu < max(1,*nru)) { - *info = -11; - } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { - *info = -13; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SBDSQR", &i__1); - return 0; - } - if (*n == 0) { - return 0; - } - if (*n == 1) { - goto L160; - } - -/* ROTATE is true if any singular vectors desired, false otherwise */ - - rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; - -/* If no singular vectors desired, use qd algorithm */ - - if (! rotate) { - slasq1_(n, &d__[1], &e[1], &work[1], info); - return 0; - } - - nm1 = *n - 1; - nm12 = nm1 + nm1; - nm13 = nm12 + nm1; - idir = 0; - -/* Get machine constants */ - - eps = slamch_("Epsilon"); - unfl = slamch_("Safe minimum"); - -/* - If matrix lower bidiagonal, rotate to be upper bidiagonal - by applying Givens rotations on the left -*/ - - if (lower) { - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); - d__[i__] = r__; - e[i__] = sn * d__[i__ + 1]; - d__[i__ + 1] = cs * d__[i__ + 1]; - work[i__] = cs; - work[nm1 + i__] = sn; -/* L10: */ - } - -/* Update singular vectors if desired */ - - if (*nru > 0) { - slasr_("R", "V", "F", nru, n, &work[1], &work[*n], &u[u_offset], - ldu); - } - if (*ncc > 0) { - slasr_("L", "V", "F", n, ncc, &work[1], &work[*n], &c__[c_offset], - ldc); - } - } - -/* - Compute singular values to relative accuracy TOL - (By setting TOL to be negative, algorithm will compute - singular values to absolute accuracy ABS(TOL)*norm(input matrix)) - - Computing MAX - Computing MIN -*/ - d__1 = (doublereal) eps; - r__3 = 100.f, r__4 = pow_dd(&d__1, &c_b2532); - r__1 = 10.f, r__2 = dmin(r__3,r__4); - tolmul = dmax(r__1,r__2); - tol = tolmul * eps; - -/* Compute approximate maximum, minimum singular values */ - - smax = 0.f; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - r__2 = smax, r__3 = (r__1 = d__[i__], dabs(r__1)); - smax = dmax(r__2,r__3); -/* L20: */ - } - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - r__2 = smax, r__3 = (r__1 = e[i__], dabs(r__1)); - smax = dmax(r__2,r__3); -/* L30: */ - } - sminl = 0.f; - if (tol >= 0.f) { - -/* Relative accuracy desired */ - - sminoa = dabs(d__[1]); - if (sminoa == 0.f) { - goto L50; - } - mu = sminoa; - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - mu = (r__2 = d__[i__], dabs(r__2)) * (mu / (mu + (r__1 = e[i__ - - 1], dabs(r__1)))); - sminoa = dmin(sminoa,mu); - if (sminoa == 0.f) { - goto L50; - } -/* L40: */ - } -L50: - sminoa /= sqrt((real) (*n)); -/* Computing MAX */ - r__1 = tol * sminoa, r__2 = *n * 6 * *n * unfl; - thresh = dmax(r__1,r__2); - } else { - -/* - Absolute accuracy desired - - Computing MAX -*/ - r__1 = dabs(tol) * smax, r__2 = *n * 6 * *n * unfl; - thresh = dmax(r__1,r__2); - } - -/* - Prepare for main iteration loop for the singular values - (MAXIT is the maximum number of passes through the inner - loop permitted before nonconvergence signalled.) -*/ - - maxit = *n * 6 * *n; - iter = 0; - oldll = -1; - oldm = -1; - -/* M points to last element of unconverged part of matrix */ - - m = *n; - -/* Begin main iteration loop */ - -L60: - -/* Check for convergence or exceeding iteration count */ - - if (m <= 1) { - goto L160; - } - if (iter > maxit) { - goto L200; - } - -/* Find diagonal block of matrix to work on */ - - if (tol < 0.f && (r__1 = d__[m], dabs(r__1)) <= thresh) { - d__[m] = 0.f; - } - smax = (r__1 = d__[m], dabs(r__1)); - smin = smax; - i__1 = m - 1; - for (lll = 1; lll <= i__1; ++lll) { - ll = m - lll; - abss = (r__1 = d__[ll], dabs(r__1)); - abse = (r__1 = e[ll], dabs(r__1)); - if (tol < 0.f && abss <= thresh) { - d__[ll] = 0.f; - } - if (abse <= thresh) { - goto L80; - } - smin = dmin(smin,abss); -/* Computing MAX */ - r__1 = max(smax,abss); - smax = dmax(r__1,abse); -/* L70: */ - } - ll = 0; - goto L90; -L80: - e[ll] = 0.f; - -/* Matrix splits since E(LL) = 0 */ - - if (ll == m - 1) { - -/* Convergence of bottom singular value, return to top of loop */ - - --m; - goto L60; - } -L90: - ++ll; - -/* E(LL) through E(M-1) are nonzero, E(LL-1) is zero */ - - if (ll == m - 1) { - -/* 2 by 2 block, handle separately */ - - slasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr, - &sinl, &cosl); - d__[m - 1] = sigmx; - e[m - 1] = 0.f; - d__[m] = sigmn; - -/* Compute singular vectors, if desired */ - - if (*ncvt > 0) { - srot_(ncvt, &vt[m - 1 + vt_dim1], ldvt, &vt[m + vt_dim1], ldvt, & - cosr, &sinr); - } - if (*nru > 0) { - srot_(nru, &u[(m - 1) * u_dim1 + 1], &c__1, &u[m * u_dim1 + 1], & - c__1, &cosl, &sinl); - } - if (*ncc > 0) { - srot_(ncc, &c__[m - 1 + c_dim1], ldc, &c__[m + c_dim1], ldc, & - cosl, &sinl); - } - m += -2; - goto L60; - } - -/* - If working on new submatrix, choose shift direction - (from larger end diagonal element towards smaller) -*/ - - if (ll > oldm || m < oldll) { - if ((r__1 = d__[ll], dabs(r__1)) >= (r__2 = d__[m], dabs(r__2))) { - -/* Chase bulge from top (big end) to bottom (small end) */ - - idir = 1; - } else { - -/* Chase bulge from bottom (big end) to top (small end) */ - - idir = 2; - } - } - -/* Apply convergence tests */ - - if (idir == 1) { - -/* - Run convergence test in forward direction - First apply standard test to bottom of matrix -*/ - - if ((r__2 = e[m - 1], dabs(r__2)) <= dabs(tol) * (r__1 = d__[m], dabs( - r__1)) || tol < 0.f && (r__3 = e[m - 1], dabs(r__3)) <= - thresh) { - e[m - 1] = 0.f; - goto L60; - } - - if (tol >= 0.f) { - -/* - If relative accuracy desired, - apply convergence criterion forward -*/ - - mu = (r__1 = d__[ll], dabs(r__1)); - sminl = mu; - i__1 = m - 1; - for (lll = ll; lll <= i__1; ++lll) { - if ((r__1 = e[lll], dabs(r__1)) <= tol * mu) { - e[lll] = 0.f; - goto L60; - } - sminlo = sminl; - mu = (r__2 = d__[lll + 1], dabs(r__2)) * (mu / (mu + (r__1 = - e[lll], dabs(r__1)))); - sminl = dmin(sminl,mu); -/* L100: */ - } - } - - } else { - -/* - Run convergence test in backward direction - First apply standard test to top of matrix -*/ - - if ((r__2 = e[ll], dabs(r__2)) <= dabs(tol) * (r__1 = d__[ll], dabs( - r__1)) || tol < 0.f && (r__3 = e[ll], dabs(r__3)) <= thresh) { - e[ll] = 0.f; - goto L60; - } - - if (tol >= 0.f) { - -/* - If relative accuracy desired, - apply convergence criterion backward -*/ - - mu = (r__1 = d__[m], dabs(r__1)); - sminl = mu; - i__1 = ll; - for (lll = m - 1; lll >= i__1; --lll) { - if ((r__1 = e[lll], dabs(r__1)) <= tol * mu) { - e[lll] = 0.f; - goto L60; - } - sminlo = sminl; - mu = (r__2 = d__[lll], dabs(r__2)) * (mu / (mu + (r__1 = e[ - lll], dabs(r__1)))); - sminl = dmin(sminl,mu); -/* L110: */ - } - } - } - oldll = ll; - oldm = m; - -/* - Compute shift. First, test if shifting would ruin relative - accuracy, and if so set the shift to zero. - - Computing MAX -*/ - r__1 = eps, r__2 = tol * .01f; - if (tol >= 0.f && *n * tol * (sminl / smax) <= dmax(r__1,r__2)) { - -/* Use a zero shift to avoid loss of relative accuracy */ - - shift = 0.f; - } else { - -/* Compute the shift from 2-by-2 block at end of matrix */ - - if (idir == 1) { - sll = (r__1 = d__[ll], dabs(r__1)); - slas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__); - } else { - sll = (r__1 = d__[m], dabs(r__1)); - slas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__); - } - -/* Test if shift negligible, and if so set to zero */ - - if (sll > 0.f) { -/* Computing 2nd power */ - r__1 = shift / sll; - if (r__1 * r__1 < eps) { - shift = 0.f; - } - } - } - -/* Increment iteration count */ - - iter = iter + m - ll; - -/* If SHIFT = 0, do simplified QR iteration */ - - if (shift == 0.f) { - if (idir == 1) { - -/* - Chase bulge from top to bottom - Save cosines and sines for later singular vector updates -*/ - - cs = 1.f; - oldcs = 1.f; - i__1 = m - 1; - for (i__ = ll; i__ <= i__1; ++i__) { - r__1 = d__[i__] * cs; - slartg_(&r__1, &e[i__], &cs, &sn, &r__); - if (i__ > ll) { - e[i__ - 1] = oldsn * r__; - } - r__1 = oldcs * r__; - r__2 = d__[i__ + 1] * sn; - slartg_(&r__1, &r__2, &oldcs, &oldsn, &d__[i__]); - work[i__ - ll + 1] = cs; - work[i__ - ll + 1 + nm1] = sn; - work[i__ - ll + 1 + nm12] = oldcs; - work[i__ - ll + 1 + nm13] = oldsn; -/* L120: */ - } - h__ = d__[m] * cs; - d__[m] = h__ * oldcs; - e[m - 1] = h__ * oldsn; - -/* Update singular vectors */ - - if (*ncvt > 0) { - i__1 = m - ll + 1; - slasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ - ll + vt_dim1], ldvt); - } - if (*nru > 0) { - i__1 = m - ll + 1; - slasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 - + 1], &u[ll * u_dim1 + 1], ldu); - } - if (*ncc > 0) { - i__1 = m - ll + 1; - slasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 - + 1], &c__[ll + c_dim1], ldc); - } - -/* Test convergence */ - - if ((r__1 = e[m - 1], dabs(r__1)) <= thresh) { - e[m - 1] = 0.f; - } - - } else { - -/* - Chase bulge from bottom to top - Save cosines and sines for later singular vector updates -*/ - - cs = 1.f; - oldcs = 1.f; - i__1 = ll + 1; - for (i__ = m; i__ >= i__1; --i__) { - r__1 = d__[i__] * cs; - slartg_(&r__1, &e[i__ - 1], &cs, &sn, &r__); - if (i__ < m) { - e[i__] = oldsn * r__; - } - r__1 = oldcs * r__; - r__2 = d__[i__ - 1] * sn; - slartg_(&r__1, &r__2, &oldcs, &oldsn, &d__[i__]); - work[i__ - ll] = cs; - work[i__ - ll + nm1] = -sn; - work[i__ - ll + nm12] = oldcs; - work[i__ - ll + nm13] = -oldsn; -/* L130: */ - } - h__ = d__[ll] * cs; - d__[ll] = h__ * oldcs; - e[ll] = h__ * oldsn; - -/* Update singular vectors */ - - if (*ncvt > 0) { - i__1 = m - ll + 1; - slasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ - nm13 + 1], &vt[ll + vt_dim1], ldvt); - } - if (*nru > 0) { - i__1 = m - ll + 1; - slasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * - u_dim1 + 1], ldu); - } - if (*ncc > 0) { - i__1 = m - ll + 1; - slasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ - ll + c_dim1], ldc); - } - -/* Test convergence */ - - if ((r__1 = e[ll], dabs(r__1)) <= thresh) { - e[ll] = 0.f; - } - } - } else { - -/* Use nonzero shift */ - - if (idir == 1) { - -/* - Chase bulge from top to bottom - Save cosines and sines for later singular vector updates -*/ - - f = ((r__1 = d__[ll], dabs(r__1)) - shift) * (r_sign(&c_b871, & - d__[ll]) + shift / d__[ll]); - g = e[ll]; - i__1 = m - 1; - for (i__ = ll; i__ <= i__1; ++i__) { - slartg_(&f, &g, &cosr, &sinr, &r__); - if (i__ > ll) { - e[i__ - 1] = r__; - } - f = cosr * d__[i__] + sinr * e[i__]; - e[i__] = cosr * e[i__] - sinr * d__[i__]; - g = sinr * d__[i__ + 1]; - d__[i__ + 1] = cosr * d__[i__ + 1]; - slartg_(&f, &g, &cosl, &sinl, &r__); - d__[i__] = r__; - f = cosl * e[i__] + sinl * d__[i__ + 1]; - d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__]; - if (i__ < m - 1) { - g = sinl * e[i__ + 1]; - e[i__ + 1] = cosl * e[i__ + 1]; - } - work[i__ - ll + 1] = cosr; - work[i__ - ll + 1 + nm1] = sinr; - work[i__ - ll + 1 + nm12] = cosl; - work[i__ - ll + 1 + nm13] = sinl; -/* L140: */ - } - e[m - 1] = f; - -/* Update singular vectors */ - - if (*ncvt > 0) { - i__1 = m - ll + 1; - slasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ - ll + vt_dim1], ldvt); - } - if (*nru > 0) { - i__1 = m - ll + 1; - slasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 - + 1], &u[ll * u_dim1 + 1], ldu); - } - if (*ncc > 0) { - i__1 = m - ll + 1; - slasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 - + 1], &c__[ll + c_dim1], ldc); - } - -/* Test convergence */ - - if ((r__1 = e[m - 1], dabs(r__1)) <= thresh) { - e[m - 1] = 0.f; - } - - } else { - -/* - Chase bulge from bottom to top - Save cosines and sines for later singular vector updates -*/ - - f = ((r__1 = d__[m], dabs(r__1)) - shift) * (r_sign(&c_b871, &d__[ - m]) + shift / d__[m]); - g = e[m - 1]; - i__1 = ll + 1; - for (i__ = m; i__ >= i__1; --i__) { - slartg_(&f, &g, &cosr, &sinr, &r__); - if (i__ < m) { - e[i__] = r__; - } - f = cosr * d__[i__] + sinr * e[i__ - 1]; - e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__]; - g = sinr * d__[i__ - 1]; - d__[i__ - 1] = cosr * d__[i__ - 1]; - slartg_(&f, &g, &cosl, &sinl, &r__); - d__[i__] = r__; - f = cosl * e[i__ - 1] + sinl * d__[i__ - 1]; - d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1]; - if (i__ > ll + 1) { - g = sinl * e[i__ - 2]; - e[i__ - 2] = cosl * e[i__ - 2]; - } - work[i__ - ll] = cosr; - work[i__ - ll + nm1] = -sinr; - work[i__ - ll + nm12] = cosl; - work[i__ - ll + nm13] = -sinl; -/* L150: */ - } - e[ll] = f; - -/* Test convergence */ - - if ((r__1 = e[ll], dabs(r__1)) <= thresh) { - e[ll] = 0.f; - } - -/* Update singular vectors if desired */ - - if (*ncvt > 0) { - i__1 = m - ll + 1; - slasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ - nm13 + 1], &vt[ll + vt_dim1], ldvt); - } - if (*nru > 0) { - i__1 = m - ll + 1; - slasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * - u_dim1 + 1], ldu); - } - if (*ncc > 0) { - i__1 = m - ll + 1; - slasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ - ll + c_dim1], ldc); - } - } - } - -/* QR iteration finished, go back and check convergence */ - - goto L60; - -/* All singular values converged, so make them positive */ - -L160: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if (d__[i__] < 0.f) { - d__[i__] = -d__[i__]; - -/* Change sign of singular vectors, if desired */ - - if (*ncvt > 0) { - sscal_(ncvt, &c_b1150, &vt[i__ + vt_dim1], ldvt); - } - } -/* L170: */ - } - -/* - Sort the singular values into decreasing order (insertion sort on - singular values, but only one transposition per singular vector) -*/ - - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Scan for smallest D(I) */ - - isub = 1; - smin = d__[1]; - i__2 = *n + 1 - i__; - for (j = 2; j <= i__2; ++j) { - if (d__[j] <= smin) { - isub = j; - smin = d__[j]; - } -/* L180: */ - } - if (isub != *n + 1 - i__) { - -/* Swap singular values and vectors */ - - d__[isub] = d__[*n + 1 - i__]; - d__[*n + 1 - i__] = smin; - if (*ncvt > 0) { - sswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[*n + 1 - i__ + - vt_dim1], ldvt); - } - if (*nru > 0) { - sswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[(*n + 1 - i__) * - u_dim1 + 1], &c__1); - } - if (*ncc > 0) { - sswap_(ncc, &c__[isub + c_dim1], ldc, &c__[*n + 1 - i__ + - c_dim1], ldc); - } - } -/* L190: */ - } - goto L220; - -/* Maximum number of iterations exceeded, failure to converge */ - -L200: - *info = 0; - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - if (e[i__] != 0.f) { - ++(*info); - } -/* L210: */ - } -L220: - return 0; - -/* End of SBDSQR */ - -} /* sbdsqr_ */ - -/* Subroutine */ int sgebak_(char *job, char *side, integer *n, integer *ilo, - integer *ihi, real *scale, integer *m, real *v, integer *ldv, integer - *info) -{ - /* System generated locals */ - integer v_dim1, v_offset, i__1; - - /* Local variables */ - static integer i__, k; - static real s; - static integer ii; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); - static logical leftv; - extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, - integer *), xerbla_(char *, integer *); - static logical rightv; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - SGEBAK forms the right or left eigenvectors of a real general matrix - by backward transformation on the computed eigenvectors of the - balanced matrix output by SGEBAL. - - Arguments - ========= - - JOB (input) CHARACTER*1 - Specifies the type of backward transformation required: - = 'N', do nothing, return immediately; - = 'P', do backward transformation for permutation only; - = 'S', do backward transformation for scaling only; - = 'B', do backward transformations for both permutation and - scaling. - JOB must be the same as the argument JOB supplied to SGEBAL. - - SIDE (input) CHARACTER*1 - = 'R': V contains right eigenvectors; - = 'L': V contains left eigenvectors. - - N (input) INTEGER - The number of rows of the matrix V. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - The integers ILO and IHI determined by SGEBAL. - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - - SCALE (input) REAL array, dimension (N) - Details of the permutation and scaling factors, as returned - by SGEBAL. - - M (input) INTEGER - The number of columns of the matrix V. M >= 0. - - V (input/output) REAL array, dimension (LDV,M) - On entry, the matrix of right or left eigenvectors to be - transformed, as returned by SHSEIN or STREVC. - On exit, V is overwritten by the transformed eigenvectors. - - LDV (input) INTEGER - The leading dimension of the array V. LDV >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - - ===================================================================== - - - Decode and Test the input parameters -*/ - - /* Parameter adjustments */ - --scale; - v_dim1 = *ldv; - v_offset = 1 + v_dim1; - v -= v_offset; - - /* Function Body */ - rightv = lsame_(side, "R"); - leftv = lsame_(side, "L"); - - *info = 0; - if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") - && ! lsame_(job, "B")) { - *info = -1; - } else if (! rightv && ! leftv) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*ilo < 1 || *ilo > max(1,*n)) { - *info = -4; - } else if (*ihi < min(*ilo,*n) || *ihi > *n) { - *info = -5; - } else if (*m < 0) { - *info = -7; - } else if (*ldv < max(1,*n)) { - *info = -9; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SGEBAK", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - if (*m == 0) { - return 0; - } - if (lsame_(job, "N")) { - return 0; - } - - if (*ilo == *ihi) { - goto L30; - } - -/* Backward balance */ - - if (lsame_(job, "S") || lsame_(job, "B")) { - - if (rightv) { - i__1 = *ihi; - for (i__ = *ilo; i__ <= i__1; ++i__) { - s = scale[i__]; - sscal_(m, &s, &v[i__ + v_dim1], ldv); -/* L10: */ - } - } - - if (leftv) { - i__1 = *ihi; - for (i__ = *ilo; i__ <= i__1; ++i__) { - s = 1.f / scale[i__]; - sscal_(m, &s, &v[i__ + v_dim1], ldv); -/* L20: */ - } - } - - } - -/* - Backward permutation - - For I = ILO-1 step -1 until 1, - IHI+1 step 1 until N do -- -*/ - -L30: - if (lsame_(job, "P") || lsame_(job, "B")) { - if (rightv) { - i__1 = *n; - for (ii = 1; ii <= i__1; ++ii) { - i__ = ii; - if (i__ >= *ilo && i__ <= *ihi) { - goto L40; - } - if (i__ < *ilo) { - i__ = *ilo - ii; - } - k = scale[i__]; - if (k == i__) { - goto L40; - } - sswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); -L40: - ; - } - } - - if (leftv) { - i__1 = *n; - for (ii = 1; ii <= i__1; ++ii) { - i__ = ii; - if (i__ >= *ilo && i__ <= *ihi) { - goto L50; - } - if (i__ < *ilo) { - i__ = *ilo - ii; - } - k = scale[i__]; - if (k == i__) { - goto L50; - } - sswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); -L50: - ; - } - } - } - - return 0; - -/* End of SGEBAK */ - -} /* sgebak_ */ - -/* Subroutine */ int sgebal_(char *job, integer *n, real *a, integer *lda, - integer *ilo, integer *ihi, real *scale, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - real r__1, r__2; - - /* Local variables */ - static real c__, f, g; - static integer i__, j, k, l, m; - static real r__, s, ca, ra; - static integer ica, ira, iexc; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), - sswap_(integer *, real *, integer *, real *, integer *); - static real sfmin1, sfmin2, sfmax1, sfmax2; - extern doublereal slamch_(char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer isamax_(integer *, real *, integer *); - static logical noconv; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SGEBAL balances a general real matrix A. This involves, first, - permuting A by a similarity transformation to isolate eigenvalues - in the first 1 to ILO-1 and last IHI+1 to N elements on the - diagonal; and second, applying a diagonal similarity transformation - to rows and columns ILO to IHI to make the rows and columns as - close in norm as possible. Both steps are optional. - - Balancing may reduce the 1-norm of the matrix, and improve the - accuracy of the computed eigenvalues and/or eigenvectors. - - Arguments - ========= - - JOB (input) CHARACTER*1 - Specifies the operations to be performed on A: - = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 - for i = 1,...,N; - = 'P': permute only; - = 'S': scale only; - = 'B': both permute and scale. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the input matrix A. - On exit, A is overwritten by the balanced matrix. - If JOB = 'N', A is not referenced. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - ILO (output) INTEGER - IHI (output) INTEGER - ILO and IHI are set to integers such that on exit - A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. - If JOB = 'N' or 'S', ILO = 1 and IHI = N. - - SCALE (output) REAL array, dimension (N) - Details of the permutations and scaling factors applied to - A. If P(j) is the index of the row and column interchanged - with row and column j and D(j) is the scaling factor - applied to row and column j, then - SCALE(j) = P(j) for j = 1,...,ILO-1 - = D(j) for j = ILO,...,IHI - = P(j) for j = IHI+1,...,N. - The order in which the interchanges are made is N to IHI+1, - then 1 to ILO-1. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - The permutations consist of row and column interchanges which put - the matrix in the form - - ( T1 X Y ) - P A P = ( 0 B Z ) - ( 0 0 T2 ) - - where T1 and T2 are upper triangular matrices whose eigenvalues lie - along the diagonal. The column indices ILO and IHI mark the starting - and ending columns of the submatrix B. Balancing consists of applying - a diagonal similarity transformation inv(D) * B * D to make the - 1-norms of each row of B and its corresponding column nearly equal. - The output matrix is - - ( T1 X*D Y ) - ( 0 inv(D)*B*D inv(D)*Z ). - ( 0 0 T2 ) - - Information about the permutations P and the diagonal matrix D is - returned in the vector SCALE. - - This subroutine is based on the EISPACK routine BALANC. - - Modified by Tzu-Yi Chen, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --scale; - - /* Function Body */ - *info = 0; - if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") - && ! lsame_(job, "B")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SGEBAL", &i__1); - return 0; - } - - k = 1; - l = *n; - - if (*n == 0) { - goto L210; - } - - if (lsame_(job, "N")) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - scale[i__] = 1.f; -/* L10: */ - } - goto L210; - } - - if (lsame_(job, "S")) { - goto L120; - } - -/* Permutation to isolate eigenvalues if possible */ - - goto L50; - -/* Row and column exchange. */ - -L20: - scale[m] = (real) j; - if (j == m) { - goto L30; - } - - sswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); - i__1 = *n - k + 1; - sswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); - -L30: - switch (iexc) { - case 1: goto L40; - case 2: goto L80; - } - -/* Search for rows isolating an eigenvalue and push them down. */ - -L40: - if (l == 1) { - goto L210; - } - --l; - -L50: - for (j = l; j >= 1; --j) { - - i__1 = l; - for (i__ = 1; i__ <= i__1; ++i__) { - if (i__ == j) { - goto L60; - } - if (a[j + i__ * a_dim1] != 0.f) { - goto L70; - } -L60: - ; - } - - m = l; - iexc = 1; - goto L20; -L70: - ; - } - - goto L90; - -/* Search for columns isolating an eigenvalue and push them left. */ - -L80: - ++k; - -L90: - i__1 = l; - for (j = k; j <= i__1; ++j) { - - i__2 = l; - for (i__ = k; i__ <= i__2; ++i__) { - if (i__ == j) { - goto L100; - } - if (a[i__ + j * a_dim1] != 0.f) { - goto L110; - } -L100: - ; - } - - m = k; - iexc = 2; - goto L20; -L110: - ; - } - -L120: - i__1 = l; - for (i__ = k; i__ <= i__1; ++i__) { - scale[i__] = 1.f; -/* L130: */ - } - - if (lsame_(job, "P")) { - goto L210; - } - -/* - Balance the submatrix in rows K to L. - - Iterative loop for norm reduction -*/ - - sfmin1 = slamch_("S") / slamch_("P"); - sfmax1 = 1.f / sfmin1; - sfmin2 = sfmin1 * 8.f; - sfmax2 = 1.f / sfmin2; -L140: - noconv = FALSE_; - - i__1 = l; - for (i__ = k; i__ <= i__1; ++i__) { - c__ = 0.f; - r__ = 0.f; - - i__2 = l; - for (j = k; j <= i__2; ++j) { - if (j == i__) { - goto L150; - } - c__ += (r__1 = a[j + i__ * a_dim1], dabs(r__1)); - r__ += (r__1 = a[i__ + j * a_dim1], dabs(r__1)); -L150: - ; - } - ica = isamax_(&l, &a[i__ * a_dim1 + 1], &c__1); - ca = (r__1 = a[ica + i__ * a_dim1], dabs(r__1)); - i__2 = *n - k + 1; - ira = isamax_(&i__2, &a[i__ + k * a_dim1], lda); - ra = (r__1 = a[i__ + (ira + k - 1) * a_dim1], dabs(r__1)); - -/* Guard against zero C or R due to underflow. */ - - if (c__ == 0.f || r__ == 0.f) { - goto L200; - } - g = r__ / 8.f; - f = 1.f; - s = c__ + r__; -L160: -/* Computing MAX */ - r__1 = max(f,c__); -/* Computing MIN */ - r__2 = min(r__,g); - if (c__ >= g || dmax(r__1,ca) >= sfmax2 || dmin(r__2,ra) <= sfmin2) { - goto L170; - } - f *= 8.f; - c__ *= 8.f; - ca *= 8.f; - r__ /= 8.f; - g /= 8.f; - ra /= 8.f; - goto L160; - -L170: - g = c__ / 8.f; -L180: -/* Computing MIN */ - r__1 = min(f,c__), r__1 = min(r__1,g); - if (g < r__ || dmax(r__,ra) >= sfmax2 || dmin(r__1,ca) <= sfmin2) { - goto L190; - } - f /= 8.f; - c__ /= 8.f; - g /= 8.f; - ca /= 8.f; - r__ *= 8.f; - ra *= 8.f; - goto L180; - -/* Now balance. */ - -L190: - if (c__ + r__ >= s * .95f) { - goto L200; - } - if (f < 1.f && scale[i__] < 1.f) { - if (f * scale[i__] <= sfmin1) { - goto L200; - } - } - if (f > 1.f && scale[i__] > 1.f) { - if (scale[i__] >= sfmax1 / f) { - goto L200; - } - } - g = 1.f / f; - scale[i__] *= f; - noconv = TRUE_; - - i__2 = *n - k + 1; - sscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); - sscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); - -L200: - ; - } - - if (noconv) { - goto L140; - } - -L210: - *ilo = k; - *ihi = l; - - return 0; - -/* End of SGEBAL */ - -} /* sgebal_ */ - -/* Subroutine */ int sgebd2_(integer *m, integer *n, real *a, integer *lda, - real *d__, real *e, real *tauq, real *taup, real *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__; - extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, - integer *, real *, real *, integer *, real *), xerbla_( - char *, integer *), slarfg_(integer *, real *, real *, - integer *, real *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - SGEBD2 reduces a real general m by n matrix A to upper or lower - bidiagonal form B by an orthogonal transformation: Q' * A * P = B. - - If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. - - Arguments - ========= - - M (input) INTEGER - The number of rows in the matrix A. M >= 0. - - N (input) INTEGER - The number of columns in the matrix A. N >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the m by n general matrix to be reduced. - On exit, - if m >= n, the diagonal and the first superdiagonal are - overwritten with the upper bidiagonal matrix B; the - elements below the diagonal, with the array TAUQ, represent - the orthogonal matrix Q as a product of elementary - reflectors, and the elements above the first superdiagonal, - with the array TAUP, represent the orthogonal matrix P as - a product of elementary reflectors; - if m < n, the diagonal and the first subdiagonal are - overwritten with the lower bidiagonal matrix B; the - elements below the first subdiagonal, with the array TAUQ, - represent the orthogonal matrix Q as a product of - elementary reflectors, and the elements above the diagonal, - with the array TAUP, represent the orthogonal matrix P as - a product of elementary reflectors. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - D (output) REAL array, dimension (min(M,N)) - The diagonal elements of the bidiagonal matrix B: - D(i) = A(i,i). - - E (output) REAL array, dimension (min(M,N)-1) - The off-diagonal elements of the bidiagonal matrix B: - if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; - if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. - - TAUQ (output) REAL array dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the orthogonal matrix Q. See Further Details. - - TAUP (output) REAL array, dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the orthogonal matrix P. See Further Details. - - WORK (workspace) REAL array, dimension (max(M,N)) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - The matrices Q and P are represented as products of elementary - reflectors: - - If m >= n, - - Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are real scalars, and v and u are real vectors; - v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); - u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); - tauq is stored in TAUQ(i) and taup in TAUP(i). - - If m < n, - - Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are real scalars, and v and u are real vectors; - v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); - u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); - tauq is stored in TAUQ(i) and taup in TAUP(i). - - The contents of A on exit are illustrated by the following examples: - - m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): - - ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) - ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) - ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) - ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) - ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) - ( v1 v2 v3 v4 v5 ) - - where d and e denote diagonal and off-diagonal elements of B, vi - denotes an element of the vector defining H(i), and ui an element of - the vector defining G(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tauq; - --taup; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info < 0) { - i__1 = -(*info); - xerbla_("SGEBD2", &i__1); - return 0; - } - - if (*m >= *n) { - -/* Reduce to upper bidiagonal form */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ - - i__2 = *m - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * - a_dim1], &c__1, &tauq[i__]); - d__[i__] = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.f; - -/* Apply H(i) to A(i:m,i+1:n) from the left */ - - i__2 = *m - i__ + 1; - i__3 = *n - i__; - slarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tauq[ - i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); - a[i__ + i__ * a_dim1] = d__[i__]; - - if (i__ < *n) { - -/* - Generate elementary reflector G(i) to annihilate - A(i,i+2:n) -*/ - - i__2 = *n - i__; -/* Computing MIN */ - i__3 = i__ + 2; - slarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( - i__3,*n) * a_dim1], lda, &taup[i__]); - e[i__] = a[i__ + (i__ + 1) * a_dim1]; - a[i__ + (i__ + 1) * a_dim1] = 1.f; - -/* Apply G(i) to A(i+1:m,i+1:n) from the right */ - - i__2 = *m - i__; - i__3 = *n - i__; - slarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], - lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], - lda, &work[1]); - a[i__ + (i__ + 1) * a_dim1] = e[i__]; - } else { - taup[i__] = 0.f; - } -/* L10: */ - } - } else { - -/* Reduce to lower bidiagonal form */ - - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Generate elementary reflector G(i) to annihilate A(i,i+1:n) */ - - i__2 = *n - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * - a_dim1], lda, &taup[i__]); - d__[i__] = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.f; - -/* Apply G(i) to A(i+1:m,i:n) from the right */ - - i__2 = *m - i__; - i__3 = *n - i__ + 1; -/* Computing MIN */ - i__4 = i__ + 1; - slarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[ - i__], &a[min(i__4,*m) + i__ * a_dim1], lda, &work[1]); - a[i__ + i__ * a_dim1] = d__[i__]; - - if (i__ < *m) { - -/* - Generate elementary reflector H(i) to annihilate - A(i+2:m,i) -*/ - - i__2 = *m - i__; -/* Computing MIN */ - i__3 = i__ + 2; - slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) + - i__ * a_dim1], &c__1, &tauq[i__]); - e[i__] = a[i__ + 1 + i__ * a_dim1]; - a[i__ + 1 + i__ * a_dim1] = 1.f; - -/* Apply H(i) to A(i+1:m,i+1:n) from the left */ - - i__2 = *m - i__; - i__3 = *n - i__; - slarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], & - c__1, &tauq[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], - lda, &work[1]); - a[i__ + 1 + i__ * a_dim1] = e[i__]; - } else { - tauq[i__] = 0.f; - } -/* L20: */ - } - } - return 0; - -/* End of SGEBD2 */ - -} /* sgebd2_ */ - -/* Subroutine */ int sgebrd_(integer *m, integer *n, real *a, integer *lda, - real *d__, real *e, real *tauq, real *taup, real *work, integer * - lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, j, nb, nx; - static real ws; - static integer nbmin, iinfo; - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *); - static integer minmn; - extern /* Subroutine */ int sgebd2_(integer *, integer *, real *, integer - *, real *, real *, real *, real *, real *, integer *), slabrd_( - integer *, integer *, integer *, real *, integer *, real *, real * - , real *, real *, real *, integer *, real *, integer *), xerbla_( - char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwrkx, ldwrky, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SGEBRD reduces a general real M-by-N matrix A to upper or lower - bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. - - If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. - - Arguments - ========= - - M (input) INTEGER - The number of rows in the matrix A. M >= 0. - - N (input) INTEGER - The number of columns in the matrix A. N >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the M-by-N general matrix to be reduced. - On exit, - if m >= n, the diagonal and the first superdiagonal are - overwritten with the upper bidiagonal matrix B; the - elements below the diagonal, with the array TAUQ, represent - the orthogonal matrix Q as a product of elementary - reflectors, and the elements above the first superdiagonal, - with the array TAUP, represent the orthogonal matrix P as - a product of elementary reflectors; - if m < n, the diagonal and the first subdiagonal are - overwritten with the lower bidiagonal matrix B; the - elements below the first subdiagonal, with the array TAUQ, - represent the orthogonal matrix Q as a product of - elementary reflectors, and the elements above the diagonal, - with the array TAUP, represent the orthogonal matrix P as - a product of elementary reflectors. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - D (output) REAL array, dimension (min(M,N)) - The diagonal elements of the bidiagonal matrix B: - D(i) = A(i,i). - - E (output) REAL array, dimension (min(M,N)-1) - The off-diagonal elements of the bidiagonal matrix B: - if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; - if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. - - TAUQ (output) REAL array dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the orthogonal matrix Q. See Further Details. - - TAUP (output) REAL array, dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the orthogonal matrix P. See Further Details. - - WORK (workspace/output) REAL array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The length of the array WORK. LWORK >= max(1,M,N). - For optimum performance LWORK >= (M+N)*NB, where NB - is the optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - The matrices Q and P are represented as products of elementary - reflectors: - - If m >= n, - - Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are real scalars, and v and u are real vectors; - v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); - u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); - tauq is stored in TAUQ(i) and taup in TAUP(i). - - If m < n, - - Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are real scalars, and v and u are real vectors; - v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); - u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); - tauq is stored in TAUQ(i) and taup in TAUP(i). - - The contents of A on exit are illustrated by the following examples: - - m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): - - ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) - ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) - ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) - ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) - ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) - ( v1 v2 v3 v4 v5 ) - - where d and e denote diagonal and off-diagonal elements of B, vi - denotes an element of the vector defining H(i), and ui an element of - the vector defining G(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tauq; - --taup; - --work; - - /* Function Body */ - *info = 0; -/* Computing MAX */ - i__1 = 1, i__2 = ilaenv_(&c__1, "SGEBRD", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - nb = max(i__1,i__2); - lwkopt = (*m + *n) * nb; - work[1] = (real) lwkopt; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } else /* if(complicated condition) */ { -/* Computing MAX */ - i__1 = max(1,*m); - if (*lwork < max(i__1,*n) && ! lquery) { - *info = -10; - } - } - if (*info < 0) { - i__1 = -(*info); - xerbla_("SGEBRD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - minmn = min(*m,*n); - if (minmn == 0) { - work[1] = 1.f; - return 0; - } - - ws = (real) max(*m,*n); - ldwrkx = *m; - ldwrky = *n; - - if (nb > 1 && nb < minmn) { - -/* - Set the crossover point NX. - - Computing MAX -*/ - i__1 = nb, i__2 = ilaenv_(&c__3, "SGEBRD", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - -/* Determine when to switch from blocked to unblocked code. */ - - if (nx < minmn) { - ws = (real) ((*m + *n) * nb); - if ((real) (*lwork) < ws) { - -/* - Not enough work space for the optimal NB, consider using - a smaller block size. -*/ - - nbmin = ilaenv_(&c__2, "SGEBRD", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - if (*lwork >= (*m + *n) * nbmin) { - nb = *lwork / (*m + *n); - } else { - nb = 1; - nx = minmn; - } - } - } - } else { - nx = minmn; - } - - i__1 = minmn - nx; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - -/* - Reduce rows and columns i:i+nb-1 to bidiagonal form and return - the matrices X and Y which are needed to update the unreduced - part of the matrix -*/ - - i__3 = *m - i__ + 1; - i__4 = *n - i__ + 1; - slabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[ - i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx - * nb + 1], &ldwrky); - -/* - Update the trailing submatrix A(i+nb:m,i+nb:n), using an update - of the form A := A - V*Y' - X*U' -*/ - - i__3 = *m - i__ - nb + 1; - i__4 = *n - i__ - nb + 1; - sgemm_("No transpose", "Transpose", &i__3, &i__4, &nb, &c_b1150, &a[ - i__ + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + nb + 1], & - ldwrky, &c_b871, &a[i__ + nb + (i__ + nb) * a_dim1], lda); - i__3 = *m - i__ - nb + 1; - i__4 = *n - i__ - nb + 1; - sgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &c_b1150, & - work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, & - c_b871, &a[i__ + nb + (i__ + nb) * a_dim1], lda); - -/* Copy diagonal and off-diagonal elements of B back into A */ - - if (*m >= *n) { - i__3 = i__ + nb - 1; - for (j = i__; j <= i__3; ++j) { - a[j + j * a_dim1] = d__[j]; - a[j + (j + 1) * a_dim1] = e[j]; -/* L10: */ - } - } else { - i__3 = i__ + nb - 1; - for (j = i__; j <= i__3; ++j) { - a[j + j * a_dim1] = d__[j]; - a[j + 1 + j * a_dim1] = e[j]; -/* L20: */ - } - } -/* L30: */ - } - -/* Use unblocked code to reduce the remainder of the matrix */ - - i__2 = *m - i__ + 1; - i__1 = *n - i__ + 1; - sgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], & - tauq[i__], &taup[i__], &work[1], &iinfo); - work[1] = ws; - return 0; - -/* End of SGEBRD */ - -} /* sgebrd_ */ - -/* Subroutine */ int sgeev_(char *jobvl, char *jobvr, integer *n, real *a, - integer *lda, real *wr, real *wi, real *vl, integer *ldvl, real *vr, - integer *ldvr, real *work, integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, - i__2, i__3, i__4; - real r__1, r__2; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__, k; - static real r__, cs, sn; - static integer ihi; - static real scl; - static integer ilo; - static real dum[1], eps; - static integer ibal; - static char side[1]; - static integer maxb; - static real anrm; - static integer ierr, itau, iwrk, nout; - extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, - integer *, real *, real *); - extern doublereal snrm2_(integer *, real *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); - extern doublereal slapy2_(real *, real *); - extern /* Subroutine */ int slabad_(real *, real *); - static logical scalea; - static real cscale; - extern /* Subroutine */ int sgebak_(char *, char *, integer *, integer *, - integer *, real *, integer *, real *, integer *, integer *), sgebal_(char *, integer *, real *, integer *, - integer *, integer *, real *, integer *); - extern doublereal slamch_(char *), slange_(char *, integer *, - integer *, real *, integer *, real *); - extern /* Subroutine */ int sgehrd_(integer *, integer *, integer *, real - *, integer *, real *, real *, integer *, integer *), xerbla_(char - *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static logical select[1]; - static real bignum; - extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, - real *, integer *, integer *, real *, integer *, integer *); - extern integer isamax_(integer *, real *, integer *); - extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, - integer *, real *, integer *), slartg_(real *, real *, - real *, real *, real *), sorghr_(integer *, integer *, integer *, - real *, integer *, real *, real *, integer *, integer *), shseqr_( - char *, char *, integer *, integer *, integer *, real *, integer * - , real *, real *, real *, integer *, real *, integer *, integer *), strevc_(char *, char *, logical *, integer *, - real *, integer *, real *, integer *, real *, integer *, integer * - , integer *, real *, integer *); - static integer minwrk, maxwrk; - static logical wantvl; - static real smlnum; - static integer hswork; - static logical lquery, wantvr; - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - December 8, 1999 - - - Purpose - ======= - - SGEEV computes for an N-by-N real nonsymmetric matrix A, the - eigenvalues and, optionally, the left and/or right eigenvectors. - - The right eigenvector v(j) of A satisfies - A * v(j) = lambda(j) * v(j) - where lambda(j) is its eigenvalue. - The left eigenvector u(j) of A satisfies - u(j)**H * A = lambda(j) * u(j)**H - where u(j)**H denotes the conjugate transpose of u(j). - - The computed eigenvectors are normalized to have Euclidean norm - equal to 1 and largest component real. - - Arguments - ========= - - JOBVL (input) CHARACTER*1 - = 'N': left eigenvectors of A are not computed; - = 'V': left eigenvectors of A are computed. - - JOBVR (input) CHARACTER*1 - = 'N': right eigenvectors of A are not computed; - = 'V': right eigenvectors of A are computed. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the N-by-N matrix A. - On exit, A has been overwritten. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - WR (output) REAL array, dimension (N) - WI (output) REAL array, dimension (N) - WR and WI contain the real and imaginary parts, - respectively, of the computed eigenvalues. Complex - conjugate pairs of eigenvalues appear consecutively - with the eigenvalue having the positive imaginary part - first. - - VL (output) REAL array, dimension (LDVL,N) - If JOBVL = 'V', the left eigenvectors u(j) are stored one - after another in the columns of VL, in the same order - as their eigenvalues. - If JOBVL = 'N', VL is not referenced. - If the j-th eigenvalue is real, then u(j) = VL(:,j), - the j-th column of VL. - If the j-th and (j+1)-st eigenvalues form a complex - conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and - u(j+1) = VL(:,j) - i*VL(:,j+1). - - LDVL (input) INTEGER - The leading dimension of the array VL. LDVL >= 1; if - JOBVL = 'V', LDVL >= N. - - VR (output) REAL array, dimension (LDVR,N) - If JOBVR = 'V', the right eigenvectors v(j) are stored one - after another in the columns of VR, in the same order - as their eigenvalues. - If JOBVR = 'N', VR is not referenced. - If the j-th eigenvalue is real, then v(j) = VR(:,j), - the j-th column of VR. - If the j-th and (j+1)-st eigenvalues form a complex - conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and - v(j+1) = VR(:,j) - i*VR(:,j+1). - - LDVR (input) INTEGER - The leading dimension of the array VR. LDVR >= 1; if - JOBVR = 'V', LDVR >= N. - - WORK (workspace/output) REAL array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,3*N), and - if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N. For good - performance, LWORK must generally be larger. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = i, the QR algorithm failed to compute all the - eigenvalues, and no eigenvectors have been computed; - elements i+1:N of WR and WI contain eigenvalues which - have converged. - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --wr; - --wi; - vl_dim1 = *ldvl; - vl_offset = 1 + vl_dim1; - vl -= vl_offset; - vr_dim1 = *ldvr; - vr_offset = 1 + vr_dim1; - vr -= vr_offset; - --work; - - /* Function Body */ - *info = 0; - lquery = *lwork == -1; - wantvl = lsame_(jobvl, "V"); - wantvr = lsame_(jobvr, "V"); - if (! wantvl && ! lsame_(jobvl, "N")) { - *info = -1; - } else if (! wantvr && ! lsame_(jobvr, "N")) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*ldvl < 1 || wantvl && *ldvl < *n) { - *info = -9; - } else if (*ldvr < 1 || wantvr && *ldvr < *n) { - *info = -11; - } - -/* - Compute workspace - (Note: Comments in the code beginning "Workspace:" describe the - minimal amount of workspace needed at that point in the code, - as well as the preferred amount for good performance. - NB refers to the optimal block size for the immediately - following subroutine, as returned by ILAENV. - HSWORK refers to the workspace preferred by SHSEQR, as - calculated below. HSWORK is computed assuming ILO=1 and IHI=N, - the worst case.) -*/ - - minwrk = 1; - if (*info == 0 && (*lwork >= 1 || lquery)) { - maxwrk = (*n << 1) + *n * ilaenv_(&c__1, "SGEHRD", " ", n, &c__1, n, & - c__0, (ftnlen)6, (ftnlen)1); - if (! wantvl && ! wantvr) { -/* Computing MAX */ - i__1 = 1, i__2 = *n * 3; - minwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = ilaenv_(&c__8, "SHSEQR", "EN", n, &c__1, n, &c_n1, (ftnlen) - 6, (ftnlen)2); - maxb = max(i__1,2); -/* - Computing MIN - Computing MAX -*/ - i__3 = 2, i__4 = ilaenv_(&c__4, "SHSEQR", "EN", n, &c__1, n, & - c_n1, (ftnlen)6, (ftnlen)2); - i__1 = min(maxb,*n), i__2 = max(i__3,i__4); - k = min(i__1,i__2); -/* Computing MAX */ - i__1 = k * (k + 2), i__2 = *n << 1; - hswork = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n + - hswork; - maxwrk = max(i__1,i__2); - } else { -/* Computing MAX */ - i__1 = 1, i__2 = *n << 2; - minwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, "SOR" - "GHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = ilaenv_(&c__8, "SHSEQR", "SV", n, &c__1, n, &c_n1, (ftnlen) - 6, (ftnlen)2); - maxb = max(i__1,2); -/* - Computing MIN - Computing MAX -*/ - i__3 = 2, i__4 = ilaenv_(&c__4, "SHSEQR", "SV", n, &c__1, n, & - c_n1, (ftnlen)6, (ftnlen)2); - i__1 = min(maxb,*n), i__2 = max(i__3,i__4); - k = min(i__1,i__2); -/* Computing MAX */ - i__1 = k * (k + 2), i__2 = *n << 1; - hswork = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n + - hswork; - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n << 2; - maxwrk = max(i__1,i__2); - } - work[1] = (real) maxwrk; - } - if (*lwork < minwrk && ! lquery) { - *info = -13; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SGEEV ", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Get machine constants */ - - eps = slamch_("P"); - smlnum = slamch_("S"); - bignum = 1.f / smlnum; - slabad_(&smlnum, &bignum); - smlnum = sqrt(smlnum) / eps; - bignum = 1.f / smlnum; - -/* Scale A if max element outside range [SMLNUM,BIGNUM] */ - - anrm = slange_("M", n, n, &a[a_offset], lda, dum); - scalea = FALSE_; - if (anrm > 0.f && anrm < smlnum) { - scalea = TRUE_; - cscale = smlnum; - } else if (anrm > bignum) { - scalea = TRUE_; - cscale = bignum; - } - if (scalea) { - slascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & - ierr); - } - -/* - Balance the matrix - (Workspace: need N) -*/ - - ibal = 1; - sgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr); - -/* - Reduce to upper Hessenberg form - (Workspace: need 3*N, prefer 2*N+N*NB) -*/ - - itau = ibal + *n; - iwrk = itau + *n; - i__1 = *lwork - iwrk + 1; - sgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, - &ierr); - - if (wantvl) { - -/* - Want left eigenvectors - Copy Householder vectors to VL -*/ - - *(unsigned char *)side = 'L'; - slacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) - ; - -/* - Generate orthogonal matrix in VL - (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) -*/ - - i__1 = *lwork - iwrk + 1; - sorghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], - &i__1, &ierr); - -/* - Perform QR iteration, accumulating Schur vectors in VL - (Workspace: need N+1, prefer N+HSWORK (see comments) ) -*/ - - iwrk = itau; - i__1 = *lwork - iwrk + 1; - shseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & - vl[vl_offset], ldvl, &work[iwrk], &i__1, info); - - if (wantvr) { - -/* - Want left and right eigenvectors - Copy Schur vectors to VR -*/ - - *(unsigned char *)side = 'B'; - slacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); - } - - } else if (wantvr) { - -/* - Want right eigenvectors - Copy Householder vectors to VR -*/ - - *(unsigned char *)side = 'R'; - slacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) - ; - -/* - Generate orthogonal matrix in VR - (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) -*/ - - i__1 = *lwork - iwrk + 1; - sorghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], - &i__1, &ierr); - -/* - Perform QR iteration, accumulating Schur vectors in VR - (Workspace: need N+1, prefer N+HSWORK (see comments) ) -*/ - - iwrk = itau; - i__1 = *lwork - iwrk + 1; - shseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & - vr[vr_offset], ldvr, &work[iwrk], &i__1, info); - - } else { - -/* - Compute eigenvalues only - (Workspace: need N+1, prefer N+HSWORK (see comments) ) -*/ - - iwrk = itau; - i__1 = *lwork - iwrk + 1; - shseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & - vr[vr_offset], ldvr, &work[iwrk], &i__1, info); - } - -/* If INFO > 0 from SHSEQR, then quit */ - - if (*info > 0) { - goto L50; - } - - if (wantvl || wantvr) { - -/* - Compute left and/or right eigenvectors - (Workspace: need 4*N) -*/ - - strevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, - &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &ierr); - } - - if (wantvl) { - -/* - Undo balancing of left eigenvectors - (Workspace: need N) -*/ - - sgebak_("B", "L", n, &ilo, &ihi, &work[ibal], n, &vl[vl_offset], ldvl, - &ierr); - -/* Normalize left eigenvectors and make largest component real */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if (wi[i__] == 0.f) { - scl = 1.f / snrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); - sscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); - } else if (wi[i__] > 0.f) { - r__1 = snrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); - r__2 = snrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); - scl = 1.f / slapy2_(&r__1, &r__2); - sscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); - sscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); - i__2 = *n; - for (k = 1; k <= i__2; ++k) { -/* Computing 2nd power */ - r__1 = vl[k + i__ * vl_dim1]; -/* Computing 2nd power */ - r__2 = vl[k + (i__ + 1) * vl_dim1]; - work[iwrk + k - 1] = r__1 * r__1 + r__2 * r__2; -/* L10: */ - } - k = isamax_(n, &work[iwrk], &c__1); - slartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], - &cs, &sn, &r__); - srot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * - vl_dim1 + 1], &c__1, &cs, &sn); - vl[k + (i__ + 1) * vl_dim1] = 0.f; - } -/* L20: */ - } - } - - if (wantvr) { - -/* - Undo balancing of right eigenvectors - (Workspace: need N) -*/ - - sgebak_("B", "R", n, &ilo, &ihi, &work[ibal], n, &vr[vr_offset], ldvr, - &ierr); - -/* Normalize right eigenvectors and make largest component real */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if (wi[i__] == 0.f) { - scl = 1.f / snrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); - sscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); - } else if (wi[i__] > 0.f) { - r__1 = snrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); - r__2 = snrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); - scl = 1.f / slapy2_(&r__1, &r__2); - sscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); - sscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); - i__2 = *n; - for (k = 1; k <= i__2; ++k) { -/* Computing 2nd power */ - r__1 = vr[k + i__ * vr_dim1]; -/* Computing 2nd power */ - r__2 = vr[k + (i__ + 1) * vr_dim1]; - work[iwrk + k - 1] = r__1 * r__1 + r__2 * r__2; -/* L30: */ - } - k = isamax_(n, &work[iwrk], &c__1); - slartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], - &cs, &sn, &r__); - srot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * - vr_dim1 + 1], &c__1, &cs, &sn); - vr[k + (i__ + 1) * vr_dim1] = 0.f; - } -/* L40: */ - } - } - -/* Undo scaling if necessary */ - -L50: - if (scalea) { - i__1 = *n - *info; -/* Computing MAX */ - i__3 = *n - *info; - i__2 = max(i__3,1); - slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + - 1], &i__2, &ierr); - i__1 = *n - *info; -/* Computing MAX */ - i__3 = *n - *info; - i__2 = max(i__3,1); - slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + - 1], &i__2, &ierr); - if (*info > 0) { - i__1 = ilo - 1; - slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], - n, &ierr); - i__1 = ilo - 1; - slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], - n, &ierr); - } - } - - work[1] = (real) maxwrk; - return 0; - -/* End of SGEEV */ - -} /* sgeev_ */ - -/* Subroutine */ int sgehd2_(integer *n, integer *ilo, integer *ihi, real *a, - integer *lda, real *tau, real *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__; - static real aii; - extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, - integer *, real *, real *, integer *, real *), xerbla_( - char *, integer *), slarfg_(integer *, real *, real *, - integer *, real *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - SGEHD2 reduces a real general matrix A to upper Hessenberg form H by - an orthogonal similarity transformation: Q' * A * Q = H . - - Arguments - ========= - - N (input) INTEGER - The order of the matrix A. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that A is already upper triangular in rows - and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally - set by a previous call to SGEBAL; otherwise they should be - set to 1 and N respectively. See Further Details. - 1 <= ILO <= IHI <= max(1,N). - - A (input/output) REAL array, dimension (LDA,N) - On entry, the n by n general matrix to be reduced. - On exit, the upper triangle and the first subdiagonal of A - are overwritten with the upper Hessenberg matrix H, and the - elements below the first subdiagonal, with the array TAU, - represent the orthogonal matrix Q as a product of elementary - reflectors. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - TAU (output) REAL array, dimension (N-1) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace) REAL array, dimension (N) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - The matrix Q is represented as a product of (ihi-ilo) elementary - reflectors - - Q = H(ilo) H(ilo+1) . . . H(ihi-1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on - exit in A(i+2:ihi,i), and tau in TAU(i). - - The contents of A are illustrated by the following example, with - n = 7, ilo = 2 and ihi = 6: - - on entry, on exit, - - ( a a a a a a a ) ( a a h h h h a ) - ( a a a a a a ) ( a h h h h a ) - ( a a a a a a ) ( h h h h h h ) - ( a a a a a a ) ( v2 h h h h h ) - ( a a a a a a ) ( v2 v3 h h h h ) - ( a a a a a a ) ( v2 v3 v4 h h h ) - ( a ) ( a ) - - where a denotes an element of the original matrix A, h denotes a - modified element of the upper Hessenberg matrix H, and vi denotes an - element of the vector defining H(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*n < 0) { - *info = -1; - } else if (*ilo < 1 || *ilo > max(1,*n)) { - *info = -2; - } else if (*ihi < min(*ilo,*n) || *ihi > *n) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SGEHD2", &i__1); - return 0; - } - - i__1 = *ihi - 1; - for (i__ = *ilo; i__ <= i__1; ++i__) { - -/* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */ - - i__2 = *ihi - i__; -/* Computing MIN */ - i__3 = i__ + 2; - slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ * - a_dim1], &c__1, &tau[i__]); - aii = a[i__ + 1 + i__ * a_dim1]; - a[i__ + 1 + i__ * a_dim1] = 1.f; - -/* Apply H(i) to A(1:ihi,i+1:ihi) from the right */ - - i__2 = *ihi - i__; - slarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ - i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]); - -/* Apply H(i) to A(i+1:ihi,i+1:n) from the left */ - - i__2 = *ihi - i__; - i__3 = *n - i__; - slarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ - i__], &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]); - - a[i__ + 1 + i__ * a_dim1] = aii; -/* L10: */ - } - - return 0; - -/* End of SGEHD2 */ - -} /* sgehd2_ */ - -/* Subroutine */ int sgehrd_(integer *n, integer *ilo, integer *ihi, real *a, - integer *lda, real *tau, real *work, integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__; - static real t[4160] /* was [65][64] */; - static integer ib; - static real ei; - static integer nb, nh, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *), sgehd2_(integer *, integer *, - integer *, real *, integer *, real *, real *, integer *), slarfb_( - char *, char *, char *, char *, integer *, integer *, integer *, - real *, integer *, real *, integer *, real *, integer *, real *, - integer *), slahrd_(integer *, - integer *, integer *, real *, integer *, real *, real *, integer * - , real *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SGEHRD reduces a real general matrix A to upper Hessenberg form H by - an orthogonal similarity transformation: Q' * A * Q = H . - - Arguments - ========= - - N (input) INTEGER - The order of the matrix A. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that A is already upper triangular in rows - and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally - set by a previous call to SGEBAL; otherwise they should be - set to 1 and N respectively. See Further Details. - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the N-by-N general matrix to be reduced. - On exit, the upper triangle and the first subdiagonal of A - are overwritten with the upper Hessenberg matrix H, and the - elements below the first subdiagonal, with the array TAU, - represent the orthogonal matrix Q as a product of elementary - reflectors. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - TAU (output) REAL array, dimension (N-1) - The scalar factors of the elementary reflectors (see Further - Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to - zero. - - WORK (workspace/output) REAL array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The length of the array WORK. LWORK >= max(1,N). - For optimum performance LWORK >= N*NB, where NB is the - optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - The matrix Q is represented as a product of (ihi-ilo) elementary - reflectors - - Q = H(ilo) H(ilo+1) . . . H(ihi-1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on - exit in A(i+2:ihi,i), and tau in TAU(i). - - The contents of A are illustrated by the following example, with - n = 7, ilo = 2 and ihi = 6: - - on entry, on exit, - - ( a a a a a a a ) ( a a h h h h a ) - ( a a a a a a ) ( a h h h h a ) - ( a a a a a a ) ( h h h h h h ) - ( a a a a a a ) ( v2 h h h h h ) - ( a a a a a a ) ( v2 v3 h h h h ) - ( a a a a a a ) ( v2 v3 v4 h h h ) - ( a ) ( a ) - - where a denotes an element of the original matrix A, h denotes a - modified element of the upper Hessenberg matrix H, and vi denotes an - element of the vector defining H(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; -/* Computing MIN */ - i__1 = 64, i__2 = ilaenv_(&c__1, "SGEHRD", " ", n, ilo, ihi, &c_n1, ( - ftnlen)6, (ftnlen)1); - nb = min(i__1,i__2); - lwkopt = *n * nb; - work[1] = (real) lwkopt; - lquery = *lwork == -1; - if (*n < 0) { - *info = -1; - } else if (*ilo < 1 || *ilo > max(1,*n)) { - *info = -2; - } else if (*ihi < min(*ilo,*n) || *ihi > *n) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*lwork < max(1,*n) && ! lquery) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SGEHRD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */ - - i__1 = *ilo - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - tau[i__] = 0.f; -/* L10: */ - } - i__1 = *n - 1; - for (i__ = max(1,*ihi); i__ <= i__1; ++i__) { - tau[i__] = 0.f; -/* L20: */ - } - -/* Quick return if possible */ - - nh = *ihi - *ilo + 1; - if (nh <= 1) { - work[1] = 1.f; - return 0; - } - -/* - Determine the block size. - - Computing MIN -*/ - i__1 = 64, i__2 = ilaenv_(&c__1, "SGEHRD", " ", n, ilo, ihi, &c_n1, ( - ftnlen)6, (ftnlen)1); - nb = min(i__1,i__2); - nbmin = 2; - iws = 1; - if (nb > 1 && nb < nh) { - -/* - Determine when to cross over from blocked to unblocked code - (last block is always handled by unblocked code). - - Computing MAX -*/ - i__1 = nb, i__2 = ilaenv_(&c__3, "SGEHRD", " ", n, ilo, ihi, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < nh) { - -/* Determine if workspace is large enough for blocked code. */ - - iws = *n * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: determine the - minimum value of NB, and reduce NB or force use of - unblocked code. - - Computing MAX -*/ - i__1 = 2, i__2 = ilaenv_(&c__2, "SGEHRD", " ", n, ilo, ihi, & - c_n1, (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - if (*lwork >= *n * nbmin) { - nb = *lwork / *n; - } else { - nb = 1; - } - } - } - } - ldwork = *n; - - if (nb < nbmin || nb >= nh) { - -/* Use unblocked code below */ - - i__ = *ilo; - - } else { - -/* Use blocked code */ - - i__1 = *ihi - 1 - nx; - i__2 = nb; - for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = nb, i__4 = *ihi - i__; - ib = min(i__3,i__4); - -/* - Reduce columns i:i+ib-1 to Hessenberg form, returning the - matrices V and T of the block reflector H = I - V*T*V' - which performs the reduction, and also the matrix Y = A*V*T -*/ - - slahrd_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & - c__65, &work[1], &ldwork); - -/* - Apply the block reflector H to A(1:ihi,i+ib:ihi) from the - right, computing A := A - Y * V'. V(i+ib,ib-1) must be set - to 1. -*/ - - ei = a[i__ + ib + (i__ + ib - 1) * a_dim1]; - a[i__ + ib + (i__ + ib - 1) * a_dim1] = 1.f; - i__3 = *ihi - i__ - ib + 1; - sgemm_("No transpose", "Transpose", ihi, &i__3, &ib, &c_b1150, & - work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, & - c_b871, &a[(i__ + ib) * a_dim1 + 1], lda); - a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei; - -/* - Apply the block reflector H to A(i+1:ihi,i+ib:n) from the - left -*/ - - i__3 = *ihi - i__; - i__4 = *n - i__ - ib + 1; - slarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & - i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, &c__65, &a[ - i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &ldwork); -/* L30: */ - } - } - -/* Use unblocked code to reduce the rest of the matrix */ - - sgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); - work[1] = (real) iws; - - return 0; - -/* End of SGEHRD */ - -} /* sgehrd_ */ - -/* Subroutine */ int sgelq2_(integer *m, integer *n, real *a, integer *lda, - real *tau, real *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, k; - static real aii; - extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, - integer *, real *, real *, integer *, real *), xerbla_( - char *, integer *), slarfg_(integer *, real *, real *, - integer *, real *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - SGELQ2 computes an LQ factorization of a real m by n matrix A: - A = L * Q. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the m by n matrix A. - On exit, the elements on and below the diagonal of the array - contain the m by min(m,n) lower trapezoidal matrix L (L is - lower triangular if m <= n); the elements above the diagonal, - with the array TAU, represent the orthogonal matrix Q as a - product of elementary reflectors (see Further Details). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (output) REAL array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace) REAL array, dimension (M) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The matrix Q is represented as a product of elementary reflectors - - Q = H(k) . . . H(2) H(1), where k = min(m,n). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), - and tau in TAU(i). - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SGELQ2", &i__1); - return 0; - } - - k = min(*m,*n); - - i__1 = k; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Generate elementary reflector H(i) to annihilate A(i,i+1:n) */ - - i__2 = *n - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * a_dim1] - , lda, &tau[i__]); - if (i__ < *m) { - -/* Apply H(i) to A(i+1:m,i:n) from the right */ - - aii = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.f; - i__2 = *m - i__; - i__3 = *n - i__ + 1; - slarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[ - i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); - a[i__ + i__ * a_dim1] = aii; - } -/* L10: */ - } - return 0; - -/* End of SGELQ2 */ - -} /* sgelq2_ */ - -/* Subroutine */ int sgelqf_(integer *m, integer *n, real *a, integer *lda, - real *tau, real *work, integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int sgelq2_(integer *, integer *, real *, integer - *, real *, real *, integer *), slarfb_(char *, char *, char *, - char *, integer *, integer *, integer *, real *, integer *, real * - , integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, - real *, integer *, real *, real *, integer *); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SGELQF computes an LQ factorization of a real M-by-N matrix A: - A = L * Q. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the M-by-N matrix A. - On exit, the elements on and below the diagonal of the array - contain the m-by-min(m,n) lower trapezoidal matrix L (L is - lower triangular if m <= n); the elements above the diagonal, - with the array TAU, represent the orthogonal matrix Q as a - product of elementary reflectors (see Further Details). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (output) REAL array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace/output) REAL array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,M). - For optimum performance LWORK >= M*NB, where NB is the - optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The matrix Q is represented as a product of elementary reflectors - - Q = H(k) . . . H(2) H(1), where k = min(m,n). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), - and tau in TAU(i). - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nb = ilaenv_(&c__1, "SGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) - 1); - lwkopt = *m * nb; - work[1] = (real) lwkopt; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } else if (*lwork < max(1,*m) && ! lquery) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SGELQF", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - k = min(*m,*n); - if (k == 0) { - work[1] = 1.f; - return 0; - } - - nbmin = 2; - nx = 0; - iws = *m; - if (nb > 1 && nb < k) { - -/* - Determine when to cross over from blocked to unblocked code. - - Computing MAX -*/ - i__1 = 0, i__2 = ilaenv_(&c__3, "SGELQF", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < k) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *m; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: reduce NB and - determine the minimum value of NB. -*/ - - nb = *lwork / ldwork; -/* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "SGELQF", " ", m, n, &c_n1, & - c_n1, (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - } - } - } - - if (nb >= nbmin && nb < k && nx < k) { - -/* Use blocked code initially */ - - i__1 = k - nx; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = k - i__ + 1; - ib = min(i__3,nb); - -/* - Compute the LQ factorization of the current block - A(i:i+ib-1,i:n) -*/ - - i__3 = *n - i__ + 1; - sgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ - 1], &iinfo); - if (i__ + ib <= *m) { - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__3 = *n - i__ + 1; - slarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], &work[1], &ldwork); - -/* Apply H to A(i+ib:m,i:n) from the right */ - - i__3 = *m - i__ - ib + 1; - i__4 = *n - i__ + 1; - slarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, - &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & - ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + - 1], &ldwork); - } -/* L10: */ - } - } else { - i__ = 1; - } - -/* Use unblocked code to factor the last or only block. */ - - if (i__ <= k) { - i__2 = *m - i__ + 1; - i__1 = *n - i__ + 1; - sgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] - , &iinfo); - } - - work[1] = (real) iws; - return 0; - -/* End of SGELQF */ - -} /* sgelqf_ */ - -/* Subroutine */ int sgeqr2_(integer *m, integer *n, real *a, integer *lda, - real *tau, real *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, k; - static real aii; - extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, - integer *, real *, real *, integer *, real *), xerbla_( - char *, integer *), slarfg_(integer *, real *, real *, - integer *, real *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - SGEQR2 computes a QR factorization of a real m by n matrix A: - A = Q * R. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the m by n matrix A. - On exit, the elements on and above the diagonal of the array - contain the min(m,n) by n upper trapezoidal matrix R (R is - upper triangular if m >= n); the elements below the diagonal, - with the array TAU, represent the orthogonal matrix Q as a - product of elementary reflectors (see Further Details). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (output) REAL array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace) REAL array, dimension (N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The matrix Q is represented as a product of elementary reflectors - - Q = H(1) H(2) . . . H(k), where k = min(m,n). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), - and tau in TAU(i). - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SGEQR2", &i__1); - return 0; - } - - k = min(*m,*n); - - i__1 = k; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ - - i__2 = *m - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1] - , &c__1, &tau[i__]); - if (i__ < *n) { - -/* Apply H(i) to A(i:m,i+1:n) from the left */ - - aii = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.f; - i__2 = *m - i__ + 1; - i__3 = *n - i__; - slarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[ - i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); - a[i__ + i__ * a_dim1] = aii; - } -/* L10: */ - } - return 0; - -/* End of SGEQR2 */ - -} /* sgeqr2_ */ - -/* Subroutine */ int sgeqrf_(integer *m, integer *n, real *a, integer *lda, - real *tau, real *work, integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int sgeqr2_(integer *, integer *, real *, integer - *, real *, real *, integer *), slarfb_(char *, char *, char *, - char *, integer *, integer *, integer *, real *, integer *, real * - , integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, - real *, integer *, real *, real *, integer *); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SGEQRF computes a QR factorization of a real M-by-N matrix A: - A = Q * R. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the M-by-N matrix A. - On exit, the elements on and above the diagonal of the array - contain the min(M,N)-by-N upper trapezoidal matrix R (R is - upper triangular if m >= n); the elements below the diagonal, - with the array TAU, represent the orthogonal matrix Q as a - product of min(m,n) elementary reflectors (see Further - Details). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (output) REAL array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace/output) REAL array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,N). - For optimum performance LWORK >= N*NB, where NB is - the optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The matrix Q is represented as a product of elementary reflectors - - Q = H(1) H(2) . . . H(k), where k = min(m,n). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), - and tau in TAU(i). - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nb = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) - 1); - lwkopt = *n * nb; - work[1] = (real) lwkopt; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } else if (*lwork < max(1,*n) && ! lquery) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SGEQRF", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - k = min(*m,*n); - if (k == 0) { - work[1] = 1.f; - return 0; - } - - nbmin = 2; - nx = 0; - iws = *n; - if (nb > 1 && nb < k) { - -/* - Determine when to cross over from blocked to unblocked code. - - Computing MAX -*/ - i__1 = 0, i__2 = ilaenv_(&c__3, "SGEQRF", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < k) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *n; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: reduce NB and - determine the minimum value of NB. -*/ - - nb = *lwork / ldwork; -/* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "SGEQRF", " ", m, n, &c_n1, & - c_n1, (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - } - } - } - - if (nb >= nbmin && nb < k && nx < k) { - -/* Use blocked code initially */ - - i__1 = k - nx; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = k - i__ + 1; - ib = min(i__3,nb); - -/* - Compute the QR factorization of the current block - A(i:m,i:i+ib-1) -*/ - - i__3 = *m - i__ + 1; - sgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ - 1], &iinfo); - if (i__ + ib <= *n) { - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__3 = *m - i__ + 1; - slarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], &work[1], &ldwork); - -/* Apply H' to A(i:m,i+ib:n) from the left */ - - i__3 = *m - i__ + 1; - i__4 = *n - i__ - ib + 1; - slarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & - i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & - ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib - + 1], &ldwork); - } -/* L10: */ - } - } else { - i__ = 1; - } - -/* Use unblocked code to factor the last or only block. */ - - if (i__ <= k) { - i__2 = *m - i__ + 1; - i__1 = *n - i__ + 1; - sgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] - , &iinfo); - } - - work[1] = (real) iws; - return 0; - -/* End of SGEQRF */ - -} /* sgeqrf_ */ - -/* Subroutine */ int sgesdd_(char *jobz, integer *m, integer *n, real *a, - integer *lda, real *s, real *u, integer *ldu, real *vt, integer *ldvt, - real *work, integer *lwork, integer *iwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, - i__2, i__3; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__, ie, il, ir, iu, blk; - static real dum[1], eps; - static integer ivt, iscl; - static real anrm; - static integer idum[1], ierr, itau; - extern logical lsame_(char *, char *); - static integer chunk; - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *); - static integer minmn, wrkbl, itaup, itauq, mnthr; - static logical wntqa; - static integer nwork; - static logical wntqn, wntqo, wntqs; - static integer bdspac; - extern /* Subroutine */ int sbdsdc_(char *, char *, integer *, real *, - real *, real *, integer *, real *, integer *, real *, integer *, - real *, integer *, integer *), sgebrd_(integer *, - integer *, real *, integer *, real *, real *, real *, real *, - real *, integer *, integer *); - extern doublereal slamch_(char *), slange_(char *, integer *, - integer *, real *, integer *, real *); - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static real bignum; - extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer - *, real *, real *, integer *, integer *), slascl_(char *, integer - *, integer *, real *, real *, integer *, integer *, real *, - integer *, integer *), sgeqrf_(integer *, integer *, real - *, integer *, real *, real *, integer *, integer *), slacpy_(char - *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, - real *, integer *), sorgbr_(char *, integer *, integer *, - integer *, real *, integer *, real *, real *, integer *, integer * - ); - static integer ldwrkl; - extern /* Subroutine */ int sormbr_(char *, char *, char *, integer *, - integer *, integer *, real *, integer *, real *, real *, integer * - , real *, integer *, integer *); - static integer ldwrkr, minwrk, ldwrku, maxwrk; - extern /* Subroutine */ int sorglq_(integer *, integer *, integer *, real - *, integer *, real *, real *, integer *, integer *); - static integer ldwkvt; - static real smlnum; - static logical wntqas; - extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real - *, integer *, real *, real *, integer *, integer *); - static logical lquery; - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - SGESDD computes the singular value decomposition (SVD) of a real - M-by-N matrix A, optionally computing the left and right singular - vectors. If singular vectors are desired, it uses a - divide-and-conquer algorithm. - - The SVD is written - - A = U * SIGMA * transpose(V) - - where SIGMA is an M-by-N matrix which is zero except for its - min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and - V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA - are the singular values of A; they are real and non-negative, and - are returned in descending order. The first min(m,n) columns of - U and V are the left and right singular vectors of A. - - Note that the routine returns VT = V**T, not V. - - The divide and conquer algorithm makes very mild assumptions about - floating point arithmetic. It will work on machines with a guard - digit in add/subtract, or on those binary machines without guard - digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or - Cray-2. It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. - - Arguments - ========= - - JOBZ (input) CHARACTER*1 - Specifies options for computing all or part of the matrix U: - = 'A': all M columns of U and all N rows of V**T are - returned in the arrays U and VT; - = 'S': the first min(M,N) columns of U and the first - min(M,N) rows of V**T are returned in the arrays U - and VT; - = 'O': If M >= N, the first N columns of U are overwritten - on the array A and all rows of V**T are returned in - the array VT; - otherwise, all columns of U are returned in the - array U and the first M rows of V**T are overwritten - in the array VT; - = 'N': no columns of U or rows of V**T are computed. - - M (input) INTEGER - The number of rows of the input matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the input matrix A. N >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the M-by-N matrix A. - On exit, - if JOBZ = 'O', A is overwritten with the first N columns - of U (the left singular vectors, stored - columnwise) if M >= N; - A is overwritten with the first M rows - of V**T (the right singular vectors, stored - rowwise) otherwise. - if JOBZ .ne. 'O', the contents of A are destroyed. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - S (output) REAL array, dimension (min(M,N)) - The singular values of A, sorted so that S(i) >= S(i+1). - - U (output) REAL array, dimension (LDU,UCOL) - UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; - UCOL = min(M,N) if JOBZ = 'S'. - If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M - orthogonal matrix U; - if JOBZ = 'S', U contains the first min(M,N) columns of U - (the left singular vectors, stored columnwise); - if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. - - LDU (input) INTEGER - The leading dimension of the array U. LDU >= 1; if - JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. - - VT (output) REAL array, dimension (LDVT,N) - If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the - N-by-N orthogonal matrix V**T; - if JOBZ = 'S', VT contains the first min(M,N) rows of - V**T (the right singular vectors, stored rowwise); - if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. - - LDVT (input) INTEGER - The leading dimension of the array VT. LDVT >= 1; if - JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; - if JOBZ = 'S', LDVT >= min(M,N). - - WORK (workspace/output) REAL array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK; - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= 1. - If JOBZ = 'N', - LWORK >= 3*min(M,N) + max(max(M,N),6*min(M,N)). - If JOBZ = 'O', - LWORK >= 3*min(M,N)*min(M,N) + - max(max(M,N),5*min(M,N)*min(M,N)+4*min(M,N)). - If JOBZ = 'S' or 'A' - LWORK >= 3*min(M,N)*min(M,N) + - max(max(M,N),4*min(M,N)*min(M,N)+4*min(M,N)). - For good performance, LWORK should generally be larger. - If LWORK < 0 but other input arguments are legal, WORK(1) - returns the optimal LWORK. - - IWORK (workspace) INTEGER array, dimension (8*min(M,N)) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: SBDSDC did not converge, updating process failed. - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --s; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - --work; - --iwork; - - /* Function Body */ - *info = 0; - minmn = min(*m,*n); - mnthr = (integer) (minmn * 11.f / 6.f); - wntqa = lsame_(jobz, "A"); - wntqs = lsame_(jobz, "S"); - wntqas = wntqa || wntqs; - wntqo = lsame_(jobz, "O"); - wntqn = lsame_(jobz, "N"); - minwrk = 1; - maxwrk = 1; - lquery = *lwork == -1; - - if (! (wntqa || wntqs || wntqo || wntqn)) { - *info = -1; - } else if (*m < 0) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < * - m) { - *info = -8; - } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || - wntqo && *m >= *n && *ldvt < *n) { - *info = -10; - } - -/* - Compute workspace - (Note: Comments in the code beginning "Workspace:" describe the - minimal amount of workspace needed at that point in the code, - as well as the preferred amount for good performance. - NB refers to the optimal block size for the immediately - following subroutine, as returned by ILAENV.) -*/ - - if (*info == 0 && *m > 0 && *n > 0) { - if (*m >= *n) { - -/* Compute space needed for SBDSDC */ - - if (wntqn) { - bdspac = *n * 7; - } else { - bdspac = *n * 3 * *n + (*n << 2); - } - if (*m >= mnthr) { - if (wntqn) { - -/* Path 1 (M much larger than N, JOBZ='N') */ - - wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, - "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n; - maxwrk = max(i__1,i__2); - minwrk = bdspac + *n; - } else if (wntqo) { - -/* Path 2 (M much larger than N, JOBZ='O') */ - - wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "SORGQR", - " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, - "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" - , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" - , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + (*n << 1) * *n; - minwrk = bdspac + (*n << 1) * *n + *n * 3; - } else if (wntqs) { - -/* Path 3 (M much larger than N, JOBZ='S') */ - - wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "SORGQR", - " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, - "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" - , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" - , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + *n * *n; - minwrk = bdspac + *n * *n + *n * 3; - } else if (wntqa) { - -/* Path 4 (M much larger than N, JOBZ='A') */ - - wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n + *m * ilaenv_(&c__1, "SORGQR", - " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, - "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" - , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" - , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + *n * *n; - minwrk = bdspac + *n * *n + *n * 3; - } - } else { - -/* Path 5 (M at least N, but not much larger) */ - - wrkbl = *n * 3 + (*m + *n) * ilaenv_(&c__1, "SGEBRD", " ", m, - n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - if (wntqn) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n * 3; - maxwrk = max(i__1,i__2); - minwrk = *n * 3 + max(*m,bdspac); - } else if (wntqo) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" - , "QLN", m, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" - , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + *m * *n; -/* Computing MAX */ - i__1 = *m, i__2 = *n * *n + bdspac; - minwrk = *n * 3 + max(i__1,i__2); - } else if (wntqs) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" - , "QLN", m, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" - , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n * 3; - maxwrk = max(i__1,i__2); - minwrk = *n * 3 + max(*m,bdspac); - } else if (wntqa) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *m * ilaenv_(&c__1, "SORMBR" - , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" - , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = bdspac + *n * 3; - maxwrk = max(i__1,i__2); - minwrk = *n * 3 + max(*m,bdspac); - } - } - } else { - -/* Compute space needed for SBDSDC */ - - if (wntqn) { - bdspac = *m * 7; - } else { - bdspac = *m * 3 * *m + (*m << 2); - } - if (*n >= mnthr) { - if (wntqn) { - -/* Path 1t (N much larger than M, JOBZ='N') */ - - wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, - "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m; - maxwrk = max(i__1,i__2); - minwrk = bdspac + *m; - } else if (wntqo) { - -/* Path 2t (N much larger than M, JOBZ='O') */ - - wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "SORGLQ", - " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, - "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" - , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" - , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + (*m << 1) * *m; - minwrk = bdspac + (*m << 1) * *m + *m * 3; - } else if (wntqs) { - -/* Path 3t (N much larger than M, JOBZ='S') */ - - wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "SORGLQ", - " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, - "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" - , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" - , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + *m * *m; - minwrk = bdspac + *m * *m + *m * 3; - } else if (wntqa) { - -/* Path 4t (N much larger than M, JOBZ='A') */ - - wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m + *n * ilaenv_(&c__1, "SORGLQ", - " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, - "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" - , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" - , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + *m * *m; - minwrk = bdspac + *m * *m + *m * 3; - } - } else { - -/* Path 5t (N greater than M, but not much larger) */ - - wrkbl = *m * 3 + (*m + *n) * ilaenv_(&c__1, "SGEBRD", " ", m, - n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - if (wntqn) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - maxwrk = max(i__1,i__2); - minwrk = *m * 3 + max(*n,bdspac); - } else if (wntqo) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" - , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" - , "PRT", m, n, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + *m * *n; -/* Computing MAX */ - i__1 = *n, i__2 = *m * *m + bdspac; - minwrk = *m * 3 + max(i__1,i__2); - } else if (wntqs) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" - , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" - , "PRT", m, n, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - maxwrk = max(i__1,i__2); - minwrk = *m * 3 + max(*n,bdspac); - } else if (wntqa) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" - , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" - , "PRT", n, n, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - maxwrk = max(i__1,i__2); - minwrk = *m * 3 + max(*n,bdspac); - } - } - } - work[1] = (real) maxwrk; - } - - if (*lwork < minwrk && ! lquery) { - *info = -12; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SGESDD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0) { - if (*lwork >= 1) { - work[1] = 1.f; - } - return 0; - } - -/* Get machine constants */ - - eps = slamch_("P"); - smlnum = sqrt(slamch_("S")) / eps; - bignum = 1.f / smlnum; - -/* Scale A if max element outside range [SMLNUM,BIGNUM] */ - - anrm = slange_("M", m, n, &a[a_offset], lda, dum); - iscl = 0; - if (anrm > 0.f && anrm < smlnum) { - iscl = 1; - slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & - ierr); - } else if (anrm > bignum) { - iscl = 1; - slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & - ierr); - } - - if (*m >= *n) { - -/* - A has at least as many rows as columns. If A has sufficiently - more rows than columns, first reduce using the QR - decomposition (if sufficient workspace available) -*/ - - if (*m >= mnthr) { - - if (wntqn) { - -/* - Path 1 (M much larger than N, JOBZ='N') - No singular vectors to be computed -*/ - - itau = 1; - nwork = itau + *n; - -/* - Compute A=Q*R - (Workspace: need 2*N, prefer N+N*NB) -*/ - - i__1 = *lwork - nwork + 1; - sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); - -/* Zero out below R */ - - i__1 = *n - 1; - i__2 = *n - 1; - slaset_("L", &i__1, &i__2, &c_b1101, &c_b1101, &a[a_dim1 + 2], - lda); - ie = 1; - itauq = ie + *n; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize R in A - (Workspace: need 4*N, prefer 3*N+2*N*NB) -*/ - - i__1 = *lwork - nwork + 1; - sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); - nwork = ie + *n; - -/* - Perform bidiagonal SVD, computing singular values only - (Workspace: need N+BDSPAC) -*/ - - sbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, - dum, idum, &work[nwork], &iwork[1], info); - - } else if (wntqo) { - -/* - Path 2 (M much larger than N, JOBZ = 'O') - N left singular vectors to be overwritten on A and - N right singular vectors to be computed in VT -*/ - - ir = 1; - -/* WORK(IR) is LDWRKR by N */ - - if (*lwork >= *lda * *n + *n * *n + *n * 3 + bdspac) { - ldwrkr = *lda; - } else { - ldwrkr = (*lwork - *n * *n - *n * 3 - bdspac) / *n; - } - itau = ir + ldwrkr * *n; - nwork = itau + *n; - -/* - Compute A=Q*R - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - - i__1 = *lwork - nwork + 1; - sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); - -/* Copy R to WORK(IR), zeroing out below it */ - - slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); - i__1 = *n - 1; - i__2 = *n - 1; - slaset_("L", &i__1, &i__2, &c_b1101, &c_b1101, &work[ir + 1], - &ldwrkr); - -/* - Generate Q in A - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - - i__1 = *lwork - nwork + 1; - sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], - &i__1, &ierr); - ie = itau; - itauq = ie + *n; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize R in VT, copying result to WORK(IR) - (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) -*/ - - i__1 = *lwork - nwork + 1; - sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); - -/* WORK(IU) is N by N */ - - iu = nwork; - nwork = iu + *n * *n; - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in WORK(IU) and computing right - singular vectors of bidiagonal matrix in VT - (Workspace: need N+N*N+BDSPAC) -*/ - - sbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); - -/* - Overwrite WORK(IU) by left singular vectors of R - and VT by right singular vectors of R - (Workspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) -*/ - - i__1 = *lwork - nwork + 1; - sormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ - itauq], &work[iu], n, &work[nwork], &i__1, &ierr); - i__1 = *lwork - nwork + 1; - sormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - -/* - Multiply Q in A by left singular vectors of R in - WORK(IU), storing result in WORK(IR) and copying to A - (Workspace: need 2*N*N, prefer N*N+M*N) -*/ - - i__1 = *m; - i__2 = ldwrkr; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += - i__2) { -/* Computing MIN */ - i__3 = *m - i__ + 1; - chunk = min(i__3,ldwrkr); - sgemm_("N", "N", &chunk, n, n, &c_b871, &a[i__ + a_dim1], - lda, &work[iu], n, &c_b1101, &work[ir], &ldwrkr); - slacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + - a_dim1], lda); -/* L10: */ - } - - } else if (wntqs) { - -/* - Path 3 (M much larger than N, JOBZ='S') - N left singular vectors to be computed in U and - N right singular vectors to be computed in VT -*/ - - ir = 1; - -/* WORK(IR) is N by N */ - - ldwrkr = *n; - itau = ir + ldwrkr * *n; - nwork = itau + *n; - -/* - Compute A=Q*R - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - - i__2 = *lwork - nwork + 1; - sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); - -/* Copy R to WORK(IR), zeroing out below it */ - - slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); - i__2 = *n - 1; - i__1 = *n - 1; - slaset_("L", &i__2, &i__1, &c_b1101, &c_b1101, &work[ir + 1], - &ldwrkr); - -/* - Generate Q in A - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - - i__2 = *lwork - nwork + 1; - sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], - &i__2, &ierr); - ie = itau; - itauq = ie + *n; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize R in WORK(IR) - (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) -*/ - - i__2 = *lwork - nwork + 1; - sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagoal matrix in U and computing right singular - vectors of bidiagonal matrix in VT - (Workspace: need N+BDSPAC) -*/ - - sbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); - -/* - Overwrite U by left singular vectors of R and VT - by right singular vectors of R - (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) -*/ - - i__2 = *lwork - nwork + 1; - sormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - - i__2 = *lwork - nwork + 1; - sormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); - -/* - Multiply Q in A by left singular vectors of R in - WORK(IR), storing result in U - (Workspace: need N*N) -*/ - - slacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr); - sgemm_("N", "N", m, n, n, &c_b871, &a[a_offset], lda, &work[ - ir], &ldwrkr, &c_b1101, &u[u_offset], ldu); - - } else if (wntqa) { - -/* - Path 4 (M much larger than N, JOBZ='A') - M left singular vectors to be computed in U and - N right singular vectors to be computed in VT -*/ - - iu = 1; - -/* WORK(IU) is N by N */ - - ldwrku = *n; - itau = iu + ldwrku * *n; - nwork = itau + *n; - -/* - Compute A=Q*R, copying result to U - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - - i__2 = *lwork - nwork + 1; - sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); - slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); - -/* - Generate Q in U - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - i__2 = *lwork - nwork + 1; - sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork], - &i__2, &ierr); - -/* Produce R in A, zeroing out other entries */ - - i__2 = *n - 1; - i__1 = *n - 1; - slaset_("L", &i__2, &i__1, &c_b1101, &c_b1101, &a[a_dim1 + 2], - lda); - ie = itau; - itauq = ie + *n; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize R in A - (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) -*/ - - i__2 = *lwork - nwork + 1; - sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in WORK(IU) and computing right - singular vectors of bidiagonal matrix in VT - (Workspace: need N+N*N+BDSPAC) -*/ - - sbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); - -/* - Overwrite WORK(IU) by left singular vectors of R and VT - by right singular vectors of R - (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) -*/ - - i__2 = *lwork - nwork + 1; - sormbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[ - itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & - ierr); - i__2 = *lwork - nwork + 1; - sormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); - -/* - Multiply Q in U by left singular vectors of R in - WORK(IU), storing result in A - (Workspace: need N*N) -*/ - - sgemm_("N", "N", m, n, n, &c_b871, &u[u_offset], ldu, &work[ - iu], &ldwrku, &c_b1101, &a[a_offset], lda); - -/* Copy left singular vectors of A from A to U */ - - slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); - - } - - } else { - -/* - M .LT. MNTHR - - Path 5 (M at least N, but not much larger) - Reduce to bidiagonal form without QR decomposition -*/ - - ie = 1; - itauq = ie + *n; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize A - (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) -*/ - - i__2 = *lwork - nwork + 1; - sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & - work[itaup], &work[nwork], &i__2, &ierr); - if (wntqn) { - -/* - Perform bidiagonal SVD, only computing singular values - (Workspace: need N+BDSPAC) -*/ - - sbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, - dum, idum, &work[nwork], &iwork[1], info); - } else if (wntqo) { - iu = nwork; - if (*lwork >= *m * *n + *n * 3 + bdspac) { - -/* WORK( IU ) is M by N */ - - ldwrku = *m; - nwork = iu + ldwrku * *n; - slaset_("F", m, n, &c_b1101, &c_b1101, &work[iu], &ldwrku); - } else { - -/* WORK( IU ) is N by N */ - - ldwrku = *n; - nwork = iu + ldwrku * *n; - -/* WORK(IR) is LDWRKR by N */ - - ir = nwork; - ldwrkr = (*lwork - *n * *n - *n * 3) / *n; - } - nwork = iu + ldwrku * *n; - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in WORK(IU) and computing right - singular vectors of bidiagonal matrix in VT - (Workspace: need N+N*N+BDSPAC) -*/ - - sbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], &ldwrku, & - vt[vt_offset], ldvt, dum, idum, &work[nwork], &iwork[ - 1], info); - -/* - Overwrite VT by right singular vectors of A - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - - i__2 = *lwork - nwork + 1; - sormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); - - if (*lwork >= *m * *n + *n * 3 + bdspac) { - -/* - Overwrite WORK(IU) by left singular vectors of A - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - - i__2 = *lwork - nwork + 1; - sormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ - itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & - ierr); - -/* Copy left singular vectors of A from WORK(IU) to A */ - - slacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda); - } else { - -/* - Generate Q in A - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - - i__2 = *lwork - nwork + 1; - sorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & - work[nwork], &i__2, &ierr); - -/* - Multiply Q in A by left singular vectors of - bidiagonal matrix in WORK(IU), storing result in - WORK(IR) and copying to A - (Workspace: need 2*N*N, prefer N*N+M*N) -*/ - - i__2 = *m; - i__1 = ldwrkr; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += - i__1) { -/* Computing MIN */ - i__3 = *m - i__ + 1; - chunk = min(i__3,ldwrkr); - sgemm_("N", "N", &chunk, n, n, &c_b871, &a[i__ + - a_dim1], lda, &work[iu], &ldwrku, &c_b1101, & - work[ir], &ldwrkr); - slacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + - a_dim1], lda); -/* L20: */ - } - } - - } else if (wntqs) { - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal matrix in VT - (Workspace: need N+BDSPAC) -*/ - - slaset_("F", m, n, &c_b1101, &c_b1101, &u[u_offset], ldu); - sbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); - -/* - Overwrite U by left singular vectors of A and VT - by right singular vectors of A - (Workspace: need 3*N, prefer 2*N+N*NB) -*/ - - i__1 = *lwork - nwork + 1; - sormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - i__1 = *lwork - nwork + 1; - sormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - } else if (wntqa) { - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal matrix in VT - (Workspace: need N+BDSPAC) -*/ - - slaset_("F", m, m, &c_b1101, &c_b1101, &u[u_offset], ldu); - sbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); - -/* Set the right corner of U to identity matrix */ - - i__1 = *m - *n; - i__2 = *m - *n; - slaset_("F", &i__1, &i__2, &c_b1101, &c_b871, &u[*n + 1 + (*n - + 1) * u_dim1], ldu); - -/* - Overwrite U by left singular vectors of A and VT - by right singular vectors of A - (Workspace: need N*N+2*N+M, prefer N*N+2*N+M*NB) -*/ - - i__1 = *lwork - nwork + 1; - sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - i__1 = *lwork - nwork + 1; - sormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - } - - } - - } else { - -/* - A has more columns than rows. If A has sufficiently more - columns than rows, first reduce using the LQ decomposition (if - sufficient workspace available) -*/ - - if (*n >= mnthr) { - - if (wntqn) { - -/* - Path 1t (N much larger than M, JOBZ='N') - No singular vectors to be computed -*/ - - itau = 1; - nwork = itau + *m; - -/* - Compute A=L*Q - (Workspace: need 2*M, prefer M+M*NB) -*/ - - i__1 = *lwork - nwork + 1; - sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); - -/* Zero out above L */ - - i__1 = *m - 1; - i__2 = *m - 1; - slaset_("U", &i__1, &i__2, &c_b1101, &c_b1101, &a[(a_dim1 << - 1) + 1], lda); - ie = 1; - itauq = ie + *m; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize L in A - (Workspace: need 4*M, prefer 3*M+2*M*NB) -*/ - - i__1 = *lwork - nwork + 1; - sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); - nwork = ie + *m; - -/* - Perform bidiagonal SVD, computing singular values only - (Workspace: need M+BDSPAC) -*/ - - sbdsdc_("U", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, - dum, idum, &work[nwork], &iwork[1], info); - - } else if (wntqo) { - -/* - Path 2t (N much larger than M, JOBZ='O') - M right singular vectors to be overwritten on A and - M left singular vectors to be computed in U -*/ - - ivt = 1; - -/* IVT is M by M */ - - il = ivt + *m * *m; - if (*lwork >= *m * *n + *m * *m + *m * 3 + bdspac) { - -/* WORK(IL) is M by N */ - - ldwrkl = *m; - chunk = *n; - } else { - ldwrkl = *m; - chunk = (*lwork - *m * *m) / *m; - } - itau = il + ldwrkl * *m; - nwork = itau + *m; - -/* - Compute A=L*Q - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ - - i__1 = *lwork - nwork + 1; - sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); - -/* Copy L to WORK(IL), zeroing about above it */ - - slacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); - i__1 = *m - 1; - i__2 = *m - 1; - slaset_("U", &i__1, &i__2, &c_b1101, &c_b1101, &work[il + - ldwrkl], &ldwrkl); - -/* - Generate Q in A - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ - - i__1 = *lwork - nwork + 1; - sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], - &i__1, &ierr); - ie = itau; - itauq = ie + *m; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize L in WORK(IL) - (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) -*/ - - i__1 = *lwork - nwork + 1; - sgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U, and computing right singular - vectors of bidiagonal matrix in WORK(IVT) - (Workspace: need M+M*M+BDSPAC) -*/ - - sbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & - work[ivt], m, dum, idum, &work[nwork], &iwork[1], - info); - -/* - Overwrite U by left singular vectors of L and WORK(IVT) - by right singular vectors of L - (Workspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) -*/ - - i__1 = *lwork - nwork + 1; - sormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - i__1 = *lwork - nwork + 1; - sormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ - itaup], &work[ivt], m, &work[nwork], &i__1, &ierr); - -/* - Multiply right singular vectors of L in WORK(IVT) by Q - in A, storing result in WORK(IL) and copying to A - (Workspace: need 2*M*M, prefer M*M+M*N) -*/ - - i__1 = *n; - i__2 = chunk; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += - i__2) { -/* Computing MIN */ - i__3 = *n - i__ + 1; - blk = min(i__3,chunk); - sgemm_("N", "N", m, &blk, m, &c_b871, &work[ivt], m, &a[ - i__ * a_dim1 + 1], lda, &c_b1101, &work[il], & - ldwrkl); - slacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 - + 1], lda); -/* L30: */ - } - - } else if (wntqs) { - -/* - Path 3t (N much larger than M, JOBZ='S') - M right singular vectors to be computed in VT and - M left singular vectors to be computed in U -*/ - - il = 1; - -/* WORK(IL) is M by M */ - - ldwrkl = *m; - itau = il + ldwrkl * *m; - nwork = itau + *m; - -/* - Compute A=L*Q - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ - - i__2 = *lwork - nwork + 1; - sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); - -/* Copy L to WORK(IL), zeroing out above it */ - - slacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); - i__2 = *m - 1; - i__1 = *m - 1; - slaset_("U", &i__2, &i__1, &c_b1101, &c_b1101, &work[il + - ldwrkl], &ldwrkl); - -/* - Generate Q in A - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ - - i__2 = *lwork - nwork + 1; - sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], - &i__2, &ierr); - ie = itau; - itauq = ie + *m; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize L in WORK(IU), copying result to U - (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) -*/ - - i__2 = *lwork - nwork + 1; - sgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal matrix in VT - (Workspace: need M+BDSPAC) -*/ - - sbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); - -/* - Overwrite U by left singular vectors of L and VT - by right singular vectors of L - (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) -*/ - - i__2 = *lwork - nwork + 1; - sormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - i__2 = *lwork - nwork + 1; - sormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); - -/* - Multiply right singular vectors of L in WORK(IL) by - Q in A, storing result in VT - (Workspace: need M*M) -*/ - - slacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl); - sgemm_("N", "N", m, n, m, &c_b871, &work[il], &ldwrkl, &a[ - a_offset], lda, &c_b1101, &vt[vt_offset], ldvt); - - } else if (wntqa) { - -/* - Path 4t (N much larger than M, JOBZ='A') - N right singular vectors to be computed in VT and - M left singular vectors to be computed in U -*/ - - ivt = 1; - -/* WORK(IVT) is M by M */ - - ldwkvt = *m; - itau = ivt + ldwkvt * *m; - nwork = itau + *m; - -/* - Compute A=L*Q, copying result to VT - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ - - i__2 = *lwork - nwork + 1; - sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); - slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - -/* - Generate Q in VT - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ - - i__2 = *lwork - nwork + 1; - sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[ - nwork], &i__2, &ierr); - -/* Produce L in A, zeroing out other entries */ - - i__2 = *m - 1; - i__1 = *m - 1; - slaset_("U", &i__2, &i__1, &c_b1101, &c_b1101, &a[(a_dim1 << - 1) + 1], lda); - ie = itau; - itauq = ie + *m; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize L in A - (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) -*/ - - i__2 = *lwork - nwork + 1; - sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal matrix in WORK(IVT) - (Workspace: need M+M*M+BDSPAC) -*/ - - sbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & - work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] - , info); - -/* - Overwrite U by left singular vectors of L and WORK(IVT) - by right singular vectors of L - (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) -*/ - - i__2 = *lwork - nwork + 1; - sormbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - i__2 = *lwork - nwork + 1; - sormbr_("P", "R", "T", m, m, m, &a[a_offset], lda, &work[ - itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, & - ierr); - -/* - Multiply right singular vectors of L in WORK(IVT) by - Q in VT, storing result in A - (Workspace: need M*M) -*/ - - sgemm_("N", "N", m, n, m, &c_b871, &work[ivt], &ldwkvt, &vt[ - vt_offset], ldvt, &c_b1101, &a[a_offset], lda); - -/* Copy right singular vectors of A from A to VT */ - - slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - - } - - } else { - -/* - N .LT. MNTHR - - Path 5t (N greater than M, but not much larger) - Reduce to bidiagonal form without LQ decomposition -*/ - - ie = 1; - itauq = ie + *m; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize A - (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) -*/ - - i__2 = *lwork - nwork + 1; - sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & - work[itaup], &work[nwork], &i__2, &ierr); - if (wntqn) { - -/* - Perform bidiagonal SVD, only computing singular values - (Workspace: need M+BDSPAC) -*/ - - sbdsdc_("L", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, - dum, idum, &work[nwork], &iwork[1], info); - } else if (wntqo) { - ldwkvt = *m; - ivt = nwork; - if (*lwork >= *m * *n + *m * 3 + bdspac) { - -/* WORK( IVT ) is M by N */ - - slaset_("F", m, n, &c_b1101, &c_b1101, &work[ivt], & - ldwkvt); - nwork = ivt + ldwkvt * *n; - } else { - -/* WORK( IVT ) is M by M */ - - nwork = ivt + ldwkvt * *m; - il = nwork; - -/* WORK(IL) is M by CHUNK */ - - chunk = (*lwork - *m * *m - *m * 3) / *m; - } - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal matrix in WORK(IVT) - (Workspace: need M*M+BDSPAC) -*/ - - sbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & - work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] - , info); - -/* - Overwrite U by left singular vectors of A - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ - - i__2 = *lwork - nwork + 1; - sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - - if (*lwork >= *m * *n + *m * 3 + bdspac) { - -/* - Overwrite WORK(IVT) by left singular vectors of A - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ - - i__2 = *lwork - nwork + 1; - sormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ - itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, - &ierr); - -/* Copy right singular vectors of A from WORK(IVT) to A */ - - slacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda); - } else { - -/* - Generate P**T in A - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ - - i__2 = *lwork - nwork + 1; - sorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & - work[nwork], &i__2, &ierr); - -/* - Multiply Q in A by right singular vectors of - bidiagonal matrix in WORK(IVT), storing result in - WORK(IL) and copying to A - (Workspace: need 2*M*M, prefer M*M+M*N) -*/ - - i__2 = *n; - i__1 = chunk; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += - i__1) { -/* Computing MIN */ - i__3 = *n - i__ + 1; - blk = min(i__3,chunk); - sgemm_("N", "N", m, &blk, m, &c_b871, &work[ivt], & - ldwkvt, &a[i__ * a_dim1 + 1], lda, &c_b1101, & - work[il], m); - slacpy_("F", m, &blk, &work[il], m, &a[i__ * a_dim1 + - 1], lda); -/* L40: */ - } - } - } else if (wntqs) { - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal matrix in VT - (Workspace: need M+BDSPAC) -*/ - - slaset_("F", m, n, &c_b1101, &c_b1101, &vt[vt_offset], ldvt); - sbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); - -/* - Overwrite U by left singular vectors of A and VT - by right singular vectors of A - (Workspace: need 3*M, prefer 2*M+M*NB) -*/ - - i__1 = *lwork - nwork + 1; - sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - i__1 = *lwork - nwork + 1; - sormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - } else if (wntqa) { - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal matrix in VT - (Workspace: need M+BDSPAC) -*/ - - slaset_("F", n, n, &c_b1101, &c_b1101, &vt[vt_offset], ldvt); - sbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); - -/* Set the right corner of VT to identity matrix */ - - i__1 = *n - *m; - i__2 = *n - *m; - slaset_("F", &i__1, &i__2, &c_b1101, &c_b871, &vt[*m + 1 + (* - m + 1) * vt_dim1], ldvt); - -/* - Overwrite U by left singular vectors of A and VT - by right singular vectors of A - (Workspace: need 2*M+N, prefer 2*M+N*NB) -*/ - - i__1 = *lwork - nwork + 1; - sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - i__1 = *lwork - nwork + 1; - sormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - } - - } - - } - -/* Undo scaling if necessary */ - - if (iscl == 1) { - if (anrm > bignum) { - slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & - minmn, &ierr); - } - if (anrm < smlnum) { - slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & - minmn, &ierr); - } - } - -/* Return optimal workspace in WORK(1) */ - - work[1] = (real) maxwrk; - - return 0; - -/* End of SGESDD */ - -} /* sgesdd_ */ - -/* Subroutine */ int sgesv_(integer *n, integer *nrhs, real *a, integer *lda, - integer *ipiv, real *b, integer *ldb, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1; - - /* Local variables */ - extern /* Subroutine */ int xerbla_(char *, integer *), sgetrf_( - integer *, integer *, real *, integer *, integer *, integer *), - sgetrs_(char *, integer *, integer *, real *, integer *, integer * - , real *, integer *, integer *); - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - SGESV computes the solution to a real system of linear equations - A * X = B, - where A is an N-by-N matrix and X and B are N-by-NRHS matrices. - - The LU decomposition with partial pivoting and row interchanges is - used to factor A as - A = P * L * U, - where P is a permutation matrix, L is unit lower triangular, and U is - upper triangular. The factored form of A is then used to solve the - system of equations A * X = B. - - Arguments - ========= - - N (input) INTEGER - The number of linear equations, i.e., the order of the - matrix A. N >= 0. - - NRHS (input) INTEGER - The number of right hand sides, i.e., the number of columns - of the matrix B. NRHS >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the N-by-N coefficient matrix A. - On exit, the factors L and U from the factorization - A = P*L*U; the unit diagonal elements of L are not stored. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - IPIV (output) INTEGER array, dimension (N) - The pivot indices that define the permutation matrix P; - row i of the matrix was interchanged with row IPIV(i). - - B (input/output) REAL array, dimension (LDB,NRHS) - On entry, the N-by-NRHS matrix of right hand side matrix B. - On exit, if INFO = 0, the N-by-NRHS solution matrix X. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, U(i,i) is exactly zero. The factorization - has been completed, but the factor U is exactly - singular, so the solution could not be computed. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - *info = 0; - if (*n < 0) { - *info = -1; - } else if (*nrhs < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } else if (*ldb < max(1,*n)) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SGESV ", &i__1); - return 0; - } - -/* Compute the LU factorization of A. */ - - sgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); - if (*info == 0) { - -/* Solve the system A*X = B, overwriting B with X. */ - - sgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[ - b_offset], ldb, info); - } - return 0; - -/* End of SGESV */ - -} /* sgesv_ */ - -/* Subroutine */ int sgetf2_(integer *m, integer *n, real *a, integer *lda, - integer *ipiv, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - real r__1; - - /* Local variables */ - static integer j, jp; - extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, - integer *, real *, integer *, real *, integer *), sscal_(integer * - , real *, real *, integer *), sswap_(integer *, real *, integer *, - real *, integer *), xerbla_(char *, integer *); - extern integer isamax_(integer *, real *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1992 - - - Purpose - ======= - - SGETF2 computes an LU factorization of a general m-by-n matrix A - using partial pivoting with row interchanges. - - The factorization has the form - A = P * L * U - where P is a permutation matrix, L is lower triangular with unit - diagonal elements (lower trapezoidal if m > n), and U is upper - triangular (upper trapezoidal if m < n). - - This is the right-looking Level 2 BLAS version of the algorithm. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the m by n matrix to be factored. - On exit, the factors L and U from the factorization - A = P*L*U; the unit diagonal elements of L are not stored. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - IPIV (output) INTEGER array, dimension (min(M,N)) - The pivot indices; for 1 <= i <= min(M,N), row i of the - matrix was interchanged with row IPIV(i). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - > 0: if INFO = k, U(k,k) is exactly zero. The factorization - has been completed, but the factor U is exactly - singular, and division by zero will occur if it is used - to solve a system of equations. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SGETF2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0) { - return 0; - } - - i__1 = min(*m,*n); - for (j = 1; j <= i__1; ++j) { - -/* Find pivot and test for singularity. */ - - i__2 = *m - j + 1; - jp = j - 1 + isamax_(&i__2, &a[j + j * a_dim1], &c__1); - ipiv[j] = jp; - if (a[jp + j * a_dim1] != 0.f) { - -/* Apply the interchange to columns 1:N. */ - - if (jp != j) { - sswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda); - } - -/* Compute elements J+1:M of J-th column. */ - - if (j < *m) { - i__2 = *m - j; - r__1 = 1.f / a[j + j * a_dim1]; - sscal_(&i__2, &r__1, &a[j + 1 + j * a_dim1], &c__1); - } - - } else if (*info == 0) { - - *info = j; - } - - if (j < min(*m,*n)) { - -/* Update trailing submatrix. */ - - i__2 = *m - j; - i__3 = *n - j; - sger_(&i__2, &i__3, &c_b1150, &a[j + 1 + j * a_dim1], &c__1, &a[j - + (j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], - lda); - } -/* L10: */ - } - return 0; - -/* End of SGETF2 */ - -} /* sgetf2_ */ - -/* Subroutine */ int sgetrf_(integer *m, integer *n, real *a, integer *lda, - integer *ipiv, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - - /* Local variables */ - static integer i__, j, jb, nb, iinfo; - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *), strsm_(char *, char *, char *, - char *, integer *, integer *, real *, real *, integer *, real *, - integer *), sgetf2_(integer *, - integer *, real *, integer *, integer *, integer *), xerbla_(char - *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer - *, integer *, integer *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - SGETRF computes an LU factorization of a general M-by-N matrix A - using partial pivoting with row interchanges. - - The factorization has the form - A = P * L * U - where P is a permutation matrix, L is lower triangular with unit - diagonal elements (lower trapezoidal if m > n), and U is upper - triangular (upper trapezoidal if m < n). - - This is the right-looking Level 3 BLAS version of the algorithm. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the M-by-N matrix to be factored. - On exit, the factors L and U from the factorization - A = P*L*U; the unit diagonal elements of L are not stored. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - IPIV (output) INTEGER array, dimension (min(M,N)) - The pivot indices; for 1 <= i <= min(M,N), row i of the - matrix was interchanged with row IPIV(i). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, U(i,i) is exactly zero. The factorization - has been completed, but the factor U is exactly - singular, and division by zero will occur if it is used - to solve a system of equations. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SGETRF", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0) { - return 0; - } - -/* Determine the block size for this environment. */ - - nb = ilaenv_(&c__1, "SGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) - 1); - if (nb <= 1 || nb >= min(*m,*n)) { - -/* Use unblocked code. */ - - sgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); - } else { - -/* Use blocked code. */ - - i__1 = min(*m,*n); - i__2 = nb; - for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { -/* Computing MIN */ - i__3 = min(*m,*n) - j + 1; - jb = min(i__3,nb); - -/* - Factor diagonal and subdiagonal blocks and test for exact - singularity. -*/ - - i__3 = *m - j + 1; - sgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); - -/* Adjust INFO and the pivot indices. */ - - if (*info == 0 && iinfo > 0) { - *info = iinfo + j - 1; - } -/* Computing MIN */ - i__4 = *m, i__5 = j + jb - 1; - i__3 = min(i__4,i__5); - for (i__ = j; i__ <= i__3; ++i__) { - ipiv[i__] = j - 1 + ipiv[i__]; -/* L10: */ - } - -/* Apply interchanges to columns 1:J-1. */ - - i__3 = j - 1; - i__4 = j + jb - 1; - slaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); - - if (j + jb <= *n) { - -/* Apply interchanges to columns J+JB:N. */ - - i__3 = *n - j - jb + 1; - i__4 = j + jb - 1; - slaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & - ipiv[1], &c__1); - -/* Compute block row of U. */ - - i__3 = *n - j - jb + 1; - strsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & - c_b871, &a[j + j * a_dim1], lda, &a[j + (j + jb) * - a_dim1], lda); - if (j + jb <= *m) { - -/* Update trailing submatrix. */ - - i__3 = *m - j - jb + 1; - i__4 = *n - j - jb + 1; - sgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, - &c_b1150, &a[j + jb + j * a_dim1], lda, &a[j + (j - + jb) * a_dim1], lda, &c_b871, &a[j + jb + (j + - jb) * a_dim1], lda); - } - } -/* L20: */ - } - } - return 0; - -/* End of SGETRF */ - -} /* sgetrf_ */ - -/* Subroutine */ int sgetrs_(char *trans, integer *n, integer *nrhs, real *a, - integer *lda, integer *ipiv, real *b, integer *ldb, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1; - - /* Local variables */ - extern logical lsame_(char *, char *); - extern /* Subroutine */ int strsm_(char *, char *, char *, char *, - integer *, integer *, real *, real *, integer *, real *, integer * - ), xerbla_(char *, integer *); - static logical notran; - extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer - *, integer *, integer *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - SGETRS solves a system of linear equations - A * X = B or A' * X = B - with a general N-by-N matrix A using the LU factorization computed - by SGETRF. - - Arguments - ========= - - TRANS (input) CHARACTER*1 - Specifies the form of the system of equations: - = 'N': A * X = B (No transpose) - = 'T': A'* X = B (Transpose) - = 'C': A'* X = B (Conjugate transpose = Transpose) - - N (input) INTEGER - The order of the matrix A. N >= 0. - - NRHS (input) INTEGER - The number of right hand sides, i.e., the number of columns - of the matrix B. NRHS >= 0. - - A (input) REAL array, dimension (LDA,N) - The factors L and U from the factorization A = P*L*U - as computed by SGETRF. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - IPIV (input) INTEGER array, dimension (N) - The pivot indices from SGETRF; for 1<=i<=N, row i of the - matrix was interchanged with row IPIV(i). - - B (input/output) REAL array, dimension (LDB,NRHS) - On entry, the right hand side matrix B. - On exit, the solution matrix X. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - *info = 0; - notran = lsame_(trans, "N"); - if (! notran && ! lsame_(trans, "T") && ! lsame_( - trans, "C")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*nrhs < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*ldb < max(1,*n)) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SGETRS", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0 || *nrhs == 0) { - return 0; - } - - if (notran) { - -/* - Solve A * X = B. - - Apply row interchanges to the right hand sides. -*/ - - slaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); - -/* Solve L*X = B, overwriting B with X. */ - - strsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b871, &a[ - a_offset], lda, &b[b_offset], ldb); - -/* Solve U*X = B, overwriting B with X. */ - - strsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b871, - &a[a_offset], lda, &b[b_offset], ldb); - } else { - -/* - Solve A' * X = B. - - Solve U'*X = B, overwriting B with X. -*/ - - strsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b871, &a[ - a_offset], lda, &b[b_offset], ldb); - -/* Solve L'*X = B, overwriting B with X. */ - - strsm_("Left", "Lower", "Transpose", "Unit", n, nrhs, &c_b871, &a[ - a_offset], lda, &b[b_offset], ldb); - -/* Apply row interchanges to the solution vectors. */ - - slaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); - } - - return 0; - -/* End of SGETRS */ - -} /* sgetrs_ */ - -/* Subroutine */ int shseqr_(char *job, char *compz, integer *n, integer *ilo, - integer *ihi, real *h__, integer *ldh, real *wr, real *wi, real *z__, - integer *ldz, real *work, integer *lwork, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3[2], i__4, - i__5; - real r__1, r__2; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i__, j, k, l; - static real s[225] /* was [15][15] */, v[16]; - static integer i1, i2, ii, nh, nr, ns, nv; - static real vv[16]; - static integer itn; - static real tau; - static integer its; - static real ulp, tst1; - static integer maxb; - static real absw; - static integer ierr; - static real unfl, temp, ovfl; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); - static integer itemp; - extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, - real *, integer *, real *, integer *, real *, real *, integer *); - static logical initz, wantt; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *); - static logical wantz; - extern doublereal slapy2_(real *, real *); - extern /* Subroutine */ int slabad_(real *, real *); - extern doublereal slamch_(char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, - real *); - extern integer isamax_(integer *, real *, integer *); - extern doublereal slanhs_(char *, integer *, real *, integer *, real *); - extern /* Subroutine */ int slahqr_(logical *, logical *, integer *, - integer *, integer *, real *, integer *, real *, real *, integer * - , integer *, real *, integer *, integer *), slacpy_(char *, - integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, - real *, integer *), slarfx_(char *, integer *, integer *, - real *, real *, real *, integer *, real *); - static real smlnum; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SHSEQR computes the eigenvalues of a real upper Hessenberg matrix H - and, optionally, the matrices T and Z from the Schur decomposition - H = Z T Z**T, where T is an upper quasi-triangular matrix (the Schur - form), and Z is the orthogonal matrix of Schur vectors. - - Optionally Z may be postmultiplied into an input orthogonal matrix Q, - so that this routine can give the Schur factorization of a matrix A - which has been reduced to the Hessenberg form H by the orthogonal - matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. - - Arguments - ========= - - JOB (input) CHARACTER*1 - = 'E': compute eigenvalues only; - = 'S': compute eigenvalues and the Schur form T. - - COMPZ (input) CHARACTER*1 - = 'N': no Schur vectors are computed; - = 'I': Z is initialized to the unit matrix and the matrix Z - of Schur vectors of H is returned; - = 'V': Z must contain an orthogonal matrix Q on entry, and - the product Q*Z is returned. - - N (input) INTEGER - The order of the matrix H. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that H is already upper triangular in rows - and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally - set by a previous call to SGEBAL, and then passed to SGEHRD - when the matrix output by SGEBAL is reduced to Hessenberg - form. Otherwise ILO and IHI should be set to 1 and N - respectively. - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - - H (input/output) REAL array, dimension (LDH,N) - On entry, the upper Hessenberg matrix H. - On exit, if JOB = 'S', H contains the upper quasi-triangular - matrix T from the Schur decomposition (the Schur form); - 2-by-2 diagonal blocks (corresponding to complex conjugate - pairs of eigenvalues) are returned in standard form, with - H(i,i) = H(i+1,i+1) and H(i+1,i)*H(i,i+1) < 0. If JOB = 'E', - the contents of H are unspecified on exit. - - LDH (input) INTEGER - The leading dimension of the array H. LDH >= max(1,N). - - WR (output) REAL array, dimension (N) - WI (output) REAL array, dimension (N) - The real and imaginary parts, respectively, of the computed - eigenvalues. If two eigenvalues are computed as a complex - conjugate pair, they are stored in consecutive elements of - WR and WI, say the i-th and (i+1)th, with WI(i) > 0 and - WI(i+1) < 0. If JOB = 'S', the eigenvalues are stored in the - same order as on the diagonal of the Schur form returned in - H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 - diagonal block, WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and - WI(i+1) = -WI(i). - - Z (input/output) REAL array, dimension (LDZ,N) - If COMPZ = 'N': Z is not referenced. - If COMPZ = 'I': on entry, Z need not be set, and on exit, Z - contains the orthogonal matrix Z of the Schur vectors of H. - If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q, - which is assumed to be equal to the unit matrix except for - the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z. - Normally Q is the orthogonal matrix generated by SORGHR after - the call to SGEHRD which formed the Hessenberg matrix H. - - LDZ (input) INTEGER - The leading dimension of the array Z. - LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise. - - WORK (workspace/output) REAL array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,N). - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, SHSEQR failed to compute all of the - eigenvalues in a total of 30*(IHI-ILO+1) iterations; - elements 1:ilo-1 and i+1:n of WR and WI contain those - eigenvalues which have been successfully computed. - - ===================================================================== - - - Decode and test the input parameters -*/ - - /* Parameter adjustments */ - h_dim1 = *ldh; - h_offset = 1 + h_dim1; - h__ -= h_offset; - --wr; - --wi; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - --work; - - /* Function Body */ - wantt = lsame_(job, "S"); - initz = lsame_(compz, "I"); - wantz = initz || lsame_(compz, "V"); - - *info = 0; - work[1] = (real) max(1,*n); - lquery = *lwork == -1; - if (! lsame_(job, "E") && ! wantt) { - *info = -1; - } else if (! lsame_(compz, "N") && ! wantz) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*ilo < 1 || *ilo > max(1,*n)) { - *info = -4; - } else if (*ihi < min(*ilo,*n) || *ihi > *n) { - *info = -5; - } else if (*ldh < max(1,*n)) { - *info = -7; - } else if (*ldz < 1 || wantz && *ldz < max(1,*n)) { - *info = -11; - } else if (*lwork < max(1,*n) && ! lquery) { - *info = -13; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SHSEQR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Initialize Z, if necessary */ - - if (initz) { - slaset_("Full", n, n, &c_b1101, &c_b871, &z__[z_offset], ldz); - } - -/* Store the eigenvalues isolated by SGEBAL. */ - - i__1 = *ilo - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - wr[i__] = h__[i__ + i__ * h_dim1]; - wi[i__] = 0.f; -/* L10: */ - } - i__1 = *n; - for (i__ = *ihi + 1; i__ <= i__1; ++i__) { - wr[i__] = h__[i__ + i__ * h_dim1]; - wi[i__] = 0.f; -/* L20: */ - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - if (*ilo == *ihi) { - wr[*ilo] = h__[*ilo + *ilo * h_dim1]; - wi[*ilo] = 0.f; - return 0; - } - -/* - Set rows and columns ILO to IHI to zero below the first - subdiagonal. -*/ - - i__1 = *ihi - 2; - for (j = *ilo; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j + 2; i__ <= i__2; ++i__) { - h__[i__ + j * h_dim1] = 0.f; -/* L30: */ - } -/* L40: */ - } - nh = *ihi - *ilo + 1; - -/* - Determine the order of the multi-shift QR algorithm to be used. - - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = job; - i__3[1] = 1, a__1[1] = compz; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - ns = ilaenv_(&c__4, "SHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( - ftnlen)2); -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = job; - i__3[1] = 1, a__1[1] = compz; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - maxb = ilaenv_(&c__8, "SHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( - ftnlen)2); - if (ns <= 2 || ns > nh || maxb >= nh) { - -/* Use the standard double-shift algorithm */ - - slahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[ - 1], ilo, ihi, &z__[z_offset], ldz, info); - return 0; - } - maxb = max(3,maxb); -/* Computing MIN */ - i__1 = min(ns,maxb); - ns = min(i__1,15); - -/* - Now 2 < NS <= MAXB < NH. - - Set machine-dependent constants for the stopping criterion. - If norm(H) <= sqrt(OVFL), overflow should not occur. -*/ - - unfl = slamch_("Safe minimum"); - ovfl = 1.f / unfl; - slabad_(&unfl, &ovfl); - ulp = slamch_("Precision"); - smlnum = unfl * (nh / ulp); - -/* - I1 and I2 are the indices of the first row and last column of H - to which transformations must be applied. If eigenvalues only are - being computed, I1 and I2 are set inside the main loop. -*/ - - if (wantt) { - i1 = 1; - i2 = *n; - } - -/* ITN is the total number of multiple-shift QR iterations allowed. */ - - itn = nh * 30; - -/* - The main loop begins here. I is the loop index and decreases from - IHI to ILO in steps of at most MAXB. Each iteration of the loop - works with the active submatrix in rows and columns L to I. - Eigenvalues I+1 to IHI have already converged. Either L = ILO or - H(L,L-1) is negligible so that the matrix splits. -*/ - - i__ = *ihi; -L50: - l = *ilo; - if (i__ < *ilo) { - goto L170; - } - -/* - Perform multiple-shift QR iterations on rows and columns ILO to I - until a submatrix of order at most MAXB splits off at the bottom - because a subdiagonal element has become negligible. -*/ - - i__1 = itn; - for (its = 0; its <= i__1; ++its) { - -/* Look for a single small subdiagonal element. */ - - i__2 = l + 1; - for (k = i__; k >= i__2; --k) { - tst1 = (r__1 = h__[k - 1 + (k - 1) * h_dim1], dabs(r__1)) + (r__2 - = h__[k + k * h_dim1], dabs(r__2)); - if (tst1 == 0.f) { - i__4 = i__ - l + 1; - tst1 = slanhs_("1", &i__4, &h__[l + l * h_dim1], ldh, &work[1] - ); - } -/* Computing MAX */ - r__2 = ulp * tst1; - if ((r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)) <= dmax(r__2, - smlnum)) { - goto L70; - } -/* L60: */ - } -L70: - l = k; - if (l > *ilo) { - -/* H(L,L-1) is negligible. */ - - h__[l + (l - 1) * h_dim1] = 0.f; - } - -/* Exit from loop if a submatrix of order <= MAXB has split off. */ - - if (l >= i__ - maxb + 1) { - goto L160; - } - -/* - Now the active submatrix is in rows and columns L to I. If - eigenvalues only are being computed, only the active submatrix - need be transformed. -*/ - - if (! wantt) { - i1 = l; - i2 = i__; - } - - if (its == 20 || its == 30) { - -/* Exceptional shifts. */ - - i__2 = i__; - for (ii = i__ - ns + 1; ii <= i__2; ++ii) { - wr[ii] = ((r__1 = h__[ii + (ii - 1) * h_dim1], dabs(r__1)) + ( - r__2 = h__[ii + ii * h_dim1], dabs(r__2))) * 1.5f; - wi[ii] = 0.f; -/* L80: */ - } - } else { - -/* Use eigenvalues of trailing submatrix of order NS as shifts. */ - - slacpy_("Full", &ns, &ns, &h__[i__ - ns + 1 + (i__ - ns + 1) * - h_dim1], ldh, s, &c__15); - slahqr_(&c_false, &c_false, &ns, &c__1, &ns, s, &c__15, &wr[i__ - - ns + 1], &wi[i__ - ns + 1], &c__1, &ns, &z__[z_offset], - ldz, &ierr); - if (ierr > 0) { - -/* - If SLAHQR failed to compute all NS eigenvalues, use the - unconverged diagonal elements as the remaining shifts. -*/ - - i__2 = ierr; - for (ii = 1; ii <= i__2; ++ii) { - wr[i__ - ns + ii] = s[ii + ii * 15 - 16]; - wi[i__ - ns + ii] = 0.f; -/* L90: */ - } - } - } - -/* - Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns)) - where G is the Hessenberg submatrix H(L:I,L:I) and w is - the vector of shifts (stored in WR and WI). The result is - stored in the local array V. -*/ - - v[0] = 1.f; - i__2 = ns + 1; - for (ii = 2; ii <= i__2; ++ii) { - v[ii - 1] = 0.f; -/* L100: */ - } - nv = 1; - i__2 = i__; - for (j = i__ - ns + 1; j <= i__2; ++j) { - if (wi[j] >= 0.f) { - if (wi[j] == 0.f) { - -/* real shift */ - - i__4 = nv + 1; - scopy_(&i__4, v, &c__1, vv, &c__1); - i__4 = nv + 1; - r__1 = -wr[j]; - sgemv_("No transpose", &i__4, &nv, &c_b871, &h__[l + l * - h_dim1], ldh, vv, &c__1, &r__1, v, &c__1); - ++nv; - } else if (wi[j] > 0.f) { - -/* complex conjugate pair of shifts */ - - i__4 = nv + 1; - scopy_(&i__4, v, &c__1, vv, &c__1); - i__4 = nv + 1; - r__1 = wr[j] * -2.f; - sgemv_("No transpose", &i__4, &nv, &c_b871, &h__[l + l * - h_dim1], ldh, v, &c__1, &r__1, vv, &c__1); - i__4 = nv + 1; - itemp = isamax_(&i__4, vv, &c__1); -/* Computing MAX */ - r__2 = (r__1 = vv[itemp - 1], dabs(r__1)); - temp = 1.f / dmax(r__2,smlnum); - i__4 = nv + 1; - sscal_(&i__4, &temp, vv, &c__1); - absw = slapy2_(&wr[j], &wi[j]); - temp = temp * absw * absw; - i__4 = nv + 2; - i__5 = nv + 1; - sgemv_("No transpose", &i__4, &i__5, &c_b871, &h__[l + l * - h_dim1], ldh, vv, &c__1, &temp, v, &c__1); - nv += 2; - } - -/* - Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero, - reset it to the unit vector. -*/ - - itemp = isamax_(&nv, v, &c__1); - temp = (r__1 = v[itemp - 1], dabs(r__1)); - if (temp == 0.f) { - v[0] = 1.f; - i__4 = nv; - for (ii = 2; ii <= i__4; ++ii) { - v[ii - 1] = 0.f; -/* L110: */ - } - } else { - temp = dmax(temp,smlnum); - r__1 = 1.f / temp; - sscal_(&nv, &r__1, v, &c__1); - } - } -/* L120: */ - } - -/* Multiple-shift QR step */ - - i__2 = i__ - 1; - for (k = l; k <= i__2; ++k) { - -/* - The first iteration of this loop determines a reflection G - from the vector V and applies it from left and right to H, - thus creating a nonzero bulge below the subdiagonal. - - Each subsequent iteration determines a reflection G to - restore the Hessenberg form in the (K-1)th column, and thus - chases the bulge one step toward the bottom of the active - submatrix. NR is the order of G. - - Computing MIN -*/ - i__4 = ns + 1, i__5 = i__ - k + 1; - nr = min(i__4,i__5); - if (k > l) { - scopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); - } - slarfg_(&nr, v, &v[1], &c__1, &tau); - if (k > l) { - h__[k + (k - 1) * h_dim1] = v[0]; - i__4 = i__; - for (ii = k + 1; ii <= i__4; ++ii) { - h__[ii + (k - 1) * h_dim1] = 0.f; -/* L130: */ - } - } - v[0] = 1.f; - -/* - Apply G from the left to transform the rows of the matrix in - columns K to I2. -*/ - - i__4 = i2 - k + 1; - slarfx_("Left", &nr, &i__4, v, &tau, &h__[k + k * h_dim1], ldh, & - work[1]); - -/* - Apply G from the right to transform the columns of the - matrix in rows I1 to min(K+NR,I). - - Computing MIN -*/ - i__5 = k + nr; - i__4 = min(i__5,i__) - i1 + 1; - slarfx_("Right", &i__4, &nr, v, &tau, &h__[i1 + k * h_dim1], ldh, - &work[1]); - - if (wantz) { - -/* Accumulate transformations in the matrix Z */ - - slarfx_("Right", &nh, &nr, v, &tau, &z__[*ilo + k * z_dim1], - ldz, &work[1]); - } -/* L140: */ - } - -/* L150: */ - } - -/* Failure to converge in remaining number of iterations */ - - *info = i__; - return 0; - -L160: - -/* - A submatrix of order <= MAXB in rows and columns L to I has split - off. Use the double-shift QR algorithm to handle it. -*/ - - slahqr_(&wantt, &wantz, n, &l, &i__, &h__[h_offset], ldh, &wr[1], &wi[1], - ilo, ihi, &z__[z_offset], ldz, info); - if (*info > 0) { - return 0; - } - -/* - Decrement number of remaining iterations, and return to start of - the main loop with a new value of I. -*/ - - itn -= its; - i__ = l - 1; - goto L50; - -L170: - work[1] = (real) max(1,*n); - return 0; - -/* End of SHSEQR */ - -} /* shseqr_ */ - -/* Subroutine */ int slabad_(real *small, real *large) -{ - /* Builtin functions */ - double r_lg10(real *), sqrt(doublereal); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - SLABAD takes as input the values computed by SLAMCH for underflow and - overflow, and returns the square root of each of these values if the - log of LARGE is sufficiently large. This subroutine is intended to - identify machines with a large exponent range, such as the Crays, and - redefine the underflow and overflow limits to be the square roots of - the values computed by SLAMCH. This subroutine is needed because - SLAMCH does not compensate for poor arithmetic in the upper half of - the exponent range, as is found on a Cray. - - Arguments - ========= - - SMALL (input/output) REAL - On entry, the underflow threshold as computed by SLAMCH. - On exit, if LOG10(LARGE) is sufficiently large, the square - root of SMALL, otherwise unchanged. - - LARGE (input/output) REAL - On entry, the overflow threshold as computed by SLAMCH. - On exit, if LOG10(LARGE) is sufficiently large, the square - root of LARGE, otherwise unchanged. - - ===================================================================== - - - If it looks like we're on a Cray, take the square root of - SMALL and LARGE to avoid overflow and underflow problems. -*/ - - if (r_lg10(large) > 2e3f) { - *small = sqrt(*small); - *large = sqrt(*large); - } - - return 0; - -/* End of SLABAD */ - -} /* slabad_ */ - -/* Subroutine */ int slabrd_(integer *m, integer *n, integer *nb, real *a, - integer *lda, real *d__, real *e, real *tauq, real *taup, real *x, - integer *ldx, real *y, integer *ldy) -{ - /* System generated locals */ - integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, - i__3; - - /* Local variables */ - static integer i__; - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), - sgemv_(char *, integer *, integer *, real *, real *, integer *, - real *, integer *, real *, real *, integer *), slarfg_( - integer *, real *, real *, integer *, real *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - SLABRD reduces the first NB rows and columns of a real general - m by n matrix A to upper or lower bidiagonal form by an orthogonal - transformation Q' * A * P, and returns the matrices X and Y which - are needed to apply the transformation to the unreduced part of A. - - If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower - bidiagonal form. - - This is an auxiliary routine called by SGEBRD - - Arguments - ========= - - M (input) INTEGER - The number of rows in the matrix A. - - N (input) INTEGER - The number of columns in the matrix A. - - NB (input) INTEGER - The number of leading rows and columns of A to be reduced. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the m by n general matrix to be reduced. - On exit, the first NB rows and columns of the matrix are - overwritten; the rest of the array is unchanged. - If m >= n, elements on and below the diagonal in the first NB - columns, with the array TAUQ, represent the orthogonal - matrix Q as a product of elementary reflectors; and - elements above the diagonal in the first NB rows, with the - array TAUP, represent the orthogonal matrix P as a product - of elementary reflectors. - If m < n, elements below the diagonal in the first NB - columns, with the array TAUQ, represent the orthogonal - matrix Q as a product of elementary reflectors, and - elements on and above the diagonal in the first NB rows, - with the array TAUP, represent the orthogonal matrix P as - a product of elementary reflectors. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - D (output) REAL array, dimension (NB) - The diagonal elements of the first NB rows and columns of - the reduced matrix. D(i) = A(i,i). - - E (output) REAL array, dimension (NB) - The off-diagonal elements of the first NB rows and columns of - the reduced matrix. - - TAUQ (output) REAL array dimension (NB) - The scalar factors of the elementary reflectors which - represent the orthogonal matrix Q. See Further Details. - - TAUP (output) REAL array, dimension (NB) - The scalar factors of the elementary reflectors which - represent the orthogonal matrix P. See Further Details. - - X (output) REAL array, dimension (LDX,NB) - The m-by-nb matrix X required to update the unreduced part - of A. - - LDX (input) INTEGER - The leading dimension of the array X. LDX >= M. - - Y (output) REAL array, dimension (LDY,NB) - The n-by-nb matrix Y required to update the unreduced part - of A. - - LDY (output) INTEGER - The leading dimension of the array Y. LDY >= N. - - Further Details - =============== - - The matrices Q and P are represented as products of elementary - reflectors: - - Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are real scalars, and v and u are real vectors. - - If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in - A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in - A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). - - If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in - A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in - A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). - - The elements of the vectors v and u together form the m-by-nb matrix - V and the nb-by-n matrix U' which are needed, with X and Y, to apply - the transformation to the unreduced part of the matrix, using a block - update of the form: A := A - V*Y' - X*U'. - - The contents of A on exit are illustrated by the following examples - with nb = 2: - - m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): - - ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) - ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) - ( v1 v2 a a a ) ( v1 1 a a a a ) - ( v1 v2 a a a ) ( v1 v2 a a a a ) - ( v1 v2 a a a ) ( v1 v2 a a a a ) - ( v1 v2 a a a ) - - where a denotes an element of the original matrix which is unchanged, - vi denotes an element of the vector defining H(i), and ui an element - of the vector defining G(i). - - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tauq; - --taup; - x_dim1 = *ldx; - x_offset = 1 + x_dim1; - x -= x_offset; - y_dim1 = *ldy; - y_offset = 1 + y_dim1; - y -= y_offset; - - /* Function Body */ - if (*m <= 0 || *n <= 0) { - return 0; - } - - if (*m >= *n) { - -/* Reduce to upper bidiagonal form */ - - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Update A(i:m,i) */ - - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1150, &a[i__ + a_dim1], - lda, &y[i__ + y_dim1], ldy, &c_b871, &a[i__ + i__ * - a_dim1], &c__1); - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1150, &x[i__ + x_dim1], - ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b871, &a[i__ + i__ * - a_dim1], &c__1); - -/* Generate reflection Q(i) to annihilate A(i+1:m,i) */ - - i__2 = *m - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * - a_dim1], &c__1, &tauq[i__]); - d__[i__] = a[i__ + i__ * a_dim1]; - if (i__ < *n) { - a[i__ + i__ * a_dim1] = 1.f; - -/* Compute Y(i+1:n,i) */ - - i__2 = *m - i__ + 1; - i__3 = *n - i__; - sgemv_("Transpose", &i__2, &i__3, &c_b871, &a[i__ + (i__ + 1) - * a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, & - c_b1101, &y[i__ + 1 + i__ * y_dim1], &c__1) - ; - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - sgemv_("Transpose", &i__2, &i__3, &c_b871, &a[i__ + a_dim1], - lda, &a[i__ + i__ * a_dim1], &c__1, &c_b1101, &y[i__ * - y_dim1 + 1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1150, &y[i__ + 1 + - y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b871, & - y[i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - sgemv_("Transpose", &i__2, &i__3, &c_b871, &x[i__ + x_dim1], - ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b1101, &y[i__ * - y_dim1 + 1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - sgemv_("Transpose", &i__2, &i__3, &c_b1150, &a[(i__ + 1) * - a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & - c_b871, &y[i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *n - i__; - sscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); - -/* Update A(i,i+1:n) */ - - i__2 = *n - i__; - sgemv_("No transpose", &i__2, &i__, &c_b1150, &y[i__ + 1 + - y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b871, &a[i__ - + (i__ + 1) * a_dim1], lda); - i__2 = i__ - 1; - i__3 = *n - i__; - sgemv_("Transpose", &i__2, &i__3, &c_b1150, &a[(i__ + 1) * - a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b871, &a[ - i__ + (i__ + 1) * a_dim1], lda); - -/* Generate reflection P(i) to annihilate A(i,i+2:n) */ - - i__2 = *n - i__; -/* Computing MIN */ - i__3 = i__ + 2; - slarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( - i__3,*n) * a_dim1], lda, &taup[i__]); - e[i__] = a[i__ + (i__ + 1) * a_dim1]; - a[i__ + (i__ + 1) * a_dim1] = 1.f; - -/* Compute X(i+1:m,i) */ - - i__2 = *m - i__; - i__3 = *n - i__; - sgemv_("No transpose", &i__2, &i__3, &c_b871, &a[i__ + 1 + ( - i__ + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1], - lda, &c_b1101, &x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *n - i__; - sgemv_("Transpose", &i__2, &i__, &c_b871, &y[i__ + 1 + y_dim1] - , ldy, &a[i__ + (i__ + 1) * a_dim1], lda, &c_b1101, & - x[i__ * x_dim1 + 1], &c__1); - i__2 = *m - i__; - sgemv_("No transpose", &i__2, &i__, &c_b1150, &a[i__ + 1 + - a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b871, & - x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - sgemv_("No transpose", &i__2, &i__3, &c_b871, &a[(i__ + 1) * - a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b1101, &x[i__ * x_dim1 + 1], &c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1150, &x[i__ + 1 + - x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b871, & - x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *m - i__; - sscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); - } -/* L10: */ - } - } else { - -/* Reduce to lower bidiagonal form */ - - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Update A(i,i:n) */ - - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1150, &y[i__ + y_dim1], - ldy, &a[i__ + a_dim1], lda, &c_b871, &a[i__ + i__ * - a_dim1], lda); - i__2 = i__ - 1; - i__3 = *n - i__ + 1; - sgemv_("Transpose", &i__2, &i__3, &c_b1150, &a[i__ * a_dim1 + 1], - lda, &x[i__ + x_dim1], ldx, &c_b871, &a[i__ + i__ * - a_dim1], lda); - -/* Generate reflection P(i) to annihilate A(i,i+1:n) */ - - i__2 = *n - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * - a_dim1], lda, &taup[i__]); - d__[i__] = a[i__ + i__ * a_dim1]; - if (i__ < *m) { - a[i__ + i__ * a_dim1] = 1.f; - -/* Compute X(i+1:m,i) */ - - i__2 = *m - i__; - i__3 = *n - i__ + 1; - sgemv_("No transpose", &i__2, &i__3, &c_b871, &a[i__ + 1 + - i__ * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, & - c_b1101, &x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - sgemv_("Transpose", &i__2, &i__3, &c_b871, &y[i__ + y_dim1], - ldy, &a[i__ + i__ * a_dim1], lda, &c_b1101, &x[i__ * - x_dim1 + 1], &c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1150, &a[i__ + 1 + - a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b871, & - x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__ + 1; - sgemv_("No transpose", &i__2, &i__3, &c_b871, &a[i__ * a_dim1 - + 1], lda, &a[i__ + i__ * a_dim1], lda, &c_b1101, &x[ - i__ * x_dim1 + 1], &c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1150, &x[i__ + 1 + - x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b871, & - x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *m - i__; - sscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); - -/* Update A(i+1:m,i) */ - - i__2 = *m - i__; - i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1150, &a[i__ + 1 + - a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b871, &a[i__ - + 1 + i__ * a_dim1], &c__1); - i__2 = *m - i__; - sgemv_("No transpose", &i__2, &i__, &c_b1150, &x[i__ + 1 + - x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b871, & - a[i__ + 1 + i__ * a_dim1], &c__1); - -/* Generate reflection Q(i) to annihilate A(i+2:m,i) */ - - i__2 = *m - i__; -/* Computing MIN */ - i__3 = i__ + 2; - slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) + - i__ * a_dim1], &c__1, &tauq[i__]); - e[i__] = a[i__ + 1 + i__ * a_dim1]; - a[i__ + 1 + i__ * a_dim1] = 1.f; - -/* Compute Y(i+1:n,i) */ - - i__2 = *m - i__; - i__3 = *n - i__; - sgemv_("Transpose", &i__2, &i__3, &c_b871, &a[i__ + 1 + (i__ - + 1) * a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], & - c__1, &c_b1101, &y[i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - sgemv_("Transpose", &i__2, &i__3, &c_b871, &a[i__ + 1 + - a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b1101, &y[i__ * y_dim1 + 1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1150, &y[i__ + 1 + - y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b871, & - y[i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *m - i__; - sgemv_("Transpose", &i__2, &i__, &c_b871, &x[i__ + 1 + x_dim1] - , ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b1101, & - y[i__ * y_dim1 + 1], &c__1); - i__2 = *n - i__; - sgemv_("Transpose", &i__, &i__2, &c_b1150, &a[(i__ + 1) * - a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & - c_b871, &y[i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *n - i__; - sscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); - } -/* L20: */ - } - } - return 0; - -/* End of SLABRD */ - -} /* slabrd_ */ - -/* Subroutine */ int slacpy_(char *uplo, integer *m, integer *n, real *a, - integer *lda, real *b, integer *ldb) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j; - extern logical lsame_(char *, char *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - SLACPY copies all or part of a two-dimensional matrix A to another - matrix B. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies the part of the matrix A to be copied to B. - = 'U': Upper triangular part - = 'L': Lower triangular part - Otherwise: All of the matrix A - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input) REAL array, dimension (LDA,N) - The m by n matrix A. If UPLO = 'U', only the upper triangle - or trapezoid is accessed; if UPLO = 'L', only the lower - triangle or trapezoid is accessed. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - B (output) REAL array, dimension (LDB,N) - On exit, B = A in the locations specified by UPLO. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,M). - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = min(j,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; -/* L10: */ - } -/* L20: */ - } - } else if (lsame_(uplo, "L")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = j; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; -/* L30: */ - } -/* L40: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; -/* L50: */ - } -/* L60: */ - } - } - return 0; - -/* End of SLACPY */ - -} /* slacpy_ */ - -/* Subroutine */ int sladiv_(real *a, real *b, real *c__, real *d__, real *p, - real *q) -{ - static real e, f; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - SLADIV performs complex division in real arithmetic - - a + i*b - p + i*q = --------- - c + i*d - - The algorithm is due to Robert L. Smith and can be found - in D. Knuth, The art of Computer Programming, Vol.2, p.195 - - Arguments - ========= - - A (input) REAL - B (input) REAL - C (input) REAL - D (input) REAL - The scalars a, b, c, and d in the above expression. - - P (output) REAL - Q (output) REAL - The scalars p and q in the above expression. - - ===================================================================== -*/ - - - if (dabs(*d__) < dabs(*c__)) { - e = *d__ / *c__; - f = *c__ + *d__ * e; - *p = (*a + *b * e) / f; - *q = (*b - *a * e) / f; - } else { - e = *c__ / *d__; - f = *d__ + *c__ * e; - *p = (*b + *a * e) / f; - *q = (-(*a) + *b * e) / f; - } - - return 0; - -/* End of SLADIV */ - -} /* sladiv_ */ - -/* Subroutine */ int slae2_(real *a, real *b, real *c__, real *rt1, real *rt2) -{ - /* System generated locals */ - real r__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static real ab, df, tb, sm, rt, adf, acmn, acmx; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - SLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix - [ A B ] - [ B C ]. - On return, RT1 is the eigenvalue of larger absolute value, and RT2 - is the eigenvalue of smaller absolute value. - - Arguments - ========= - - A (input) REAL - The (1,1) element of the 2-by-2 matrix. - - B (input) REAL - The (1,2) and (2,1) elements of the 2-by-2 matrix. - - C (input) REAL - The (2,2) element of the 2-by-2 matrix. - - RT1 (output) REAL - The eigenvalue of larger absolute value. - - RT2 (output) REAL - The eigenvalue of smaller absolute value. - - Further Details - =============== - - RT1 is accurate to a few ulps barring over/underflow. - - RT2 may be inaccurate if there is massive cancellation in the - determinant A*C-B*B; higher precision or correctly rounded or - correctly truncated arithmetic would be needed to compute RT2 - accurately in all cases. - - Overflow is possible only if RT1 is within a factor of 5 of overflow. - Underflow is harmless if the input data is 0 or exceeds - underflow_threshold / macheps. - - ===================================================================== - - - Compute the eigenvalues -*/ - - sm = *a + *c__; - df = *a - *c__; - adf = dabs(df); - tb = *b + *b; - ab = dabs(tb); - if (dabs(*a) > dabs(*c__)) { - acmx = *a; - acmn = *c__; - } else { - acmx = *c__; - acmn = *a; - } - if (adf > ab) { -/* Computing 2nd power */ - r__1 = ab / adf; - rt = adf * sqrt(r__1 * r__1 + 1.f); - } else if (adf < ab) { -/* Computing 2nd power */ - r__1 = adf / ab; - rt = ab * sqrt(r__1 * r__1 + 1.f); - } else { - -/* Includes case AB=ADF=0 */ - - rt = ab * sqrt(2.f); - } - if (sm < 0.f) { - *rt1 = (sm - rt) * .5f; - -/* - Order of execution important. - To get fully accurate smaller eigenvalue, - next line needs to be executed in higher precision. -*/ - - *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; - } else if (sm > 0.f) { - *rt1 = (sm + rt) * .5f; - -/* - Order of execution important. - To get fully accurate smaller eigenvalue, - next line needs to be executed in higher precision. -*/ - - *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; - } else { - -/* Includes case RT1 = RT2 = 0 */ - - *rt1 = rt * .5f; - *rt2 = rt * -.5f; - } - return 0; - -/* End of SLAE2 */ - -} /* slae2_ */ - -/* Subroutine */ int slaed0_(integer *icompq, integer *qsiz, integer *n, real - *d__, real *e, real *q, integer *ldq, real *qstore, integer *ldqs, - real *work, integer *iwork, integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2; - real r__1; - - /* Builtin functions */ - double log(doublereal); - integer pow_ii(integer *, integer *); - - /* Local variables */ - static integer i__, j, k, iq, lgn, msd2, smm1, spm1, spm2; - static real temp; - static integer curr; - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *); - static integer iperm, indxq, iwrem; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *); - static integer iqptr, tlvls; - extern /* Subroutine */ int slaed1_(integer *, real *, real *, integer *, - integer *, real *, integer *, real *, integer *, integer *), - slaed7_(integer *, integer *, integer *, integer *, integer *, - integer *, real *, real *, integer *, integer *, real *, integer * - , real *, integer *, integer *, integer *, integer *, integer *, - real *, real *, integer *, integer *); - static integer igivcl; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer igivnm, submat; - extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, - integer *, real *, integer *); - static integer curprb, subpbs, igivpt, curlvl, matsiz, iprmpt, smlsiz; - extern /* Subroutine */ int ssteqr_(char *, integer *, real *, real *, - real *, integer *, real *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SLAED0 computes all eigenvalues and corresponding eigenvectors of a - symmetric tridiagonal matrix using the divide and conquer method. - - Arguments - ========= - - ICOMPQ (input) INTEGER - = 0: Compute eigenvalues only. - = 1: Compute eigenvectors of original dense symmetric matrix - also. On entry, Q contains the orthogonal matrix used - to reduce the original matrix to tridiagonal form. - = 2: Compute eigenvalues and eigenvectors of tridiagonal - matrix. - - QSIZ (input) INTEGER - The dimension of the orthogonal matrix used to reduce - the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - D (input/output) REAL array, dimension (N) - On entry, the main diagonal of the tridiagonal matrix. - On exit, its eigenvalues. - - E (input) REAL array, dimension (N-1) - The off-diagonal elements of the tridiagonal matrix. - On exit, E has been destroyed. - - Q (input/output) REAL array, dimension (LDQ, N) - On entry, Q must contain an N-by-N orthogonal matrix. - If ICOMPQ = 0 Q is not referenced. - If ICOMPQ = 1 On entry, Q is a subset of the columns of the - orthogonal matrix used to reduce the full - matrix to tridiagonal form corresponding to - the subset of the full matrix which is being - decomposed at this time. - If ICOMPQ = 2 On entry, Q will be the identity matrix. - On exit, Q contains the eigenvectors of the - tridiagonal matrix. - - LDQ (input) INTEGER - The leading dimension of the array Q. If eigenvectors are - desired, then LDQ >= max(1,N). In any case, LDQ >= 1. - - QSTORE (workspace) REAL array, dimension (LDQS, N) - Referenced only when ICOMPQ = 1. Used to store parts of - the eigenvector matrix when the updating matrix multiplies - take place. - - LDQS (input) INTEGER - The leading dimension of the array QSTORE. If ICOMPQ = 1, - then LDQS >= max(1,N). In any case, LDQS >= 1. - - WORK (workspace) REAL array, - If ICOMPQ = 0 or 1, the dimension of WORK must be at least - 1 + 3*N + 2*N*lg N + 2*N**2 - ( lg( N ) = smallest integer k - such that 2^k >= N ) - If ICOMPQ = 2, the dimension of WORK must be at least - 4*N + N**2. - - IWORK (workspace) INTEGER array, - If ICOMPQ = 0 or 1, the dimension of IWORK must be at least - 6 + 6*N + 5*N*lg N. - ( lg( N ) = smallest integer k - such that 2^k >= N ) - If ICOMPQ = 2, the dimension of IWORK must be at least - 3 + 5*N. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an eigenvalue while - working on the submatrix lying in rows and columns - INFO/(N+1) through mod(INFO,N+1). - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - qstore_dim1 = *ldqs; - qstore_offset = 1 + qstore_dim1; - qstore -= qstore_offset; - --work; - --iwork; - - /* Function Body */ - *info = 0; - - if (*icompq < 0 || *icompq > 2) { - *info = -1; - } else if (*icompq == 1 && *qsiz < max(0,*n)) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*ldq < max(1,*n)) { - *info = -7; - } else if (*ldqs < max(1,*n)) { - *info = -9; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLAED0", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - smlsiz = ilaenv_(&c__9, "SLAED0", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); - -/* - Determine the size and placement of the submatrices, and save in - the leading elements of IWORK. -*/ - - iwork[1] = *n; - subpbs = 1; - tlvls = 0; -L10: - if (iwork[subpbs] > smlsiz) { - for (j = subpbs; j >= 1; --j) { - iwork[j * 2] = (iwork[j] + 1) / 2; - iwork[(j << 1) - 1] = iwork[j] / 2; -/* L20: */ - } - ++tlvls; - subpbs <<= 1; - goto L10; - } - i__1 = subpbs; - for (j = 2; j <= i__1; ++j) { - iwork[j] += iwork[j - 1]; -/* L30: */ - } - -/* - Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 - using rank-1 modifications (cuts). -*/ - - spm1 = subpbs - 1; - i__1 = spm1; - for (i__ = 1; i__ <= i__1; ++i__) { - submat = iwork[i__] + 1; - smm1 = submat - 1; - d__[smm1] -= (r__1 = e[smm1], dabs(r__1)); - d__[submat] -= (r__1 = e[smm1], dabs(r__1)); -/* L40: */ - } - - indxq = (*n << 2) + 3; - if (*icompq != 2) { - -/* - Set up workspaces for eigenvalues only/accumulate new vectors - routine -*/ - - temp = log((real) (*n)) / log(2.f); - lgn = (integer) temp; - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - iprmpt = indxq + *n + 1; - iperm = iprmpt + *n * lgn; - iqptr = iperm + *n * lgn; - igivpt = iqptr + *n + 2; - igivcl = igivpt + *n * lgn; - - igivnm = 1; - iq = igivnm + (*n << 1) * lgn; -/* Computing 2nd power */ - i__1 = *n; - iwrem = iq + i__1 * i__1 + 1; - -/* Initialize pointers */ - - i__1 = subpbs; - for (i__ = 0; i__ <= i__1; ++i__) { - iwork[iprmpt + i__] = 1; - iwork[igivpt + i__] = 1; -/* L50: */ - } - iwork[iqptr] = 1; - } - -/* - Solve each submatrix eigenproblem at the bottom of the divide and - conquer tree. -*/ - - curr = 0; - i__1 = spm1; - for (i__ = 0; i__ <= i__1; ++i__) { - if (i__ == 0) { - submat = 1; - matsiz = iwork[1]; - } else { - submat = iwork[i__] + 1; - matsiz = iwork[i__ + 1] - iwork[i__]; - } - if (*icompq == 2) { - ssteqr_("I", &matsiz, &d__[submat], &e[submat], &q[submat + - submat * q_dim1], ldq, &work[1], info); - if (*info != 0) { - goto L130; - } - } else { - ssteqr_("I", &matsiz, &d__[submat], &e[submat], &work[iq - 1 + - iwork[iqptr + curr]], &matsiz, &work[1], info); - if (*info != 0) { - goto L130; - } - if (*icompq == 1) { - sgemm_("N", "N", qsiz, &matsiz, &matsiz, &c_b871, &q[submat * - q_dim1 + 1], ldq, &work[iq - 1 + iwork[iqptr + curr]], - &matsiz, &c_b1101, &qstore[submat * qstore_dim1 + 1], - ldqs); - } -/* Computing 2nd power */ - i__2 = matsiz; - iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2; - ++curr; - } - k = 1; - i__2 = iwork[i__ + 1]; - for (j = submat; j <= i__2; ++j) { - iwork[indxq + j] = k; - ++k; -/* L60: */ - } -/* L70: */ - } - -/* - Successively merge eigensystems of adjacent submatrices - into eigensystem for the corresponding larger matrix. - - while ( SUBPBS > 1 ) -*/ - - curlvl = 1; -L80: - if (subpbs > 1) { - spm2 = subpbs - 2; - i__1 = spm2; - for (i__ = 0; i__ <= i__1; i__ += 2) { - if (i__ == 0) { - submat = 1; - matsiz = iwork[2]; - msd2 = iwork[1]; - curprb = 0; - } else { - submat = iwork[i__] + 1; - matsiz = iwork[i__ + 2] - iwork[i__]; - msd2 = matsiz / 2; - ++curprb; - } - -/* - Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) - into an eigensystem of size MATSIZ. - SLAED1 is used only for the full eigensystem of a tridiagonal - matrix. - SLAED7 handles the cases in which eigenvalues only or eigenvalues - and eigenvectors of a full symmetric matrix (which was reduced to - tridiagonal form) are desired. -*/ - - if (*icompq == 2) { - slaed1_(&matsiz, &d__[submat], &q[submat + submat * q_dim1], - ldq, &iwork[indxq + submat], &e[submat + msd2 - 1], & - msd2, &work[1], &iwork[subpbs + 1], info); - } else { - slaed7_(icompq, &matsiz, qsiz, &tlvls, &curlvl, &curprb, &d__[ - submat], &qstore[submat * qstore_dim1 + 1], ldqs, & - iwork[indxq + submat], &e[submat + msd2 - 1], &msd2, & - work[iq], &iwork[iqptr], &iwork[iprmpt], &iwork[iperm] - , &iwork[igivpt], &iwork[igivcl], &work[igivnm], & - work[iwrem], &iwork[subpbs + 1], info); - } - if (*info != 0) { - goto L130; - } - iwork[i__ / 2 + 1] = iwork[i__ + 2]; -/* L90: */ - } - subpbs /= 2; - ++curlvl; - goto L80; - } - -/* - end while - - Re-merge the eigenvalues/vectors which were deflated at the final - merge step. -*/ - - if (*icompq == 1) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - j = iwork[indxq + i__]; - work[i__] = d__[j]; - scopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 - + 1], &c__1); -/* L100: */ - } - scopy_(n, &work[1], &c__1, &d__[1], &c__1); - } else if (*icompq == 2) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - j = iwork[indxq + i__]; - work[i__] = d__[j]; - scopy_(n, &q[j * q_dim1 + 1], &c__1, &work[*n * i__ + 1], &c__1); -/* L110: */ - } - scopy_(n, &work[1], &c__1, &d__[1], &c__1); - slacpy_("A", n, n, &work[*n + 1], n, &q[q_offset], ldq); - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - j = iwork[indxq + i__]; - work[i__] = d__[j]; -/* L120: */ - } - scopy_(n, &work[1], &c__1, &d__[1], &c__1); - } - goto L140; - -L130: - *info = submat * (*n + 1) + submat + matsiz - 1; - -L140: - return 0; - -/* End of SLAED0 */ - -} /* slaed0_ */ - -/* Subroutine */ int slaed1_(integer *n, real *d__, real *q, integer *ldq, - integer *indxq, real *rho, integer *cutpnt, real *work, integer * - iwork, integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, i__1, i__2; - - /* Local variables */ - static integer i__, k, n1, n2, is, iw, iz, iq2, cpp1, indx, indxc, indxp; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *), slaed2_(integer *, integer *, integer *, real *, real - *, integer *, integer *, real *, real *, real *, real *, real *, - integer *, integer *, integer *, integer *, integer *), slaed3_( - integer *, integer *, integer *, real *, real *, integer *, real * - , real *, real *, integer *, integer *, real *, real *, integer *) - ; - static integer idlmda; - extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_( - integer *, integer *, real *, integer *, integer *, integer *); - static integer coltyp; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SLAED1 computes the updated eigensystem of a diagonal - matrix after modification by a rank-one symmetric matrix. This - routine is used only for the eigenproblem which requires all - eigenvalues and eigenvectors of a tridiagonal matrix. SLAED7 handles - the case in which eigenvalues only or eigenvalues and eigenvectors - of a full symmetric matrix (which was reduced to tridiagonal form) - are desired. - - T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) - - where Z = Q'u, u is a vector of length N with ones in the - CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. - - The eigenvectors of the original matrix are stored in Q, and the - eigenvalues are in D. The algorithm consists of three stages: - - The first stage consists of deflating the size of the problem - when there are multiple eigenvalues or if there is a zero in - the Z vector. For each such occurence the dimension of the - secular equation problem is reduced by one. This stage is - performed by the routine SLAED2. - - The second stage consists of calculating the updated - eigenvalues. This is done by finding the roots of the secular - equation via the routine SLAED4 (as called by SLAED3). - This routine also calculates the eigenvectors of the current - problem. - - The final stage consists of computing the updated eigenvectors - directly using the updated eigenvalues. The eigenvectors for - the current problem are multiplied with the eigenvectors from - the overall problem. - - Arguments - ========= - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - D (input/output) REAL array, dimension (N) - On entry, the eigenvalues of the rank-1-perturbed matrix. - On exit, the eigenvalues of the repaired matrix. - - Q (input/output) REAL array, dimension (LDQ,N) - On entry, the eigenvectors of the rank-1-perturbed matrix. - On exit, the eigenvectors of the repaired tridiagonal matrix. - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). - - INDXQ (input/output) INTEGER array, dimension (N) - On entry, the permutation which separately sorts the two - subproblems in D into ascending order. - On exit, the permutation which will reintegrate the - subproblems back into sorted order, - i.e. D( INDXQ( I = 1, N ) ) will be in ascending order. - - RHO (input) REAL - The subdiagonal entry used to create the rank-1 modification. - - CUTPNT (input) INTEGER - The location of the last eigenvalue in the leading sub-matrix. - min(1,N) <= CUTPNT <= N/2. - - WORK (workspace) REAL array, dimension (4*N + N**2) - - IWORK (workspace) INTEGER array, dimension (4*N) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an eigenvalue did not converge - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - Modified by Francoise Tisseur, University of Tennessee. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --indxq; - --work; - --iwork; - - /* Function Body */ - *info = 0; - - if (*n < 0) { - *info = -1; - } else if (*ldq < max(1,*n)) { - *info = -4; - } else /* if(complicated condition) */ { -/* Computing MIN */ - i__1 = 1, i__2 = *n / 2; - if (min(i__1,i__2) > *cutpnt || *n / 2 < *cutpnt) { - *info = -7; - } - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLAED1", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* - The following values are integer pointers which indicate - the portion of the workspace - used by a particular array in SLAED2 and SLAED3. -*/ - - iz = 1; - idlmda = iz + *n; - iw = idlmda + *n; - iq2 = iw + *n; - - indx = 1; - indxc = indx + *n; - coltyp = indxc + *n; - indxp = coltyp + *n; - - -/* - Form the z-vector which consists of the last row of Q_1 and the - first row of Q_2. -*/ - - scopy_(cutpnt, &q[*cutpnt + q_dim1], ldq, &work[iz], &c__1); - cpp1 = *cutpnt + 1; - i__1 = *n - *cutpnt; - scopy_(&i__1, &q[cpp1 + cpp1 * q_dim1], ldq, &work[iz + *cutpnt], &c__1); - -/* Deflate eigenvalues. */ - - slaed2_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, &indxq[1], rho, &work[ - iz], &work[idlmda], &work[iw], &work[iq2], &iwork[indx], &iwork[ - indxc], &iwork[indxp], &iwork[coltyp], info); - - if (*info != 0) { - goto L20; - } - -/* Solve Secular Equation. */ - - if (k != 0) { - is = (iwork[coltyp] + iwork[coltyp + 1]) * *cutpnt + (iwork[coltyp + - 1] + iwork[coltyp + 2]) * (*n - *cutpnt) + iq2; - slaed3_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, rho, &work[idlmda], - &work[iq2], &iwork[indxc], &iwork[coltyp], &work[iw], &work[ - is], info); - if (*info != 0) { - goto L20; - } - -/* Prepare the INDXQ sorting permutation. */ - - n1 = k; - n2 = *n - k; - slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - indxq[i__] = i__; -/* L10: */ - } - } - -L20: - return 0; - -/* End of SLAED1 */ - -} /* slaed1_ */ - -/* Subroutine */ int slaed2_(integer *k, integer *n, integer *n1, real *d__, - real *q, integer *ldq, integer *indxq, real *rho, real *z__, real * - dlamda, real *w, real *q2, integer *indx, integer *indxc, integer * - indxp, integer *coltyp, integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, i__1, i__2; - real r__1, r__2, r__3, r__4; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static real c__; - static integer i__, j; - static real s, t; - static integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1; - static real eps, tau, tol; - static integer psm[4], imax, jmax, ctot[4]; - extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, - integer *, real *, real *), sscal_(integer *, real *, real *, - integer *), scopy_(integer *, real *, integer *, real *, integer * - ); - extern doublereal slapy2_(real *, real *), slamch_(char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer isamax_(integer *, real *, integer *); - extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer - *, integer *, integer *), slacpy_(char *, integer *, integer *, - real *, integer *, real *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - SLAED2 merges the two sets of eigenvalues together into a single - sorted set. Then it tries to deflate the size of the problem. - There are two ways in which deflation can occur: when two or more - eigenvalues are close together or if there is a tiny entry in the - Z vector. For each such occurrence the order of the related secular - equation problem is reduced by one. - - Arguments - ========= - - K (output) INTEGER - The number of non-deflated eigenvalues, and the order of the - related secular equation. 0 <= K <=N. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - N1 (input) INTEGER - The location of the last eigenvalue in the leading sub-matrix. - min(1,N) <= N1 <= N/2. - - D (input/output) REAL array, dimension (N) - On entry, D contains the eigenvalues of the two submatrices to - be combined. - On exit, D contains the trailing (N-K) updated eigenvalues - (those which were deflated) sorted into increasing order. - - Q (input/output) REAL array, dimension (LDQ, N) - On entry, Q contains the eigenvectors of two submatrices in - the two square blocks with corners at (1,1), (N1,N1) - and (N1+1, N1+1), (N,N). - On exit, Q contains the trailing (N-K) updated eigenvectors - (those which were deflated) in its last N-K columns. - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). - - INDXQ (input/output) INTEGER array, dimension (N) - The permutation which separately sorts the two sub-problems - in D into ascending order. Note that elements in the second - half of this permutation must first have N1 added to their - values. Destroyed on exit. - - RHO (input/output) REAL - On entry, the off-diagonal element associated with the rank-1 - cut which originally split the two submatrices which are now - being recombined. - On exit, RHO has been modified to the value required by - SLAED3. - - Z (input) REAL array, dimension (N) - On entry, Z contains the updating vector (the last - row of the first sub-eigenvector matrix and the first row of - the second sub-eigenvector matrix). - On exit, the contents of Z have been destroyed by the updating - process. - - DLAMDA (output) REAL array, dimension (N) - A copy of the first K eigenvalues which will be used by - SLAED3 to form the secular equation. - - W (output) REAL array, dimension (N) - The first k values of the final deflation-altered z-vector - which will be passed to SLAED3. - - Q2 (output) REAL array, dimension (N1**2+(N-N1)**2) - A copy of the first K eigenvectors which will be used by - SLAED3 in a matrix multiply (SGEMM) to solve for the new - eigenvectors. - - INDX (workspace) INTEGER array, dimension (N) - The permutation used to sort the contents of DLAMDA into - ascending order. - - INDXC (output) INTEGER array, dimension (N) - The permutation used to arrange the columns of the deflated - Q matrix into three groups: the first group contains non-zero - elements only at and above N1, the second contains - non-zero elements only below N1, and the third is dense. - - INDXP (workspace) INTEGER array, dimension (N) - The permutation used to place deflated values of D at the end - of the array. INDXP(1:K) points to the nondeflated D-values - and INDXP(K+1:N) points to the deflated eigenvalues. - - COLTYP (workspace/output) INTEGER array, dimension (N) - During execution, a label which will indicate which of the - following types a column in the Q2 matrix is: - 1 : non-zero in the upper half only; - 2 : dense; - 3 : non-zero in the lower half only; - 4 : deflated. - On exit, COLTYP(i) is the number of columns of type i, - for i=1 to 4 only. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - Modified by Francoise Tisseur, University of Tennessee. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --indxq; - --z__; - --dlamda; - --w; - --q2; - --indx; - --indxc; - --indxp; - --coltyp; - - /* Function Body */ - *info = 0; - - if (*n < 0) { - *info = -2; - } else if (*ldq < max(1,*n)) { - *info = -6; - } else /* if(complicated condition) */ { -/* Computing MIN */ - i__1 = 1, i__2 = *n / 2; - if (min(i__1,i__2) > *n1 || *n / 2 < *n1) { - *info = -3; - } - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLAED2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - n2 = *n - *n1; - n1p1 = *n1 + 1; - - if (*rho < 0.f) { - sscal_(&n2, &c_b1150, &z__[n1p1], &c__1); - } - -/* - Normalize z so that norm(z) = 1. Since z is the concatenation of - two normalized vectors, norm2(z) = sqrt(2). -*/ - - t = 1.f / sqrt(2.f); - sscal_(n, &t, &z__[1], &c__1); - -/* RHO = ABS( norm(z)**2 * RHO ) */ - - *rho = (r__1 = *rho * 2.f, dabs(r__1)); - -/* Sort the eigenvalues into increasing order */ - - i__1 = *n; - for (i__ = n1p1; i__ <= i__1; ++i__) { - indxq[i__] += *n1; -/* L10: */ - } - -/* re-integrate the deflated parts from the last pass */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dlamda[i__] = d__[indxq[i__]]; -/* L20: */ - } - slamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]); - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - indx[i__] = indxq[indxc[i__]]; -/* L30: */ - } - -/* Calculate the allowable deflation tolerance */ - - imax = isamax_(n, &z__[1], &c__1); - jmax = isamax_(n, &d__[1], &c__1); - eps = slamch_("Epsilon"); -/* Computing MAX */ - r__3 = (r__1 = d__[jmax], dabs(r__1)), r__4 = (r__2 = z__[imax], dabs( - r__2)); - tol = eps * 8.f * dmax(r__3,r__4); - -/* - If the rank-1 modifier is small enough, no more needs to be done - except to reorganize Q so that its columns correspond with the - elements in D. -*/ - - if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) { - *k = 0; - iq2 = 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__ = indx[j]; - scopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1); - dlamda[j] = d__[i__]; - iq2 += *n; -/* L40: */ - } - slacpy_("A", n, n, &q2[1], n, &q[q_offset], ldq); - scopy_(n, &dlamda[1], &c__1, &d__[1], &c__1); - goto L190; - } - -/* - If there are multiple eigenvalues then the problem deflates. Here - the number of equal eigenvalues are found. As each equal - eigenvalue is found, an elementary reflector is computed to rotate - the corresponding eigensubspace so that the corresponding - components of Z are zero in this new basis. -*/ - - i__1 = *n1; - for (i__ = 1; i__ <= i__1; ++i__) { - coltyp[i__] = 1; -/* L50: */ - } - i__1 = *n; - for (i__ = n1p1; i__ <= i__1; ++i__) { - coltyp[i__] = 3; -/* L60: */ - } - - - *k = 0; - k2 = *n + 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - nj = indx[j]; - if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - coltyp[nj] = 4; - indxp[k2] = nj; - if (j == *n) { - goto L100; - } - } else { - pj = nj; - goto L80; - } -/* L70: */ - } -L80: - ++j; - nj = indx[j]; - if (j > *n) { - goto L100; - } - if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - coltyp[nj] = 4; - indxp[k2] = nj; - } else { - -/* Check if eigenvalues are close enough to allow deflation. */ - - s = z__[pj]; - c__ = z__[nj]; - -/* - Find sqrt(a**2+b**2) without overflow or - destructive underflow. -*/ - - tau = slapy2_(&c__, &s); - t = d__[nj] - d__[pj]; - c__ /= tau; - s = -s / tau; - if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) { - -/* Deflation is possible. */ - - z__[nj] = tau; - z__[pj] = 0.f; - if (coltyp[nj] != coltyp[pj]) { - coltyp[nj] = 2; - } - coltyp[pj] = 4; - srot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, & - c__, &s); -/* Computing 2nd power */ - r__1 = c__; -/* Computing 2nd power */ - r__2 = s; - t = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2); -/* Computing 2nd power */ - r__1 = s; -/* Computing 2nd power */ - r__2 = c__; - d__[nj] = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2); - d__[pj] = t; - --k2; - i__ = 1; -L90: - if (k2 + i__ <= *n) { - if (d__[pj] < d__[indxp[k2 + i__]]) { - indxp[k2 + i__ - 1] = indxp[k2 + i__]; - indxp[k2 + i__] = pj; - ++i__; - goto L90; - } else { - indxp[k2 + i__ - 1] = pj; - } - } else { - indxp[k2 + i__ - 1] = pj; - } - pj = nj; - } else { - ++(*k); - dlamda[*k] = d__[pj]; - w[*k] = z__[pj]; - indxp[*k] = pj; - pj = nj; - } - } - goto L80; -L100: - -/* Record the last eigenvalue. */ - - ++(*k); - dlamda[*k] = d__[pj]; - w[*k] = z__[pj]; - indxp[*k] = pj; - -/* - Count up the total number of the various types of columns, then - form a permutation which positions the four column types into - four uniform groups (although one or more of these groups may be - empty). -*/ - - for (j = 1; j <= 4; ++j) { - ctot[j - 1] = 0; -/* L110: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - ct = coltyp[j]; - ++ctot[ct - 1]; -/* L120: */ - } - -/* PSM(*) = Position in SubMatrix (of types 1 through 4) */ - - psm[0] = 1; - psm[1] = ctot[0] + 1; - psm[2] = psm[1] + ctot[1]; - psm[3] = psm[2] + ctot[2]; - *k = *n - ctot[3]; - -/* - Fill out the INDXC array so that the permutation which it induces - will place all type-1 columns first, all type-2 columns next, - then all type-3's, and finally all type-4's. -*/ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - js = indxp[j]; - ct = coltyp[js]; - indx[psm[ct - 1]] = js; - indxc[psm[ct - 1]] = j; - ++psm[ct - 1]; -/* L130: */ - } - -/* - Sort the eigenvalues and corresponding eigenvectors into DLAMDA - and Q2 respectively. The eigenvalues/vectors which were not - deflated go into the first K slots of DLAMDA and Q2 respectively, - while those which were deflated go into the last N - K slots. -*/ - - i__ = 1; - iq1 = 1; - iq2 = (ctot[0] + ctot[1]) * *n1 + 1; - i__1 = ctot[0]; - for (j = 1; j <= i__1; ++j) { - js = indx[i__]; - scopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); - z__[i__] = d__[js]; - ++i__; - iq1 += *n1; -/* L140: */ - } - - i__1 = ctot[1]; - for (j = 1; j <= i__1; ++j) { - js = indx[i__]; - scopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); - scopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); - z__[i__] = d__[js]; - ++i__; - iq1 += *n1; - iq2 += n2; -/* L150: */ - } - - i__1 = ctot[2]; - for (j = 1; j <= i__1; ++j) { - js = indx[i__]; - scopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); - z__[i__] = d__[js]; - ++i__; - iq2 += n2; -/* L160: */ - } - - iq1 = iq2; - i__1 = ctot[3]; - for (j = 1; j <= i__1; ++j) { - js = indx[i__]; - scopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1); - iq2 += *n; - z__[i__] = d__[js]; - ++i__; -/* L170: */ - } - -/* - The deflated eigenvalues and their corresponding vectors go back - into the last N - K slots of D and Q respectively. -*/ - - slacpy_("A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq); - i__1 = *n - *k; - scopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1); - -/* Copy CTOT into COLTYP for referencing in SLAED3. */ - - for (j = 1; j <= 4; ++j) { - coltyp[j] = ctot[j - 1]; -/* L180: */ - } - -L190: - return 0; - -/* End of SLAED2 */ - -} /* slaed2_ */ - -/* Subroutine */ int slaed3_(integer *k, integer *n, integer *n1, real *d__, - real *q, integer *ldq, real *rho, real *dlamda, real *q2, integer * - indx, integer *ctot, real *w, real *s, integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, i__1, i__2; - real r__1; - - /* Builtin functions */ - double sqrt(doublereal), r_sign(real *, real *); - - /* Local variables */ - static integer i__, j, n2, n12, ii, n23, iq2; - static real temp; - extern doublereal snrm2_(integer *, real *, integer *); - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *), scopy_(integer *, real *, - integer *, real *, integer *), slaed4_(integer *, integer *, real - *, real *, real *, real *, real *, integer *); - extern doublereal slamc3_(real *, real *); - extern /* Subroutine */ int xerbla_(char *, integer *), slacpy_( - char *, integer *, integer *, real *, integer *, real *, integer * - ), slaset_(char *, integer *, integer *, real *, real *, - real *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 - - - Purpose - ======= - - SLAED3 finds the roots of the secular equation, as defined by the - values in D, W, and RHO, between 1 and K. It makes the - appropriate calls to SLAED4 and then updates the eigenvectors by - multiplying the matrix of eigenvectors of the pair of eigensystems - being combined by the matrix of eigenvectors of the K-by-K system - which is solved here. - - This code makes very mild assumptions about floating point - arithmetic. It will work on machines with a guard digit in - add/subtract, or on those binary machines without guard digits - which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. - It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. - - Arguments - ========= - - K (input) INTEGER - The number of terms in the rational function to be solved by - SLAED4. K >= 0. - - N (input) INTEGER - The number of rows and columns in the Q matrix. - N >= K (deflation may result in N>K). - - N1 (input) INTEGER - The location of the last eigenvalue in the leading submatrix. - min(1,N) <= N1 <= N/2. - - D (output) REAL array, dimension (N) - D(I) contains the updated eigenvalues for - 1 <= I <= K. - - Q (output) REAL array, dimension (LDQ,N) - Initially the first K columns are used as workspace. - On output the columns 1 to K contain - the updated eigenvectors. - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). - - RHO (input) REAL - The value of the parameter in the rank one update equation. - RHO >= 0 required. - - DLAMDA (input/output) REAL array, dimension (K) - The first K elements of this array contain the old roots - of the deflated updating problem. These are the poles - of the secular equation. May be changed on output by - having lowest order bit set to zero on Cray X-MP, Cray Y-MP, - Cray-2, or Cray C-90, as described above. - - Q2 (input) REAL array, dimension (LDQ2, N) - The first K columns of this matrix contain the non-deflated - eigenvectors for the split problem. - - INDX (input) INTEGER array, dimension (N) - The permutation used to arrange the columns of the deflated - Q matrix into three groups (see SLAED2). - The rows of the eigenvectors found by SLAED4 must be likewise - permuted before the matrix multiply can take place. - - CTOT (input) INTEGER array, dimension (4) - A count of the total number of the various types of columns - in Q, as described in INDX. The fourth column type is any - column which has been deflated. - - W (input/output) REAL array, dimension (K) - The first K elements of this array contain the components - of the deflation-adjusted updating vector. Destroyed on - output. - - S (workspace) REAL array, dimension (N1 + 1)*K - Will contain the eigenvectors of the repaired matrix which - will be multiplied by the previously accumulated eigenvectors - to update the system. - - LDS (input) INTEGER - The leading dimension of S. LDS >= max(1,K). - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an eigenvalue did not converge - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - Modified by Francoise Tisseur, University of Tennessee. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --dlamda; - --q2; - --indx; - --ctot; - --w; - --s; - - /* Function Body */ - *info = 0; - - if (*k < 0) { - *info = -1; - } else if (*n < *k) { - *info = -2; - } else if (*ldq < max(1,*n)) { - *info = -6; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLAED3", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*k == 0) { - return 0; - } - -/* - Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can - be computed with high relative accuracy (barring over/underflow). - This is a problem on machines without a guard digit in - add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). - The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), - which on any of these machines zeros out the bottommost - bit of DLAMDA(I) if it is 1; this makes the subsequent - subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation - occurs. On binary machines with a guard digit (almost all - machines) it does not change DLAMDA(I) at all. On hexadecimal - and decimal machines with a guard digit, it slightly - changes the bottommost bits of DLAMDA(I). It does not account - for hexadecimal or decimal machines without guard digits - (we know of none). We use a subroutine call to compute - 2*DLAMBDA(I) to prevent optimizing compilers from eliminating - this code. -*/ - - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - dlamda[i__] = slamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; -/* L10: */ - } - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - slaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], - info); - -/* If the zero finder fails, the computation is terminated. */ - - if (*info != 0) { - goto L120; - } -/* L20: */ - } - - if (*k == 1) { - goto L110; - } - if (*k == 2) { - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - w[1] = q[j * q_dim1 + 1]; - w[2] = q[j * q_dim1 + 2]; - ii = indx[1]; - q[j * q_dim1 + 1] = w[ii]; - ii = indx[2]; - q[j * q_dim1 + 2] = w[ii]; -/* L30: */ - } - goto L110; - } - -/* Compute updated W. */ - - scopy_(k, &w[1], &c__1, &s[1], &c__1); - -/* Initialize W(I) = Q(I,I) */ - - i__1 = *ldq + 1; - scopy_(k, &q[q_offset], &i__1, &w[1], &c__1); - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); -/* L40: */ - } - i__2 = *k; - for (i__ = j + 1; i__ <= i__2; ++i__) { - w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); -/* L50: */ - } -/* L60: */ - } - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - r__1 = sqrt(-w[i__]); - w[i__] = r_sign(&r__1, &s[i__]); -/* L70: */ - } - -/* Compute eigenvectors of the modified rank-1 modification. */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *k; - for (i__ = 1; i__ <= i__2; ++i__) { - s[i__] = w[i__] / q[i__ + j * q_dim1]; -/* L80: */ - } - temp = snrm2_(k, &s[1], &c__1); - i__2 = *k; - for (i__ = 1; i__ <= i__2; ++i__) { - ii = indx[i__]; - q[i__ + j * q_dim1] = s[ii] / temp; -/* L90: */ - } -/* L100: */ - } - -/* Compute the updated eigenvectors. */ - -L110: - - n2 = *n - *n1; - n12 = ctot[1] + ctot[2]; - n23 = ctot[2] + ctot[3]; - - slacpy_("A", &n23, k, &q[ctot[1] + 1 + q_dim1], ldq, &s[1], &n23); - iq2 = *n1 * n12 + 1; - if (n23 != 0) { - sgemm_("N", "N", &n2, k, &n23, &c_b871, &q2[iq2], &n2, &s[1], &n23, & - c_b1101, &q[*n1 + 1 + q_dim1], ldq); - } else { - slaset_("A", &n2, k, &c_b1101, &c_b1101, &q[*n1 + 1 + q_dim1], ldq); - } - - slacpy_("A", &n12, k, &q[q_offset], ldq, &s[1], &n12); - if (n12 != 0) { - sgemm_("N", "N", n1, k, &n12, &c_b871, &q2[1], n1, &s[1], &n12, & - c_b1101, &q[q_offset], ldq); - } else { - slaset_("A", n1, k, &c_b1101, &c_b1101, &q[q_dim1 + 1], ldq); - } - - -L120: - return 0; - -/* End of SLAED3 */ - -} /* slaed3_ */ - -/* Subroutine */ int slaed4_(integer *n, integer *i__, real *d__, real *z__, - real *delta, real *rho, real *dlam, integer *info) -{ - /* System generated locals */ - integer i__1; - real r__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static real a, b, c__; - static integer j; - static real w; - static integer ii; - static real dw, zz[3]; - static integer ip1; - static real del, eta, phi, eps, tau, psi; - static integer iim1, iip1; - static real dphi, dpsi; - static integer iter; - static real temp, prew, temp1, dltlb, dltub, midpt; - static integer niter; - static logical swtch; - extern /* Subroutine */ int slaed5_(integer *, real *, real *, real *, - real *, real *), slaed6_(integer *, logical *, real *, real *, - real *, real *, real *, integer *); - static logical swtch3; - extern doublereal slamch_(char *); - static logical orgati; - static real erretm, rhoinv; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - December 23, 1999 - - - Purpose - ======= - - This subroutine computes the I-th updated eigenvalue of a symmetric - rank-one modification to a diagonal matrix whose elements are - given in the array d, and that - - D(i) < D(j) for i < j - - and that RHO > 0. This is arranged by the calling routine, and is - no loss in generality. The rank-one modified system is thus - - diag( D ) + RHO * Z * Z_transpose. - - where we assume the Euclidean norm of Z is 1. - - The method consists of approximating the rational functions in the - secular equation by simpler interpolating rational functions. - - Arguments - ========= - - N (input) INTEGER - The length of all arrays. - - I (input) INTEGER - The index of the eigenvalue to be computed. 1 <= I <= N. - - D (input) REAL array, dimension (N) - The original eigenvalues. It is assumed that they are in - order, D(I) < D(J) for I < J. - - Z (input) REAL array, dimension (N) - The components of the updating vector. - - DELTA (output) REAL array, dimension (N) - If N .ne. 1, DELTA contains (D(j) - lambda_I) in its j-th - component. If N = 1, then DELTA(1) = 1. The vector DELTA - contains the information necessary to construct the - eigenvectors. - - RHO (input) REAL - The scalar in the symmetric updating formula. - - DLAM (output) REAL - The computed lambda_I, the I-th updated eigenvalue. - - INFO (output) INTEGER - = 0: successful exit - > 0: if INFO = 1, the updating process failed. - - Internal Parameters - =================== - - Logical variable ORGATI (origin-at-i?) is used for distinguishing - whether D(i) or D(i+1) is treated as the origin. - - ORGATI = .true. origin at i - ORGATI = .false. origin at i+1 - - Logical variable SWTCH3 (switch-for-3-poles?) is for noting - if we are working with THREE poles! - - MAXIT is the maximum number of iterations allowed for each - eigenvalue. - - Further Details - =============== - - Based on contributions by - Ren-Cang Li, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Since this routine is called in an inner loop, we do no argument - checking. - - Quick return for N=1 and 2. -*/ - - /* Parameter adjustments */ - --delta; - --z__; - --d__; - - /* Function Body */ - *info = 0; - if (*n == 1) { - -/* Presumably, I=1 upon entry */ - - *dlam = d__[1] + *rho * z__[1] * z__[1]; - delta[1] = 1.f; - return 0; - } - if (*n == 2) { - slaed5_(i__, &d__[1], &z__[1], &delta[1], rho, dlam); - return 0; - } - -/* Compute machine epsilon */ - - eps = slamch_("Epsilon"); - rhoinv = 1.f / *rho; - -/* The case I = N */ - - if (*i__ == *n) { - -/* Initialize some basic variables */ - - ii = *n - 1; - niter = 1; - -/* Calculate initial guess */ - - midpt = *rho / 2.f; - -/* - If ||Z||_2 is not one, then TEMP should be set to - RHO * ||Z||_2^2 / TWO -*/ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] = d__[j] - d__[*i__] - midpt; -/* L10: */ - } - - psi = 0.f; - i__1 = *n - 2; - for (j = 1; j <= i__1; ++j) { - psi += z__[j] * z__[j] / delta[j]; -/* L20: */ - } - - c__ = rhoinv + psi; - w = c__ + z__[ii] * z__[ii] / delta[ii] + z__[*n] * z__[*n] / delta[* - n]; - - if (w <= 0.f) { - temp = z__[*n - 1] * z__[*n - 1] / (d__[*n] - d__[*n - 1] + *rho) - + z__[*n] * z__[*n] / *rho; - if (c__ <= temp) { - tau = *rho; - } else { - del = d__[*n] - d__[*n - 1]; - a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n] - ; - b = z__[*n] * z__[*n] * del; - if (a < 0.f) { - tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a); - } else { - tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f); - } - } - -/* - It can be proved that - D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO -*/ - - dltlb = midpt; - dltub = *rho; - } else { - del = d__[*n] - d__[*n - 1]; - a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]; - b = z__[*n] * z__[*n] * del; - if (a < 0.f) { - tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a); - } else { - tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f); - } - -/* - It can be proved that - D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2 -*/ - - dltlb = 0.f; - dltub = midpt; - } - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] = d__[j] - d__[*i__] - tau; -/* L30: */ - } - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.f; - psi = 0.f; - erretm = 0.f; - i__1 = ii; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / delta[j]; - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L40: */ - } - erretm = dabs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - temp = z__[*n] / delta[*n]; - phi = z__[*n] * temp; - dphi = temp * temp; - erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * ( - dpsi + dphi); - - w = rhoinv + phi + psi; - -/* Test for convergence */ - - if (dabs(w) <= eps * erretm) { - *dlam = d__[*i__] + tau; - goto L250; - } - - if (w <= 0.f) { - dltlb = dmax(dltlb,tau); - } else { - dltub = dmin(dltub,tau); - } - -/* Calculate the new step */ - - ++niter; - c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; - a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * ( - dpsi + dphi); - b = delta[*n - 1] * delta[*n] * w; - if (c__ < 0.f) { - c__ = dabs(c__); - } - if (c__ == 0.f) { -/* - ETA = B/A - ETA = RHO - TAU -*/ - eta = dltub - tau; - } else if (a >= 0.f) { - eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / ( - c__ * 2.f); - } else { - eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs( - r__1)))); - } - -/* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. -*/ - - if (w * eta > 0.f) { - eta = -w / (dpsi + dphi); - } - temp = tau + eta; - if (temp > dltub || temp < dltlb) { - if (w < 0.f) { - eta = (dltub - tau) / 2.f; - } else { - eta = (dltlb - tau) / 2.f; - } - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] -= eta; -/* L50: */ - } - - tau += eta; - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.f; - psi = 0.f; - erretm = 0.f; - i__1 = ii; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / delta[j]; - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L60: */ - } - erretm = dabs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - temp = z__[*n] / delta[*n]; - phi = z__[*n] * temp; - dphi = temp * temp; - erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * ( - dpsi + dphi); - - w = rhoinv + phi + psi; - -/* Main loop to update the values of the array DELTA */ - - iter = niter + 1; - - for (niter = iter; niter <= 30; ++niter) { - -/* Test for convergence */ - - if (dabs(w) <= eps * erretm) { - *dlam = d__[*i__] + tau; - goto L250; - } - - if (w <= 0.f) { - dltlb = dmax(dltlb,tau); - } else { - dltub = dmin(dltub,tau); - } - -/* Calculate the new step */ - - c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; - a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * - (dpsi + dphi); - b = delta[*n - 1] * delta[*n] * w; - if (a >= 0.f) { - eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / - (c__ * 2.f); - } else { - eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs( - r__1)))); - } - -/* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. -*/ - - if (w * eta > 0.f) { - eta = -w / (dpsi + dphi); - } - temp = tau + eta; - if (temp > dltub || temp < dltlb) { - if (w < 0.f) { - eta = (dltub - tau) / 2.f; - } else { - eta = (dltlb - tau) / 2.f; - } - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] -= eta; -/* L70: */ - } - - tau += eta; - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.f; - psi = 0.f; - erretm = 0.f; - i__1 = ii; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / delta[j]; - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L80: */ - } - erretm = dabs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - temp = z__[*n] / delta[*n]; - phi = z__[*n] * temp; - dphi = temp * temp; - erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * - (dpsi + dphi); - - w = rhoinv + phi + psi; -/* L90: */ - } - -/* Return with INFO = 1, NITER = MAXIT and not converged */ - - *info = 1; - *dlam = d__[*i__] + tau; - goto L250; - -/* End for the case I = N */ - - } else { - -/* The case for I < N */ - - niter = 1; - ip1 = *i__ + 1; - -/* Calculate initial guess */ - - del = d__[ip1] - d__[*i__]; - midpt = del / 2.f; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] = d__[j] - d__[*i__] - midpt; -/* L100: */ - } - - psi = 0.f; - i__1 = *i__ - 1; - for (j = 1; j <= i__1; ++j) { - psi += z__[j] * z__[j] / delta[j]; -/* L110: */ - } - - phi = 0.f; - i__1 = *i__ + 2; - for (j = *n; j >= i__1; --j) { - phi += z__[j] * z__[j] / delta[j]; -/* L120: */ - } - c__ = rhoinv + psi + phi; - w = c__ + z__[*i__] * z__[*i__] / delta[*i__] + z__[ip1] * z__[ip1] / - delta[ip1]; - - if (w > 0.f) { - -/* - d(i)< the ith eigenvalue < (d(i)+d(i+1))/2 - - We choose d(i) as origin. -*/ - - orgati = TRUE_; - a = c__ * del + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; - b = z__[*i__] * z__[*i__] * del; - if (a > 0.f) { - tau = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( - r__1)))); - } else { - tau = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / - (c__ * 2.f); - } - dltlb = 0.f; - dltub = midpt; - } else { - -/* - (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1) - - We choose d(i+1) as origin. -*/ - - orgati = FALSE_; - a = c__ * del - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; - b = z__[ip1] * z__[ip1] * del; - if (a < 0.f) { - tau = b * 2.f / (a - sqrt((r__1 = a * a + b * 4.f * c__, dabs( - r__1)))); - } else { - tau = -(a + sqrt((r__1 = a * a + b * 4.f * c__, dabs(r__1)))) - / (c__ * 2.f); - } - dltlb = -midpt; - dltub = 0.f; - } - - if (orgati) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] = d__[j] - d__[*i__] - tau; -/* L130: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] = d__[j] - d__[ip1] - tau; -/* L140: */ - } - } - if (orgati) { - ii = *i__; - } else { - ii = *i__ + 1; - } - iim1 = ii - 1; - iip1 = ii + 1; - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.f; - psi = 0.f; - erretm = 0.f; - i__1 = iim1; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / delta[j]; - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L150: */ - } - erretm = dabs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - dphi = 0.f; - phi = 0.f; - i__1 = iip1; - for (j = *n; j >= i__1; --j) { - temp = z__[j] / delta[j]; - phi += z__[j] * temp; - dphi += temp * temp; - erretm += phi; -/* L160: */ - } - - w = rhoinv + phi + psi; - -/* - W is the value of the secular function with - its ii-th element removed. -*/ - - swtch3 = FALSE_; - if (orgati) { - if (w < 0.f) { - swtch3 = TRUE_; - } - } else { - if (w > 0.f) { - swtch3 = TRUE_; - } - } - if (ii == 1 || ii == *n) { - swtch3 = FALSE_; - } - - temp = z__[ii] / delta[ii]; - dw = dpsi + dphi + temp * temp; - temp = z__[ii] * temp; - w += temp; - erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f - + dabs(tau) * dw; - -/* Test for convergence */ - - if (dabs(w) <= eps * erretm) { - if (orgati) { - *dlam = d__[*i__] + tau; - } else { - *dlam = d__[ip1] + tau; - } - goto L250; - } - - if (w <= 0.f) { - dltlb = dmax(dltlb,tau); - } else { - dltub = dmin(dltub,tau); - } - -/* Calculate the new step */ - - ++niter; - if (! swtch3) { - if (orgati) { -/* Computing 2nd power */ - r__1 = z__[*i__] / delta[*i__]; - c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (r__1 * - r__1); - } else { -/* Computing 2nd power */ - r__1 = z__[ip1] / delta[ip1]; - c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (r__1 * - r__1); - } - a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] * - dw; - b = delta[*i__] * delta[ip1] * w; - if (c__ == 0.f) { - if (a == 0.f) { - if (orgati) { - a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] * - (dpsi + dphi); - } else { - a = z__[ip1] * z__[ip1] + delta[*i__] * delta[*i__] * - (dpsi + dphi); - } - } - eta = b / a; - } else if (a <= 0.f) { - eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / - (c__ * 2.f); - } else { - eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( - r__1)))); - } - } else { - -/* Interpolation using THREE most relevant poles */ - - temp = rhoinv + psi + phi; - if (orgati) { - temp1 = z__[iim1] / delta[iim1]; - temp1 *= temp1; - c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - d__[ - iip1]) * temp1; - zz[0] = z__[iim1] * z__[iim1]; - zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + dphi); - } else { - temp1 = z__[iip1] / delta[iip1]; - temp1 *= temp1; - c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - d__[ - iim1]) * temp1; - zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - temp1)); - zz[2] = z__[iip1] * z__[iip1]; - } - zz[1] = z__[ii] * z__[ii]; - slaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info); - if (*info != 0) { - goto L250; - } - } - -/* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. -*/ - - if (w * eta >= 0.f) { - eta = -w / dw; - } - temp = tau + eta; - if (temp > dltub || temp < dltlb) { - if (w < 0.f) { - eta = (dltub - tau) / 2.f; - } else { - eta = (dltlb - tau) / 2.f; - } - } - - prew = w; - -/* L170: */ - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] -= eta; -/* L180: */ - } - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.f; - psi = 0.f; - erretm = 0.f; - i__1 = iim1; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / delta[j]; - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L190: */ - } - erretm = dabs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - dphi = 0.f; - phi = 0.f; - i__1 = iip1; - for (j = *n; j >= i__1; --j) { - temp = z__[j] / delta[j]; - phi += z__[j] * temp; - dphi += temp * temp; - erretm += phi; -/* L200: */ - } - - temp = z__[ii] / delta[ii]; - dw = dpsi + dphi + temp * temp; - temp = z__[ii] * temp; - w = rhoinv + phi + psi + temp; - erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f - + (r__1 = tau + eta, dabs(r__1)) * dw; - - swtch = FALSE_; - if (orgati) { - if (-w > dabs(prew) / 10.f) { - swtch = TRUE_; - } - } else { - if (w > dabs(prew) / 10.f) { - swtch = TRUE_; - } - } - - tau += eta; - -/* Main loop to update the values of the array DELTA */ - - iter = niter + 1; - - for (niter = iter; niter <= 30; ++niter) { - -/* Test for convergence */ - - if (dabs(w) <= eps * erretm) { - if (orgati) { - *dlam = d__[*i__] + tau; - } else { - *dlam = d__[ip1] + tau; - } - goto L250; - } - - if (w <= 0.f) { - dltlb = dmax(dltlb,tau); - } else { - dltub = dmin(dltub,tau); - } - -/* Calculate the new step */ - - if (! swtch3) { - if (! swtch) { - if (orgati) { -/* Computing 2nd power */ - r__1 = z__[*i__] / delta[*i__]; - c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * ( - r__1 * r__1); - } else { -/* Computing 2nd power */ - r__1 = z__[ip1] / delta[ip1]; - c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * - (r__1 * r__1); - } - } else { - temp = z__[ii] / delta[ii]; - if (orgati) { - dpsi += temp * temp; - } else { - dphi += temp * temp; - } - c__ = w - delta[*i__] * dpsi - delta[ip1] * dphi; - } - a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] - * dw; - b = delta[*i__] * delta[ip1] * w; - if (c__ == 0.f) { - if (a == 0.f) { - if (! swtch) { - if (orgati) { - a = z__[*i__] * z__[*i__] + delta[ip1] * - delta[ip1] * (dpsi + dphi); - } else { - a = z__[ip1] * z__[ip1] + delta[*i__] * delta[ - *i__] * (dpsi + dphi); - } - } else { - a = delta[*i__] * delta[*i__] * dpsi + delta[ip1] - * delta[ip1] * dphi; - } - } - eta = b / a; - } else if (a <= 0.f) { - eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)) - )) / (c__ * 2.f); - } else { - eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, - dabs(r__1)))); - } - } else { - -/* Interpolation using THREE most relevant poles */ - - temp = rhoinv + psi + phi; - if (swtch) { - c__ = temp - delta[iim1] * dpsi - delta[iip1] * dphi; - zz[0] = delta[iim1] * delta[iim1] * dpsi; - zz[2] = delta[iip1] * delta[iip1] * dphi; - } else { - if (orgati) { - temp1 = z__[iim1] / delta[iim1]; - temp1 *= temp1; - c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - - d__[iip1]) * temp1; - zz[0] = z__[iim1] * z__[iim1]; - zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + - dphi); - } else { - temp1 = z__[iip1] / delta[iip1]; - temp1 *= temp1; - c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - - d__[iim1]) * temp1; - zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - - temp1)); - zz[2] = z__[iip1] * z__[iip1]; - } - } - slaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, - info); - if (*info != 0) { - goto L250; - } - } - -/* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. -*/ - - if (w * eta >= 0.f) { - eta = -w / dw; - } - temp = tau + eta; - if (temp > dltub || temp < dltlb) { - if (w < 0.f) { - eta = (dltub - tau) / 2.f; - } else { - eta = (dltlb - tau) / 2.f; - } - } - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] -= eta; -/* L210: */ - } - - tau += eta; - prew = w; - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.f; - psi = 0.f; - erretm = 0.f; - i__1 = iim1; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / delta[j]; - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L220: */ - } - erretm = dabs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - dphi = 0.f; - phi = 0.f; - i__1 = iip1; - for (j = *n; j >= i__1; --j) { - temp = z__[j] / delta[j]; - phi += z__[j] * temp; - dphi += temp * temp; - erretm += phi; -/* L230: */ - } - - temp = z__[ii] / delta[ii]; - dw = dpsi + dphi + temp * temp; - temp = z__[ii] * temp; - w = rhoinv + phi + psi + temp; - erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * - 3.f + dabs(tau) * dw; - if (w * prew > 0.f && dabs(w) > dabs(prew) / 10.f) { - swtch = ! swtch; - } - -/* L240: */ - } - -/* Return with INFO = 1, NITER = MAXIT and not converged */ - - *info = 1; - if (orgati) { - *dlam = d__[*i__] + tau; - } else { - *dlam = d__[ip1] + tau; - } - - } - -L250: - - return 0; - -/* End of SLAED4 */ - -} /* slaed4_ */ - -/* Subroutine */ int slaed5_(integer *i__, real *d__, real *z__, real *delta, - real *rho, real *dlam) -{ - /* System generated locals */ - real r__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static real b, c__, w, del, tau, temp; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - September 30, 1994 - - - Purpose - ======= - - This subroutine computes the I-th eigenvalue of a symmetric rank-one - modification of a 2-by-2 diagonal matrix - - diag( D ) + RHO * Z * transpose(Z) . - - The diagonal elements in the array D are assumed to satisfy - - D(i) < D(j) for i < j . - - We also assume RHO > 0 and that the Euclidean norm of the vector - Z is one. - - Arguments - ========= - - I (input) INTEGER - The index of the eigenvalue to be computed. I = 1 or I = 2. - - D (input) REAL array, dimension (2) - The original eigenvalues. We assume D(1) < D(2). - - Z (input) REAL array, dimension (2) - The components of the updating vector. - - DELTA (output) REAL array, dimension (2) - The vector DELTA contains the information necessary - to construct the eigenvectors. - - RHO (input) REAL - The scalar in the symmetric updating formula. - - DLAM (output) REAL - The computed lambda_I, the I-th updated eigenvalue. - - Further Details - =============== - - Based on contributions by - Ren-Cang Li, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --delta; - --z__; - --d__; - - /* Function Body */ - del = d__[2] - d__[1]; - if (*i__ == 1) { - w = *rho * 2.f * (z__[2] * z__[2] - z__[1] * z__[1]) / del + 1.f; - if (w > 0.f) { - b = del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); - c__ = *rho * z__[1] * z__[1] * del; - -/* B > ZERO, always */ - - tau = c__ * 2.f / (b + sqrt((r__1 = b * b - c__ * 4.f, dabs(r__1)) - )); - *dlam = d__[1] + tau; - delta[1] = -z__[1] / tau; - delta[2] = z__[2] / (del - tau); - } else { - b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); - c__ = *rho * z__[2] * z__[2] * del; - if (b > 0.f) { - tau = c__ * -2.f / (b + sqrt(b * b + c__ * 4.f)); - } else { - tau = (b - sqrt(b * b + c__ * 4.f)) / 2.f; - } - *dlam = d__[2] + tau; - delta[1] = -z__[1] / (del + tau); - delta[2] = -z__[2] / tau; - } - temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); - delta[1] /= temp; - delta[2] /= temp; - } else { - -/* Now I=2 */ - - b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); - c__ = *rho * z__[2] * z__[2] * del; - if (b > 0.f) { - tau = (b + sqrt(b * b + c__ * 4.f)) / 2.f; - } else { - tau = c__ * 2.f / (-b + sqrt(b * b + c__ * 4.f)); - } - *dlam = d__[2] + tau; - delta[1] = -z__[1] / (del + tau); - delta[2] = -z__[2] / tau; - temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); - delta[1] /= temp; - delta[2] /= temp; - } - return 0; - -/* End OF SLAED5 */ - -} /* slaed5_ */ - -/* Subroutine */ int slaed6_(integer *kniter, logical *orgati, real *rho, - real *d__, real *z__, real *finit, real *tau, integer *info) -{ - /* Initialized data */ - - static logical first = TRUE_; - - /* System generated locals */ - integer i__1; - real r__1, r__2, r__3, r__4; - - /* Builtin functions */ - double sqrt(doublereal), log(doublereal), pow_ri(real *, integer *); - - /* Local variables */ - static real a, b, c__, f; - static integer i__; - static real fc, df, ddf, eta, eps, base; - static integer iter; - static real temp, temp1, temp2, temp3, temp4; - static logical scale; - static integer niter; - static real small1, small2, sminv1, sminv2, dscale[3], sclfac; - extern doublereal slamch_(char *); - static real zscale[3], erretm, sclinv; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 - - - Purpose - ======= - - SLAED6 computes the positive or negative root (closest to the origin) - of - z(1) z(2) z(3) - f(x) = rho + --------- + ---------- + --------- - d(1)-x d(2)-x d(3)-x - - It is assumed that - - if ORGATI = .true. the root is between d(2) and d(3); - otherwise it is between d(1) and d(2) - - This routine will be called by SLAED4 when necessary. In most cases, - the root sought is the smallest in magnitude, though it might not be - in some extremely rare situations. - - Arguments - ========= - - KNITER (input) INTEGER - Refer to SLAED4 for its significance. - - ORGATI (input) LOGICAL - If ORGATI is true, the needed root is between d(2) and - d(3); otherwise it is between d(1) and d(2). See - SLAED4 for further details. - - RHO (input) REAL - Refer to the equation f(x) above. - - D (input) REAL array, dimension (3) - D satisfies d(1) < d(2) < d(3). - - Z (input) REAL array, dimension (3) - Each of the elements in z must be positive. - - FINIT (input) REAL - The value of f at 0. It is more accurate than the one - evaluated inside this routine (if someone wants to do - so). - - TAU (output) REAL - The root of the equation f(x). - - INFO (output) INTEGER - = 0: successful exit - > 0: if INFO = 1, failure to converge - - Further Details - =============== - - Based on contributions by - Ren-Cang Li, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== -*/ - - /* Parameter adjustments */ - --z__; - --d__; - - /* Function Body */ - - *info = 0; - - niter = 1; - *tau = 0.f; - if (*kniter == 2) { - if (*orgati) { - temp = (d__[3] - d__[2]) / 2.f; - c__ = *rho + z__[1] / (d__[1] - d__[2] - temp); - a = c__ * (d__[2] + d__[3]) + z__[2] + z__[3]; - b = c__ * d__[2] * d__[3] + z__[2] * d__[3] + z__[3] * d__[2]; - } else { - temp = (d__[1] - d__[2]) / 2.f; - c__ = *rho + z__[3] / (d__[3] - d__[2] - temp); - a = c__ * (d__[1] + d__[2]) + z__[1] + z__[2]; - b = c__ * d__[1] * d__[2] + z__[1] * d__[2] + z__[2] * d__[1]; - } -/* Computing MAX */ - r__1 = dabs(a), r__2 = dabs(b), r__1 = max(r__1,r__2), r__2 = dabs( - c__); - temp = dmax(r__1,r__2); - a /= temp; - b /= temp; - c__ /= temp; - if (c__ == 0.f) { - *tau = b / a; - } else if (a <= 0.f) { - *tau = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / ( - c__ * 2.f); - } else { - *tau = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( - r__1)))); - } - temp = *rho + z__[1] / (d__[1] - *tau) + z__[2] / (d__[2] - *tau) + - z__[3] / (d__[3] - *tau); - if (dabs(*finit) <= dabs(temp)) { - *tau = 0.f; - } - } - -/* - On first call to routine, get machine parameters for - possible scaling to avoid overflow -*/ - - if (first) { - eps = slamch_("Epsilon"); - base = slamch_("Base"); - i__1 = (integer) (log(slamch_("SafMin")) / log(base) / 3.f) - ; - small1 = pow_ri(&base, &i__1); - sminv1 = 1.f / small1; - small2 = small1 * small1; - sminv2 = sminv1 * sminv1; - first = FALSE_; - } - -/* - Determine if scaling of inputs necessary to avoid overflow - when computing 1/TEMP**3 -*/ - - if (*orgati) { -/* Computing MIN */ - r__3 = (r__1 = d__[2] - *tau, dabs(r__1)), r__4 = (r__2 = d__[3] - * - tau, dabs(r__2)); - temp = dmin(r__3,r__4); - } else { -/* Computing MIN */ - r__3 = (r__1 = d__[1] - *tau, dabs(r__1)), r__4 = (r__2 = d__[2] - * - tau, dabs(r__2)); - temp = dmin(r__3,r__4); - } - scale = FALSE_; - if (temp <= small1) { - scale = TRUE_; - if (temp <= small2) { - -/* Scale up by power of radix nearest 1/SAFMIN**(2/3) */ - - sclfac = sminv2; - sclinv = small2; - } else { - -/* Scale up by power of radix nearest 1/SAFMIN**(1/3) */ - - sclfac = sminv1; - sclinv = small1; - } - -/* Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) */ - - for (i__ = 1; i__ <= 3; ++i__) { - dscale[i__ - 1] = d__[i__] * sclfac; - zscale[i__ - 1] = z__[i__] * sclfac; -/* L10: */ - } - *tau *= sclfac; - } else { - -/* Copy D and Z to DSCALE and ZSCALE */ - - for (i__ = 1; i__ <= 3; ++i__) { - dscale[i__ - 1] = d__[i__]; - zscale[i__ - 1] = z__[i__]; -/* L20: */ - } - } - - fc = 0.f; - df = 0.f; - ddf = 0.f; - for (i__ = 1; i__ <= 3; ++i__) { - temp = 1.f / (dscale[i__ - 1] - *tau); - temp1 = zscale[i__ - 1] * temp; - temp2 = temp1 * temp; - temp3 = temp2 * temp; - fc += temp1 / dscale[i__ - 1]; - df += temp2; - ddf += temp3; -/* L30: */ - } - f = *finit + *tau * fc; - - if (dabs(f) <= 0.f) { - goto L60; - } - -/* - Iteration begins - - It is not hard to see that - - 1) Iterations will go up monotonically - if FINIT < 0; - - 2) Iterations will go down monotonically - if FINIT > 0. -*/ - - iter = niter + 1; - - for (niter = iter; niter <= 20; ++niter) { - - if (*orgati) { - temp1 = dscale[1] - *tau; - temp2 = dscale[2] - *tau; - } else { - temp1 = dscale[0] - *tau; - temp2 = dscale[1] - *tau; - } - a = (temp1 + temp2) * f - temp1 * temp2 * df; - b = temp1 * temp2 * f; - c__ = f - (temp1 + temp2) * df + temp1 * temp2 * ddf; -/* Computing MAX */ - r__1 = dabs(a), r__2 = dabs(b), r__1 = max(r__1,r__2), r__2 = dabs( - c__); - temp = dmax(r__1,r__2); - a /= temp; - b /= temp; - c__ /= temp; - if (c__ == 0.f) { - eta = b / a; - } else if (a <= 0.f) { - eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / ( - c__ * 2.f); - } else { - eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( - r__1)))); - } - if (f * eta >= 0.f) { - eta = -f / df; - } - - temp = eta + *tau; - if (*orgati) { - if (eta > 0.f && temp >= dscale[2]) { - eta = (dscale[2] - *tau) / 2.f; - } - if (eta < 0.f && temp <= dscale[1]) { - eta = (dscale[1] - *tau) / 2.f; - } - } else { - if (eta > 0.f && temp >= dscale[1]) { - eta = (dscale[1] - *tau) / 2.f; - } - if (eta < 0.f && temp <= dscale[0]) { - eta = (dscale[0] - *tau) / 2.f; - } - } - *tau += eta; - - fc = 0.f; - erretm = 0.f; - df = 0.f; - ddf = 0.f; - for (i__ = 1; i__ <= 3; ++i__) { - temp = 1.f / (dscale[i__ - 1] - *tau); - temp1 = zscale[i__ - 1] * temp; - temp2 = temp1 * temp; - temp3 = temp2 * temp; - temp4 = temp1 / dscale[i__ - 1]; - fc += temp4; - erretm += dabs(temp4); - df += temp2; - ddf += temp3; -/* L40: */ - } - f = *finit + *tau * fc; - erretm = (dabs(*finit) + dabs(*tau) * erretm) * 8.f + dabs(*tau) * df; - if (dabs(f) <= eps * erretm) { - goto L60; - } -/* L50: */ - } - *info = 1; -L60: - -/* Undo scaling */ - - if (scale) { - *tau *= sclinv; - } - return 0; - -/* End of SLAED6 */ - -} /* slaed6_ */ - -/* Subroutine */ int slaed7_(integer *icompq, integer *n, integer *qsiz, - integer *tlvls, integer *curlvl, integer *curpbm, real *d__, real *q, - integer *ldq, integer *indxq, real *rho, integer *cutpnt, real * - qstore, integer *qptr, integer *prmptr, integer *perm, integer * - givptr, integer *givcol, real *givnum, real *work, integer *iwork, - integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, i__1, i__2; - - /* Builtin functions */ - integer pow_ii(integer *, integer *); - - /* Local variables */ - static integer i__, k, n1, n2, is, iw, iz, iq2, ptr, ldq2, indx, curr, - indxc; - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *); - static integer indxp; - extern /* Subroutine */ int slaed8_(integer *, integer *, integer *, - integer *, real *, real *, integer *, integer *, real *, integer * - , real *, real *, real *, integer *, real *, integer *, integer *, - integer *, real *, integer *, integer *, integer *), slaed9_( - integer *, integer *, integer *, integer *, real *, real *, - integer *, real *, real *, real *, real *, integer *, integer *), - slaeda_(integer *, integer *, integer *, integer *, integer *, - integer *, integer *, integer *, real *, real *, integer *, real * - , real *, integer *); - static integer idlmda; - extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_( - integer *, integer *, real *, integer *, integer *, integer *); - static integer coltyp; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - SLAED7 computes the updated eigensystem of a diagonal - matrix after modification by a rank-one symmetric matrix. This - routine is used only for the eigenproblem which requires all - eigenvalues and optionally eigenvectors of a dense symmetric matrix - that has been reduced to tridiagonal form. SLAED1 handles - the case in which all eigenvalues and eigenvectors of a symmetric - tridiagonal matrix are desired. - - T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) - - where Z = Q'u, u is a vector of length N with ones in the - CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. - - The eigenvectors of the original matrix are stored in Q, and the - eigenvalues are in D. The algorithm consists of three stages: - - The first stage consists of deflating the size of the problem - when there are multiple eigenvalues or if there is a zero in - the Z vector. For each such occurence the dimension of the - secular equation problem is reduced by one. This stage is - performed by the routine SLAED8. - - The second stage consists of calculating the updated - eigenvalues. This is done by finding the roots of the secular - equation via the routine SLAED4 (as called by SLAED9). - This routine also calculates the eigenvectors of the current - problem. - - The final stage consists of computing the updated eigenvectors - directly using the updated eigenvalues. The eigenvectors for - the current problem are multiplied with the eigenvectors from - the overall problem. - - Arguments - ========= - - ICOMPQ (input) INTEGER - = 0: Compute eigenvalues only. - = 1: Compute eigenvectors of original dense symmetric matrix - also. On entry, Q contains the orthogonal matrix used - to reduce the original matrix to tridiagonal form. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - QSIZ (input) INTEGER - The dimension of the orthogonal matrix used to reduce - the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. - - TLVLS (input) INTEGER - The total number of merging levels in the overall divide and - conquer tree. - - CURLVL (input) INTEGER - The current level in the overall merge routine, - 0 <= CURLVL <= TLVLS. - - CURPBM (input) INTEGER - The current problem in the current level in the overall - merge routine (counting from upper left to lower right). - - D (input/output) REAL array, dimension (N) - On entry, the eigenvalues of the rank-1-perturbed matrix. - On exit, the eigenvalues of the repaired matrix. - - Q (input/output) REAL array, dimension (LDQ, N) - On entry, the eigenvectors of the rank-1-perturbed matrix. - On exit, the eigenvectors of the repaired tridiagonal matrix. - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). - - INDXQ (output) INTEGER array, dimension (N) - The permutation which will reintegrate the subproblem just - solved back into sorted order, i.e., D( INDXQ( I = 1, N ) ) - will be in ascending order. - - RHO (input) REAL - The subdiagonal element used to create the rank-1 - modification. - - CUTPNT (input) INTEGER - Contains the location of the last eigenvalue in the leading - sub-matrix. min(1,N) <= CUTPNT <= N. - - QSTORE (input/output) REAL array, dimension (N**2+1) - Stores eigenvectors of submatrices encountered during - divide and conquer, packed together. QPTR points to - beginning of the submatrices. - - QPTR (input/output) INTEGER array, dimension (N+2) - List of indices pointing to beginning of submatrices stored - in QSTORE. The submatrices are numbered starting at the - bottom left of the divide and conquer tree, from left to - right and bottom to top. - - PRMPTR (input) INTEGER array, dimension (N lg N) - Contains a list of pointers which indicate where in PERM a - level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) - indicates the size of the permutation and also the size of - the full, non-deflated problem. - - PERM (input) INTEGER array, dimension (N lg N) - Contains the permutations (from deflation and sorting) to be - applied to each eigenblock. - - GIVPTR (input) INTEGER array, dimension (N lg N) - Contains a list of pointers which indicate where in GIVCOL a - level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) - indicates the number of Givens rotations. - - GIVCOL (input) INTEGER array, dimension (2, N lg N) - Each pair of numbers indicates a pair of columns to take place - in a Givens rotation. - - GIVNUM (input) REAL array, dimension (2, N lg N) - Each number indicates the S value to be used in the - corresponding Givens rotation. - - WORK (workspace) REAL array, dimension (3*N+QSIZ*N) - - IWORK (workspace) INTEGER array, dimension (4*N) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an eigenvalue did not converge - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --indxq; - --qstore; - --qptr; - --prmptr; - --perm; - --givptr; - givcol -= 3; - givnum -= 3; - --work; - --iwork; - - /* Function Body */ - *info = 0; - - if (*icompq < 0 || *icompq > 1) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*icompq == 1 && *qsiz < *n) { - *info = -4; - } else if (*ldq < max(1,*n)) { - *info = -9; - } else if (min(1,*n) > *cutpnt || *n < *cutpnt) { - *info = -12; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLAED7", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* - The following values are for bookkeeping purposes only. They are - integer pointers which indicate the portion of the workspace - used by a particular array in SLAED8 and SLAED9. -*/ - - if (*icompq == 1) { - ldq2 = *qsiz; - } else { - ldq2 = *n; - } - - iz = 1; - idlmda = iz + *n; - iw = idlmda + *n; - iq2 = iw + *n; - is = iq2 + *n * ldq2; - - indx = 1; - indxc = indx + *n; - coltyp = indxc + *n; - indxp = coltyp + *n; - -/* - Form the z-vector which consists of the last row of Q_1 and the - first row of Q_2. -*/ - - ptr = pow_ii(&c__2, tlvls) + 1; - i__1 = *curlvl - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = *tlvls - i__; - ptr += pow_ii(&c__2, &i__2); -/* L10: */ - } - curr = ptr + *curpbm; - slaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], & - givcol[3], &givnum[3], &qstore[1], &qptr[1], &work[iz], &work[iz - + *n], info); - -/* - When solving the final problem, we no longer need the stored data, - so we will overwrite the data from this level onto the previously - used storage space. -*/ - - if (*curlvl == *tlvls) { - qptr[curr] = 1; - prmptr[curr] = 1; - givptr[curr] = 1; - } - -/* Sort and Deflate eigenvalues. */ - - slaed8_(icompq, &k, n, qsiz, &d__[1], &q[q_offset], ldq, &indxq[1], rho, - cutpnt, &work[iz], &work[idlmda], &work[iq2], &ldq2, &work[iw], & - perm[prmptr[curr]], &givptr[curr + 1], &givcol[(givptr[curr] << 1) - + 1], &givnum[(givptr[curr] << 1) + 1], &iwork[indxp], &iwork[ - indx], info); - prmptr[curr + 1] = prmptr[curr] + *n; - givptr[curr + 1] += givptr[curr]; - -/* Solve Secular Equation. */ - - if (k != 0) { - slaed9_(&k, &c__1, &k, n, &d__[1], &work[is], &k, rho, &work[idlmda], - &work[iw], &qstore[qptr[curr]], &k, info); - if (*info != 0) { - goto L30; - } - if (*icompq == 1) { - sgemm_("N", "N", qsiz, &k, &k, &c_b871, &work[iq2], &ldq2, & - qstore[qptr[curr]], &k, &c_b1101, &q[q_offset], ldq); - } -/* Computing 2nd power */ - i__1 = k; - qptr[curr + 1] = qptr[curr] + i__1 * i__1; - -/* Prepare the INDXQ sorting permutation. */ - - n1 = k; - n2 = *n - k; - slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); - } else { - qptr[curr + 1] = qptr[curr]; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - indxq[i__] = i__; -/* L20: */ - } - } - -L30: - return 0; - -/* End of SLAED7 */ - -} /* slaed7_ */ - -/* Subroutine */ int slaed8_(integer *icompq, integer *k, integer *n, integer - *qsiz, real *d__, real *q, integer *ldq, integer *indxq, real *rho, - integer *cutpnt, real *z__, real *dlamda, real *q2, integer *ldq2, - real *w, integer *perm, integer *givptr, integer *givcol, real * - givnum, integer *indxp, integer *indx, integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, q2_dim1, q2_offset, i__1; - real r__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static real c__; - static integer i__, j; - static real s, t; - static integer k2, n1, n2, jp, n1p1; - static real eps, tau, tol; - static integer jlam, imax, jmax; - extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, - integer *, real *, real *), sscal_(integer *, real *, real *, - integer *), scopy_(integer *, real *, integer *, real *, integer * - ); - extern doublereal slapy2_(real *, real *), slamch_(char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer isamax_(integer *, real *, integer *); - extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer - *, integer *, integer *), slacpy_(char *, integer *, integer *, - real *, integer *, real *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - September 30, 1994 - - - Purpose - ======= - - SLAED8 merges the two sets of eigenvalues together into a single - sorted set. Then it tries to deflate the size of the problem. - There are two ways in which deflation can occur: when two or more - eigenvalues are close together or if there is a tiny element in the - Z vector. For each such occurrence the order of the related secular - equation problem is reduced by one. - - Arguments - ========= - - ICOMPQ (input) INTEGER - = 0: Compute eigenvalues only. - = 1: Compute eigenvectors of original dense symmetric matrix - also. On entry, Q contains the orthogonal matrix used - to reduce the original matrix to tridiagonal form. - - K (output) INTEGER - The number of non-deflated eigenvalues, and the order of the - related secular equation. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - QSIZ (input) INTEGER - The dimension of the orthogonal matrix used to reduce - the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. - - D (input/output) REAL array, dimension (N) - On entry, the eigenvalues of the two submatrices to be - combined. On exit, the trailing (N-K) updated eigenvalues - (those which were deflated) sorted into increasing order. - - Q (input/output) REAL array, dimension (LDQ,N) - If ICOMPQ = 0, Q is not referenced. Otherwise, - on entry, Q contains the eigenvectors of the partially solved - system which has been previously updated in matrix - multiplies with other partially solved eigensystems. - On exit, Q contains the trailing (N-K) updated eigenvectors - (those which were deflated) in its last N-K columns. - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). - - INDXQ (input) INTEGER array, dimension (N) - The permutation which separately sorts the two sub-problems - in D into ascending order. Note that elements in the second - half of this permutation must first have CUTPNT added to - their values in order to be accurate. - - RHO (input/output) REAL - On entry, the off-diagonal element associated with the rank-1 - cut which originally split the two submatrices which are now - being recombined. - On exit, RHO has been modified to the value required by - SLAED3. - - CUTPNT (input) INTEGER - The location of the last eigenvalue in the leading - sub-matrix. min(1,N) <= CUTPNT <= N. - - Z (input) REAL array, dimension (N) - On entry, Z contains the updating vector (the last row of - the first sub-eigenvector matrix and the first row of the - second sub-eigenvector matrix). - On exit, the contents of Z are destroyed by the updating - process. - - DLAMDA (output) REAL array, dimension (N) - A copy of the first K eigenvalues which will be used by - SLAED3 to form the secular equation. - - Q2 (output) REAL array, dimension (LDQ2,N) - If ICOMPQ = 0, Q2 is not referenced. Otherwise, - a copy of the first K eigenvectors which will be used by - SLAED7 in a matrix multiply (SGEMM) to update the new - eigenvectors. - - LDQ2 (input) INTEGER - The leading dimension of the array Q2. LDQ2 >= max(1,N). - - W (output) REAL array, dimension (N) - The first k values of the final deflation-altered z-vector and - will be passed to SLAED3. - - PERM (output) INTEGER array, dimension (N) - The permutations (from deflation and sorting) to be applied - to each eigenblock. - - GIVPTR (output) INTEGER - The number of Givens rotations which took place in this - subproblem. - - GIVCOL (output) INTEGER array, dimension (2, N) - Each pair of numbers indicates a pair of columns to take place - in a Givens rotation. - - GIVNUM (output) REAL array, dimension (2, N) - Each number indicates the S value to be used in the - corresponding Givens rotation. - - INDXP (workspace) INTEGER array, dimension (N) - The permutation used to place deflated values of D at the end - of the array. INDXP(1:K) points to the nondeflated D-values - and INDXP(K+1:N) points to the deflated eigenvalues. - - INDX (workspace) INTEGER array, dimension (N) - The permutation used to sort the contents of D into ascending - order. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --indxq; - --z__; - --dlamda; - q2_dim1 = *ldq2; - q2_offset = 1 + q2_dim1; - q2 -= q2_offset; - --w; - --perm; - givcol -= 3; - givnum -= 3; - --indxp; - --indx; - - /* Function Body */ - *info = 0; - - if (*icompq < 0 || *icompq > 1) { - *info = -1; - } else if (*n < 0) { - *info = -3; - } else if (*icompq == 1 && *qsiz < *n) { - *info = -4; - } else if (*ldq < max(1,*n)) { - *info = -7; - } else if (*cutpnt < min(1,*n) || *cutpnt > *n) { - *info = -10; - } else if (*ldq2 < max(1,*n)) { - *info = -14; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLAED8", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - n1 = *cutpnt; - n2 = *n - n1; - n1p1 = n1 + 1; - - if (*rho < 0.f) { - sscal_(&n2, &c_b1150, &z__[n1p1], &c__1); - } - -/* Normalize z so that norm(z) = 1 */ - - t = 1.f / sqrt(2.f); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - indx[j] = j; -/* L10: */ - } - sscal_(n, &t, &z__[1], &c__1); - *rho = (r__1 = *rho * 2.f, dabs(r__1)); - -/* Sort the eigenvalues into increasing order */ - - i__1 = *n; - for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) { - indxq[i__] += *cutpnt; -/* L20: */ - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dlamda[i__] = d__[indxq[i__]]; - w[i__] = z__[indxq[i__]]; -/* L30: */ - } - i__ = 1; - j = *cutpnt + 1; - slamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - d__[i__] = dlamda[indx[i__]]; - z__[i__] = w[indx[i__]]; -/* L40: */ - } - -/* Calculate the allowable deflation tolerence */ - - imax = isamax_(n, &z__[1], &c__1); - jmax = isamax_(n, &d__[1], &c__1); - eps = slamch_("Epsilon"); - tol = eps * 8.f * (r__1 = d__[jmax], dabs(r__1)); - -/* - If the rank-1 modifier is small enough, no more needs to be done - except to reorganize Q so that its columns correspond with the - elements in D. -*/ - - if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) { - *k = 0; - if (*icompq == 0) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - perm[j] = indxq[indx[j]]; -/* L50: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - perm[j] = indxq[indx[j]]; - scopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 - + 1], &c__1); -/* L60: */ - } - slacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq); - } - return 0; - } - -/* - If there are multiple eigenvalues then the problem deflates. Here - the number of equal eigenvalues are found. As each equal - eigenvalue is found, an elementary reflector is computed to rotate - the corresponding eigensubspace so that the corresponding - components of Z are zero in this new basis. -*/ - - *k = 0; - *givptr = 0; - k2 = *n + 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - indxp[k2] = j; - if (j == *n) { - goto L110; - } - } else { - jlam = j; - goto L80; - } -/* L70: */ - } -L80: - ++j; - if (j > *n) { - goto L100; - } - if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - indxp[k2] = j; - } else { - -/* Check if eigenvalues are close enough to allow deflation. */ - - s = z__[jlam]; - c__ = z__[j]; - -/* - Find sqrt(a**2+b**2) without overflow or - destructive underflow. -*/ - - tau = slapy2_(&c__, &s); - t = d__[j] - d__[jlam]; - c__ /= tau; - s = -s / tau; - if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) { - -/* Deflation is possible. */ - - z__[j] = tau; - z__[jlam] = 0.f; - -/* Record the appropriate Givens rotation */ - - ++(*givptr); - givcol[(*givptr << 1) + 1] = indxq[indx[jlam]]; - givcol[(*givptr << 1) + 2] = indxq[indx[j]]; - givnum[(*givptr << 1) + 1] = c__; - givnum[(*givptr << 1) + 2] = s; - if (*icompq == 1) { - srot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[ - indxq[indx[j]] * q_dim1 + 1], &c__1, &c__, &s); - } - t = d__[jlam] * c__ * c__ + d__[j] * s * s; - d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__; - d__[jlam] = t; - --k2; - i__ = 1; -L90: - if (k2 + i__ <= *n) { - if (d__[jlam] < d__[indxp[k2 + i__]]) { - indxp[k2 + i__ - 1] = indxp[k2 + i__]; - indxp[k2 + i__] = jlam; - ++i__; - goto L90; - } else { - indxp[k2 + i__ - 1] = jlam; - } - } else { - indxp[k2 + i__ - 1] = jlam; - } - jlam = j; - } else { - ++(*k); - w[*k] = z__[jlam]; - dlamda[*k] = d__[jlam]; - indxp[*k] = jlam; - jlam = j; - } - } - goto L80; -L100: - -/* Record the last eigenvalue. */ - - ++(*k); - w[*k] = z__[jlam]; - dlamda[*k] = d__[jlam]; - indxp[*k] = jlam; - -L110: - -/* - Sort the eigenvalues and corresponding eigenvectors into DLAMDA - and Q2 respectively. The eigenvalues/vectors which were not - deflated go into the first K slots of DLAMDA and Q2 respectively, - while those which were deflated go into the last N - K slots. -*/ - - if (*icompq == 0) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - jp = indxp[j]; - dlamda[j] = d__[jp]; - perm[j] = indxq[indx[jp]]; -/* L120: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - jp = indxp[j]; - dlamda[j] = d__[jp]; - perm[j] = indxq[indx[jp]]; - scopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1] - , &c__1); -/* L130: */ - } - } - -/* - The deflated eigenvalues and their corresponding vectors go back - into the last N - K slots of D and Q respectively. -*/ - - if (*k < *n) { - if (*icompq == 0) { - i__1 = *n - *k; - scopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); - } else { - i__1 = *n - *k; - scopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); - i__1 = *n - *k; - slacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(* - k + 1) * q_dim1 + 1], ldq); - } - } - - return 0; - -/* End of SLAED8 */ - -} /* slaed8_ */ - -/* Subroutine */ int slaed9_(integer *k, integer *kstart, integer *kstop, - integer *n, real *d__, real *q, integer *ldq, real *rho, real *dlamda, - real *w, real *s, integer *lds, integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, s_dim1, s_offset, i__1, i__2; - real r__1; - - /* Builtin functions */ - double sqrt(doublereal), r_sign(real *, real *); - - /* Local variables */ - static integer i__, j; - static real temp; - extern doublereal snrm2_(integer *, real *, integer *); - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *), slaed4_(integer *, integer *, real *, real *, real *, - real *, real *, integer *); - extern doublereal slamc3_(real *, real *); - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - September 30, 1994 - - - Purpose - ======= - - SLAED9 finds the roots of the secular equation, as defined by the - values in D, Z, and RHO, between KSTART and KSTOP. It makes the - appropriate calls to SLAED4 and then stores the new matrix of - eigenvectors for use in calculating the next level of Z vectors. - - Arguments - ========= - - K (input) INTEGER - The number of terms in the rational function to be solved by - SLAED4. K >= 0. - - KSTART (input) INTEGER - KSTOP (input) INTEGER - The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP - are to be computed. 1 <= KSTART <= KSTOP <= K. - - N (input) INTEGER - The number of rows and columns in the Q matrix. - N >= K (delation may result in N > K). - - D (output) REAL array, dimension (N) - D(I) contains the updated eigenvalues - for KSTART <= I <= KSTOP. - - Q (workspace) REAL array, dimension (LDQ,N) - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max( 1, N ). - - RHO (input) REAL - The value of the parameter in the rank one update equation. - RHO >= 0 required. - - DLAMDA (input) REAL array, dimension (K) - The first K elements of this array contain the old roots - of the deflated updating problem. These are the poles - of the secular equation. - - W (input) REAL array, dimension (K) - The first K elements of this array contain the components - of the deflation-adjusted updating vector. - - S (output) REAL array, dimension (LDS, K) - Will contain the eigenvectors of the repaired matrix which - will be stored for subsequent Z vector calculation and - multiplied by the previously accumulated eigenvectors - to update the system. - - LDS (input) INTEGER - The leading dimension of S. LDS >= max( 1, K ). - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an eigenvalue did not converge - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --dlamda; - --w; - s_dim1 = *lds; - s_offset = 1 + s_dim1; - s -= s_offset; - - /* Function Body */ - *info = 0; - - if (*k < 0) { - *info = -1; - } else if (*kstart < 1 || *kstart > max(1,*k)) { - *info = -2; - } else if (max(1,*kstop) < *kstart || *kstop > max(1,*k)) { - *info = -3; - } else if (*n < *k) { - *info = -4; - } else if (*ldq < max(1,*k)) { - *info = -7; - } else if (*lds < max(1,*k)) { - *info = -12; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLAED9", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*k == 0) { - return 0; - } - -/* - Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can - be computed with high relative accuracy (barring over/underflow). - This is a problem on machines without a guard digit in - add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). - The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), - which on any of these machines zeros out the bottommost - bit of DLAMDA(I) if it is 1; this makes the subsequent - subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation - occurs. On binary machines with a guard digit (almost all - machines) it does not change DLAMDA(I) at all. On hexadecimal - and decimal machines with a guard digit, it slightly - changes the bottommost bits of DLAMDA(I). It does not account - for hexadecimal or decimal machines without guard digits - (we know of none). We use a subroutine call to compute - 2*DLAMBDA(I) to prevent optimizing compilers from eliminating - this code. -*/ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dlamda[i__] = slamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; -/* L10: */ - } - - i__1 = *kstop; - for (j = *kstart; j <= i__1; ++j) { - slaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], - info); - -/* If the zero finder fails, the computation is terminated. */ - - if (*info != 0) { - goto L120; - } -/* L20: */ - } - - if (*k == 1 || *k == 2) { - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = *k; - for (j = 1; j <= i__2; ++j) { - s[j + i__ * s_dim1] = q[j + i__ * q_dim1]; -/* L30: */ - } -/* L40: */ - } - goto L120; - } - -/* Compute updated W. */ - - scopy_(k, &w[1], &c__1, &s[s_offset], &c__1); - -/* Initialize W(I) = Q(I,I) */ - - i__1 = *ldq + 1; - scopy_(k, &q[q_offset], &i__1, &w[1], &c__1); - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); -/* L50: */ - } - i__2 = *k; - for (i__ = j + 1; i__ <= i__2; ++i__) { - w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); -/* L60: */ - } -/* L70: */ - } - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - r__1 = sqrt(-w[i__]); - w[i__] = r_sign(&r__1, &s[i__ + s_dim1]); -/* L80: */ - } - -/* Compute eigenvectors of the modified rank-1 modification. */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *k; - for (i__ = 1; i__ <= i__2; ++i__) { - q[i__ + j * q_dim1] = w[i__] / q[i__ + j * q_dim1]; -/* L90: */ - } - temp = snrm2_(k, &q[j * q_dim1 + 1], &c__1); - i__2 = *k; - for (i__ = 1; i__ <= i__2; ++i__) { - s[i__ + j * s_dim1] = q[i__ + j * q_dim1] / temp; -/* L100: */ - } -/* L110: */ - } - -L120: - return 0; - -/* End of SLAED9 */ - -} /* slaed9_ */ - -/* Subroutine */ int slaeda_(integer *n, integer *tlvls, integer *curlvl, - integer *curpbm, integer *prmptr, integer *perm, integer *givptr, - integer *givcol, real *givnum, real *q, integer *qptr, real *z__, - real *ztemp, integer *info) -{ - /* System generated locals */ - integer i__1, i__2, i__3; - - /* Builtin functions */ - integer pow_ii(integer *, integer *); - double sqrt(doublereal); - - /* Local variables */ - static integer i__, k, mid, ptr, curr; - extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, - integer *, real *, real *); - static integer bsiz1, bsiz2, psiz1, psiz2, zptr1; - extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, - real *, integer *, real *, integer *, real *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *), - xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - SLAEDA computes the Z vector corresponding to the merge step in the - CURLVLth step of the merge process with TLVLS steps for the CURPBMth - problem. - - Arguments - ========= - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - TLVLS (input) INTEGER - The total number of merging levels in the overall divide and - conquer tree. - - CURLVL (input) INTEGER - The current level in the overall merge routine, - 0 <= curlvl <= tlvls. - - CURPBM (input) INTEGER - The current problem in the current level in the overall - merge routine (counting from upper left to lower right). - - PRMPTR (input) INTEGER array, dimension (N lg N) - Contains a list of pointers which indicate where in PERM a - level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) - indicates the size of the permutation and incidentally the - size of the full, non-deflated problem. - - PERM (input) INTEGER array, dimension (N lg N) - Contains the permutations (from deflation and sorting) to be - applied to each eigenblock. - - GIVPTR (input) INTEGER array, dimension (N lg N) - Contains a list of pointers which indicate where in GIVCOL a - level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) - indicates the number of Givens rotations. - - GIVCOL (input) INTEGER array, dimension (2, N lg N) - Each pair of numbers indicates a pair of columns to take place - in a Givens rotation. - - GIVNUM (input) REAL array, dimension (2, N lg N) - Each number indicates the S value to be used in the - corresponding Givens rotation. - - Q (input) REAL array, dimension (N**2) - Contains the square eigenblocks from previous levels, the - starting positions for blocks are given by QPTR. - - QPTR (input) INTEGER array, dimension (N+2) - Contains a list of pointers which indicate where in Q an - eigenblock is stored. SQRT( QPTR(i+1) - QPTR(i) ) indicates - the size of the block. - - Z (output) REAL array, dimension (N) - On output this vector contains the updating vector (the last - row of the first sub-eigenvector matrix and the first row of - the second sub-eigenvector matrix). - - ZTEMP (workspace) REAL array, dimension (N) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --ztemp; - --z__; - --qptr; - --q; - givnum -= 3; - givcol -= 3; - --givptr; - --perm; - --prmptr; - - /* Function Body */ - *info = 0; - - if (*n < 0) { - *info = -1; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLAEDA", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Determine location of first number in second half. */ - - mid = *n / 2 + 1; - -/* Gather last/first rows of appropriate eigenblocks into center of Z */ - - ptr = 1; - -/* - Determine location of lowest level subproblem in the full storage - scheme -*/ - - i__1 = *curlvl - 1; - curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1; - -/* - Determine size of these matrices. We add HALF to the value of - the SQRT in case the machine underestimates one of these square - roots. -*/ - - bsiz1 = (integer) (sqrt((real) (qptr[curr + 1] - qptr[curr])) + .5f); - bsiz2 = (integer) (sqrt((real) (qptr[curr + 2] - qptr[curr + 1])) + .5f); - i__1 = mid - bsiz1 - 1; - for (k = 1; k <= i__1; ++k) { - z__[k] = 0.f; -/* L10: */ - } - scopy_(&bsiz1, &q[qptr[curr] + bsiz1 - 1], &bsiz1, &z__[mid - bsiz1], & - c__1); - scopy_(&bsiz2, &q[qptr[curr + 1]], &bsiz2, &z__[mid], &c__1); - i__1 = *n; - for (k = mid + bsiz2; k <= i__1; ++k) { - z__[k] = 0.f; -/* L20: */ - } - -/* - Loop thru remaining levels 1 -> CURLVL applying the Givens - rotations and permutation and then multiplying the center matrices - against the current Z. -*/ - - ptr = pow_ii(&c__2, tlvls) + 1; - i__1 = *curlvl - 1; - for (k = 1; k <= i__1; ++k) { - i__2 = *curlvl - k; - i__3 = *curlvl - k - 1; - curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) - - 1; - psiz1 = prmptr[curr + 1] - prmptr[curr]; - psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; - zptr1 = mid - psiz1; - -/* Apply Givens at CURR and CURR+1 */ - - i__2 = givptr[curr + 1] - 1; - for (i__ = givptr[curr]; i__ <= i__2; ++i__) { - srot_(&c__1, &z__[zptr1 + givcol[(i__ << 1) + 1] - 1], &c__1, & - z__[zptr1 + givcol[(i__ << 1) + 2] - 1], &c__1, &givnum[( - i__ << 1) + 1], &givnum[(i__ << 1) + 2]); -/* L30: */ - } - i__2 = givptr[curr + 2] - 1; - for (i__ = givptr[curr + 1]; i__ <= i__2; ++i__) { - srot_(&c__1, &z__[mid - 1 + givcol[(i__ << 1) + 1]], &c__1, &z__[ - mid - 1 + givcol[(i__ << 1) + 2]], &c__1, &givnum[(i__ << - 1) + 1], &givnum[(i__ << 1) + 2]); -/* L40: */ - } - psiz1 = prmptr[curr + 1] - prmptr[curr]; - psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; - i__2 = psiz1 - 1; - for (i__ = 0; i__ <= i__2; ++i__) { - ztemp[i__ + 1] = z__[zptr1 + perm[prmptr[curr] + i__] - 1]; -/* L50: */ - } - i__2 = psiz2 - 1; - for (i__ = 0; i__ <= i__2; ++i__) { - ztemp[psiz1 + i__ + 1] = z__[mid + perm[prmptr[curr + 1] + i__] - - 1]; -/* L60: */ - } - -/* - Multiply Blocks at CURR and CURR+1 - - Determine size of these matrices. We add HALF to the value of - the SQRT in case the machine underestimates one of these - square roots. -*/ - - bsiz1 = (integer) (sqrt((real) (qptr[curr + 1] - qptr[curr])) + .5f); - bsiz2 = (integer) (sqrt((real) (qptr[curr + 2] - qptr[curr + 1])) + - .5f); - if (bsiz1 > 0) { - sgemv_("T", &bsiz1, &bsiz1, &c_b871, &q[qptr[curr]], &bsiz1, & - ztemp[1], &c__1, &c_b1101, &z__[zptr1], &c__1); - } - i__2 = psiz1 - bsiz1; - scopy_(&i__2, &ztemp[bsiz1 + 1], &c__1, &z__[zptr1 + bsiz1], &c__1); - if (bsiz2 > 0) { - sgemv_("T", &bsiz2, &bsiz2, &c_b871, &q[qptr[curr + 1]], &bsiz2, & - ztemp[psiz1 + 1], &c__1, &c_b1101, &z__[mid], &c__1); - } - i__2 = psiz2 - bsiz2; - scopy_(&i__2, &ztemp[psiz1 + bsiz2 + 1], &c__1, &z__[mid + bsiz2], & - c__1); - - i__2 = *tlvls - k; - ptr += pow_ii(&c__2, &i__2); -/* L70: */ - } - - return 0; - -/* End of SLAEDA */ - -} /* slaeda_ */ - -/* Subroutine */ int slaev2_(real *a, real *b, real *c__, real *rt1, real * - rt2, real *cs1, real *sn1) -{ - /* System generated locals */ - real r__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static real ab, df, cs, ct, tb, sm, tn, rt, adf, acs; - static integer sgn1, sgn2; - static real acmn, acmx; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - SLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix - [ A B ] - [ B C ]. - On return, RT1 is the eigenvalue of larger absolute value, RT2 is the - eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right - eigenvector for RT1, giving the decomposition - - [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] - [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. - - Arguments - ========= - - A (input) REAL - The (1,1) element of the 2-by-2 matrix. - - B (input) REAL - The (1,2) element and the conjugate of the (2,1) element of - the 2-by-2 matrix. - - C (input) REAL - The (2,2) element of the 2-by-2 matrix. - - RT1 (output) REAL - The eigenvalue of larger absolute value. - - RT2 (output) REAL - The eigenvalue of smaller absolute value. - - CS1 (output) REAL - SN1 (output) REAL - The vector (CS1, SN1) is a unit right eigenvector for RT1. - - Further Details - =============== - - RT1 is accurate to a few ulps barring over/underflow. - - RT2 may be inaccurate if there is massive cancellation in the - determinant A*C-B*B; higher precision or correctly rounded or - correctly truncated arithmetic would be needed to compute RT2 - accurately in all cases. - - CS1 and SN1 are accurate to a few ulps barring over/underflow. - - Overflow is possible only if RT1 is within a factor of 5 of overflow. - Underflow is harmless if the input data is 0 or exceeds - underflow_threshold / macheps. - - ===================================================================== - - - Compute the eigenvalues -*/ - - sm = *a + *c__; - df = *a - *c__; - adf = dabs(df); - tb = *b + *b; - ab = dabs(tb); - if (dabs(*a) > dabs(*c__)) { - acmx = *a; - acmn = *c__; - } else { - acmx = *c__; - acmn = *a; - } - if (adf > ab) { -/* Computing 2nd power */ - r__1 = ab / adf; - rt = adf * sqrt(r__1 * r__1 + 1.f); - } else if (adf < ab) { -/* Computing 2nd power */ - r__1 = adf / ab; - rt = ab * sqrt(r__1 * r__1 + 1.f); - } else { - -/* Includes case AB=ADF=0 */ - - rt = ab * sqrt(2.f); - } - if (sm < 0.f) { - *rt1 = (sm - rt) * .5f; - sgn1 = -1; - -/* - Order of execution important. - To get fully accurate smaller eigenvalue, - next line needs to be executed in higher precision. -*/ - - *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; - } else if (sm > 0.f) { - *rt1 = (sm + rt) * .5f; - sgn1 = 1; - -/* - Order of execution important. - To get fully accurate smaller eigenvalue, - next line needs to be executed in higher precision. -*/ - - *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; - } else { - -/* Includes case RT1 = RT2 = 0 */ - - *rt1 = rt * .5f; - *rt2 = rt * -.5f; - sgn1 = 1; - } - -/* Compute the eigenvector */ - - if (df >= 0.f) { - cs = df + rt; - sgn2 = 1; - } else { - cs = df - rt; - sgn2 = -1; - } - acs = dabs(cs); - if (acs > ab) { - ct = -tb / cs; - *sn1 = 1.f / sqrt(ct * ct + 1.f); - *cs1 = ct * *sn1; - } else { - if (ab == 0.f) { - *cs1 = 1.f; - *sn1 = 0.f; - } else { - tn = -cs / tb; - *cs1 = 1.f / sqrt(tn * tn + 1.f); - *sn1 = tn * *cs1; - } - } - if (sgn1 == sgn2) { - tn = *cs1; - *cs1 = -(*sn1); - *sn1 = tn; - } - return 0; - -/* End of SLAEV2 */ - -} /* slaev2_ */ - -/* Subroutine */ int slahqr_(logical *wantt, logical *wantz, integer *n, - integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real * - wi, integer *iloz, integer *ihiz, real *z__, integer *ldz, integer * - info) -{ - /* System generated locals */ - integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; - real r__1, r__2; - - /* Builtin functions */ - double sqrt(doublereal), r_sign(real *, real *); - - /* Local variables */ - static integer i__, j, k, l, m; - static real s, v[3]; - static integer i1, i2; - static real t1, t2, t3, v1, v2, v3, h00, h10, h11, h12, h21, h22, h33, - h44; - static integer nh; - static real cs; - static integer nr; - static real sn; - static integer nz; - static real ave, h33s, h44s; - static integer itn, its; - static real ulp, sum, tst1, h43h34, disc, unfl, ovfl, work[1]; - extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, - integer *, real *, real *), scopy_(integer *, real *, integer *, - real *, integer *), slanv2_(real *, real *, real *, real *, real * - , real *, real *, real *, real *, real *), slabad_(real *, real *) - ; - extern doublereal slamch_(char *); - extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, - real *); - extern doublereal slanhs_(char *, integer *, real *, integer *, real *); - static real smlnum; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SLAHQR is an auxiliary routine called by SHSEQR to update the - eigenvalues and Schur decomposition already computed by SHSEQR, by - dealing with the Hessenberg submatrix in rows and columns ILO to IHI. - - Arguments - ========= - - WANTT (input) LOGICAL - = .TRUE. : the full Schur form T is required; - = .FALSE.: only eigenvalues are required. - - WANTZ (input) LOGICAL - = .TRUE. : the matrix of Schur vectors Z is required; - = .FALSE.: Schur vectors are not required. - - N (input) INTEGER - The order of the matrix H. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that H is already upper quasi-triangular in - rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless - ILO = 1). SLAHQR works primarily with the Hessenberg - submatrix in rows and columns ILO to IHI, but applies - transformations to all of H if WANTT is .TRUE.. - 1 <= ILO <= max(1,IHI); IHI <= N. - - H (input/output) REAL array, dimension (LDH,N) - On entry, the upper Hessenberg matrix H. - On exit, if WANTT is .TRUE., H is upper quasi-triangular in - rows and columns ILO:IHI, with any 2-by-2 diagonal blocks in - standard form. If WANTT is .FALSE., the contents of H are - unspecified on exit. - - LDH (input) INTEGER - The leading dimension of the array H. LDH >= max(1,N). - - WR (output) REAL array, dimension (N) - WI (output) REAL array, dimension (N) - The real and imaginary parts, respectively, of the computed - eigenvalues ILO to IHI are stored in the corresponding - elements of WR and WI. If two eigenvalues are computed as a - complex conjugate pair, they are stored in consecutive - elements of WR and WI, say the i-th and (i+1)th, with - WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the - eigenvalues are stored in the same order as on the diagonal - of the Schur form returned in H, with WR(i) = H(i,i), and, if - H(i:i+1,i:i+1) is a 2-by-2 diagonal block, - WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). - - ILOZ (input) INTEGER - IHIZ (input) INTEGER - Specify the rows of Z to which transformations must be - applied if WANTZ is .TRUE.. - 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. - - Z (input/output) REAL array, dimension (LDZ,N) - If WANTZ is .TRUE., on entry Z must contain the current - matrix Z of transformations accumulated by SHSEQR, and on - exit Z has been updated; transformations are applied only to - the submatrix Z(ILOZ:IHIZ,ILO:IHI). - If WANTZ is .FALSE., Z is not referenced. - - LDZ (input) INTEGER - The leading dimension of the array Z. LDZ >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - > 0: SLAHQR failed to compute all the eigenvalues ILO to IHI - in a total of 30*(IHI-ILO+1) iterations; if INFO = i, - elements i+1:ihi of WR and WI contain those eigenvalues - which have been successfully computed. - - Further Details - =============== - - 2-96 Based on modifications by - David Day, Sandia National Laboratory, USA - - ===================================================================== -*/ - - - /* Parameter adjustments */ - h_dim1 = *ldh; - h_offset = 1 + h_dim1; - h__ -= h_offset; - --wr; - --wi; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - - /* Function Body */ - *info = 0; - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - if (*ilo == *ihi) { - wr[*ilo] = h__[*ilo + *ilo * h_dim1]; - wi[*ilo] = 0.f; - return 0; - } - - nh = *ihi - *ilo + 1; - nz = *ihiz - *iloz + 1; - -/* - Set machine-dependent constants for the stopping criterion. - If norm(H) <= sqrt(OVFL), overflow should not occur. -*/ - - unfl = slamch_("Safe minimum"); - ovfl = 1.f / unfl; - slabad_(&unfl, &ovfl); - ulp = slamch_("Precision"); - smlnum = unfl * (nh / ulp); - -/* - I1 and I2 are the indices of the first row and last column of H - to which transformations must be applied. If eigenvalues only are - being computed, I1 and I2 are set inside the main loop. -*/ - - if (*wantt) { - i1 = 1; - i2 = *n; - } - -/* ITN is the total number of QR iterations allowed. */ - - itn = nh * 30; - -/* - The main loop begins here. I is the loop index and decreases from - IHI to ILO in steps of 1 or 2. Each iteration of the loop works - with the active submatrix in rows and columns L to I. - Eigenvalues I+1 to IHI have already converged. Either L = ILO or - H(L,L-1) is negligible so that the matrix splits. -*/ - - i__ = *ihi; -L10: - l = *ilo; - if (i__ < *ilo) { - goto L150; - } - -/* - Perform QR iterations on rows and columns ILO to I until a - submatrix of order 1 or 2 splits off at the bottom because a - subdiagonal element has become negligible. -*/ - - i__1 = itn; - for (its = 0; its <= i__1; ++its) { - -/* Look for a single small subdiagonal element. */ - - i__2 = l + 1; - for (k = i__; k >= i__2; --k) { - tst1 = (r__1 = h__[k - 1 + (k - 1) * h_dim1], dabs(r__1)) + (r__2 - = h__[k + k * h_dim1], dabs(r__2)); - if (tst1 == 0.f) { - i__3 = i__ - l + 1; - tst1 = slanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, work); - } -/* Computing MAX */ - r__2 = ulp * tst1; - if ((r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)) <= dmax(r__2, - smlnum)) { - goto L30; - } -/* L20: */ - } -L30: - l = k; - if (l > *ilo) { - -/* H(L,L-1) is negligible */ - - h__[l + (l - 1) * h_dim1] = 0.f; - } - -/* Exit from loop if a submatrix of order 1 or 2 has split off. */ - - if (l >= i__ - 1) { - goto L140; - } - -/* - Now the active submatrix is in rows and columns L to I. If - eigenvalues only are being computed, only the active submatrix - need be transformed. -*/ - - if (! (*wantt)) { - i1 = l; - i2 = i__; - } - - if (its == 10 || its == 20) { - -/* Exceptional shift. */ - - s = (r__1 = h__[i__ + (i__ - 1) * h_dim1], dabs(r__1)) + (r__2 = - h__[i__ - 1 + (i__ - 2) * h_dim1], dabs(r__2)); - h44 = s * .75f + h__[i__ + i__ * h_dim1]; - h33 = h44; - h43h34 = s * -.4375f * s; - } else { - -/* - Prepare to use Francis' double shift - (i.e. 2nd degree generalized Rayleigh quotient) -*/ - - h44 = h__[i__ + i__ * h_dim1]; - h33 = h__[i__ - 1 + (i__ - 1) * h_dim1]; - h43h34 = h__[i__ + (i__ - 1) * h_dim1] * h__[i__ - 1 + i__ * - h_dim1]; - s = h__[i__ - 1 + (i__ - 2) * h_dim1] * h__[i__ - 1 + (i__ - 2) * - h_dim1]; - disc = (h33 - h44) * .5f; - disc = disc * disc + h43h34; - if (disc > 0.f) { - -/* Real roots: use Wilkinson's shift twice */ - - disc = sqrt(disc); - ave = (h33 + h44) * .5f; - if (dabs(h33) - dabs(h44) > 0.f) { - h33 = h33 * h44 - h43h34; - h44 = h33 / (r_sign(&disc, &ave) + ave); - } else { - h44 = r_sign(&disc, &ave) + ave; - } - h33 = h44; - h43h34 = 0.f; - } - } - -/* Look for two consecutive small subdiagonal elements. */ - - i__2 = l; - for (m = i__ - 2; m >= i__2; --m) { -/* - Determine the effect of starting the double-shift QR - iteration at row M, and see if this would make H(M,M-1) - negligible. -*/ - - h11 = h__[m + m * h_dim1]; - h22 = h__[m + 1 + (m + 1) * h_dim1]; - h21 = h__[m + 1 + m * h_dim1]; - h12 = h__[m + (m + 1) * h_dim1]; - h44s = h44 - h11; - h33s = h33 - h11; - v1 = (h33s * h44s - h43h34) / h21 + h12; - v2 = h22 - h11 - h33s - h44s; - v3 = h__[m + 2 + (m + 1) * h_dim1]; - s = dabs(v1) + dabs(v2) + dabs(v3); - v1 /= s; - v2 /= s; - v3 /= s; - v[0] = v1; - v[1] = v2; - v[2] = v3; - if (m == l) { - goto L50; - } - h00 = h__[m - 1 + (m - 1) * h_dim1]; - h10 = h__[m + (m - 1) * h_dim1]; - tst1 = dabs(v1) * (dabs(h00) + dabs(h11) + dabs(h22)); - if (dabs(h10) * (dabs(v2) + dabs(v3)) <= ulp * tst1) { - goto L50; - } -/* L40: */ - } -L50: - -/* Double-shift QR step */ - - i__2 = i__ - 1; - for (k = m; k <= i__2; ++k) { - -/* - The first iteration of this loop determines a reflection G - from the vector V and applies it from left and right to H, - thus creating a nonzero bulge below the subdiagonal. - - Each subsequent iteration determines a reflection G to - restore the Hessenberg form in the (K-1)th column, and thus - chases the bulge one step toward the bottom of the active - submatrix. NR is the order of G. - - Computing MIN -*/ - i__3 = 3, i__4 = i__ - k + 1; - nr = min(i__3,i__4); - if (k > m) { - scopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); - } - slarfg_(&nr, v, &v[1], &c__1, &t1); - if (k > m) { - h__[k + (k - 1) * h_dim1] = v[0]; - h__[k + 1 + (k - 1) * h_dim1] = 0.f; - if (k < i__ - 1) { - h__[k + 2 + (k - 1) * h_dim1] = 0.f; - } - } else if (m > l) { - h__[k + (k - 1) * h_dim1] = -h__[k + (k - 1) * h_dim1]; - } - v2 = v[1]; - t2 = t1 * v2; - if (nr == 3) { - v3 = v[2]; - t3 = t1 * v3; - -/* - Apply G from the left to transform the rows of the matrix - in columns K to I2. -*/ - - i__3 = i2; - for (j = k; j <= i__3; ++j) { - sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1] - + v3 * h__[k + 2 + j * h_dim1]; - h__[k + j * h_dim1] -= sum * t1; - h__[k + 1 + j * h_dim1] -= sum * t2; - h__[k + 2 + j * h_dim1] -= sum * t3; -/* L60: */ - } - -/* - Apply G from the right to transform the columns of the - matrix in rows I1 to min(K+3,I). - - Computing MIN -*/ - i__4 = k + 3; - i__3 = min(i__4,i__); - for (j = i1; j <= i__3; ++j) { - sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] - + v3 * h__[j + (k + 2) * h_dim1]; - h__[j + k * h_dim1] -= sum * t1; - h__[j + (k + 1) * h_dim1] -= sum * t2; - h__[j + (k + 2) * h_dim1] -= sum * t3; -/* L70: */ - } - - if (*wantz) { - -/* Accumulate transformations in the matrix Z */ - - i__3 = *ihiz; - for (j = *iloz; j <= i__3; ++j) { - sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * - z_dim1] + v3 * z__[j + (k + 2) * z_dim1]; - z__[j + k * z_dim1] -= sum * t1; - z__[j + (k + 1) * z_dim1] -= sum * t2; - z__[j + (k + 2) * z_dim1] -= sum * t3; -/* L80: */ - } - } - } else if (nr == 2) { - -/* - Apply G from the left to transform the rows of the matrix - in columns K to I2. -*/ - - i__3 = i2; - for (j = k; j <= i__3; ++j) { - sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1]; - h__[k + j * h_dim1] -= sum * t1; - h__[k + 1 + j * h_dim1] -= sum * t2; -/* L90: */ - } - -/* - Apply G from the right to transform the columns of the - matrix in rows I1 to min(K+3,I). -*/ - - i__3 = i__; - for (j = i1; j <= i__3; ++j) { - sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] - ; - h__[j + k * h_dim1] -= sum * t1; - h__[j + (k + 1) * h_dim1] -= sum * t2; -/* L100: */ - } - - if (*wantz) { - -/* Accumulate transformations in the matrix Z */ - - i__3 = *ihiz; - for (j = *iloz; j <= i__3; ++j) { - sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * - z_dim1]; - z__[j + k * z_dim1] -= sum * t1; - z__[j + (k + 1) * z_dim1] -= sum * t2; -/* L110: */ - } - } - } -/* L120: */ - } - -/* L130: */ - } - -/* Failure to converge in remaining number of iterations */ - - *info = i__; - return 0; - -L140: - - if (l == i__) { - -/* H(I,I-1) is negligible: one eigenvalue has converged. */ - - wr[i__] = h__[i__ + i__ * h_dim1]; - wi[i__] = 0.f; - } else if (l == i__ - 1) { - -/* - H(I-1,I-2) is negligible: a pair of eigenvalues have converged. - - Transform the 2-by-2 submatrix to standard Schur form, - and compute and store the eigenvalues. -*/ - - slanv2_(&h__[i__ - 1 + (i__ - 1) * h_dim1], &h__[i__ - 1 + i__ * - h_dim1], &h__[i__ + (i__ - 1) * h_dim1], &h__[i__ + i__ * - h_dim1], &wr[i__ - 1], &wi[i__ - 1], &wr[i__], &wi[i__], &cs, - &sn); - - if (*wantt) { - -/* Apply the transformation to the rest of H. */ - - if (i2 > i__) { - i__1 = i2 - i__; - srot_(&i__1, &h__[i__ - 1 + (i__ + 1) * h_dim1], ldh, &h__[ - i__ + (i__ + 1) * h_dim1], ldh, &cs, &sn); - } - i__1 = i__ - i1 - 1; - srot_(&i__1, &h__[i1 + (i__ - 1) * h_dim1], &c__1, &h__[i1 + i__ * - h_dim1], &c__1, &cs, &sn); - } - if (*wantz) { - -/* Apply the transformation to Z. */ - - srot_(&nz, &z__[*iloz + (i__ - 1) * z_dim1], &c__1, &z__[*iloz + - i__ * z_dim1], &c__1, &cs, &sn); - } - } - -/* - Decrement number of remaining iterations, and return to start of - the main loop with new value of I. -*/ - - itn -= its; - i__ = l - 1; - goto L10; - -L150: - return 0; - -/* End of SLAHQR */ - -} /* slahqr_ */ - -/* Subroutine */ int slahrd_(integer *n, integer *k, integer *nb, real *a, - integer *lda, real *tau, real *t, integer *ldt, real *y, integer *ldy) -{ - /* System generated locals */ - integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, - i__3; - real r__1; - - /* Local variables */ - static integer i__; - static real ei; - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), - sgemv_(char *, integer *, integer *, real *, real *, integer *, - real *, integer *, real *, real *, integer *), scopy_( - integer *, real *, integer *, real *, integer *), saxpy_(integer * - , real *, real *, integer *, real *, integer *), strmv_(char *, - char *, char *, integer *, real *, integer *, real *, integer *), slarfg_(integer *, real *, real *, - integer *, real *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SLAHRD reduces the first NB columns of a real general n-by-(n-k+1) - matrix A so that elements below the k-th subdiagonal are zero. The - reduction is performed by an orthogonal similarity transformation - Q' * A * Q. The routine returns the matrices V and T which determine - Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. - - This is an auxiliary routine called by SGEHRD. - - Arguments - ========= - - N (input) INTEGER - The order of the matrix A. - - K (input) INTEGER - The offset for the reduction. Elements below the k-th - subdiagonal in the first NB columns are reduced to zero. - - NB (input) INTEGER - The number of columns to be reduced. - - A (input/output) REAL array, dimension (LDA,N-K+1) - On entry, the n-by-(n-k+1) general matrix A. - On exit, the elements on and above the k-th subdiagonal in - the first NB columns are overwritten with the corresponding - elements of the reduced matrix; the elements below the k-th - subdiagonal, with the array TAU, represent the matrix Q as a - product of elementary reflectors. The other columns of A are - unchanged. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - TAU (output) REAL array, dimension (NB) - The scalar factors of the elementary reflectors. See Further - Details. - - T (output) REAL array, dimension (LDT,NB) - The upper triangular matrix T. - - LDT (input) INTEGER - The leading dimension of the array T. LDT >= NB. - - Y (output) REAL array, dimension (LDY,NB) - The n-by-nb matrix Y. - - LDY (input) INTEGER - The leading dimension of the array Y. LDY >= N. - - Further Details - =============== - - The matrix Q is represented as a product of nb elementary reflectors - - Q = H(1) H(2) . . . H(nb). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in - A(i+k+1:n,i), and tau in TAU(i). - - The elements of the vectors v together form the (n-k+1)-by-nb matrix - V which is needed, with T and Y, to apply the transformation to the - unreduced part of the matrix, using an update of the form: - A := (I - V*T*V') * (A - Y*V'). - - The contents of A on exit are illustrated by the following example - with n = 7, k = 3 and nb = 2: - - ( a h a a a ) - ( a h a a a ) - ( a h a a a ) - ( h h a a a ) - ( v1 h a a a ) - ( v1 v2 a a a ) - ( v1 v2 a a a ) - - where a denotes an element of the original matrix A, h denotes a - modified element of the upper Hessenberg matrix H, and vi denotes an - element of the vector defining H(i). - - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - --tau; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - y_dim1 = *ldy; - y_offset = 1 + y_dim1; - y -= y_offset; - - /* Function Body */ - if (*n <= 1) { - return 0; - } - - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { - if (i__ > 1) { - -/* - Update A(1:n,i) - - Compute i-th column of A - Y * V' -*/ - - i__2 = i__ - 1; - sgemv_("No transpose", n, &i__2, &c_b1150, &y[y_offset], ldy, &a[* - k + i__ - 1 + a_dim1], lda, &c_b871, &a[i__ * a_dim1 + 1], - &c__1); - -/* - Apply I - V * T' * V' to this column (call it b) from the - left, using the last column of T as workspace - - Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) - ( V2 ) ( b2 ) - - where V1 is unit lower triangular - - w := V1' * b1 -*/ - - i__2 = i__ - 1; - scopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + - 1], &c__1); - i__2 = i__ - 1; - strmv_("Lower", "Transpose", "Unit", &i__2, &a[*k + 1 + a_dim1], - lda, &t[*nb * t_dim1 + 1], &c__1); - -/* w := w + V2'*b2 */ - - i__2 = *n - *k - i__ + 1; - i__3 = i__ - 1; - sgemv_("Transpose", &i__2, &i__3, &c_b871, &a[*k + i__ + a_dim1], - lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b871, &t[*nb * - t_dim1 + 1], &c__1); - -/* w := T'*w */ - - i__2 = i__ - 1; - strmv_("Upper", "Transpose", "Non-unit", &i__2, &t[t_offset], ldt, - &t[*nb * t_dim1 + 1], &c__1); - -/* b2 := b2 - V2*w */ - - i__2 = *n - *k - i__ + 1; - i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1150, &a[*k + i__ + - a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1, &c_b871, &a[*k - + i__ + i__ * a_dim1], &c__1); - -/* b1 := b1 - V1*w */ - - i__2 = i__ - 1; - strmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1] - , lda, &t[*nb * t_dim1 + 1], &c__1); - i__2 = i__ - 1; - saxpy_(&i__2, &c_b1150, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + - i__ * a_dim1], &c__1); - - a[*k + i__ - 1 + (i__ - 1) * a_dim1] = ei; - } - -/* - Generate the elementary reflector H(i) to annihilate - A(k+i+1:n,i) -*/ - - i__2 = *n - *k - i__ + 1; -/* Computing MIN */ - i__3 = *k + i__ + 1; - slarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3,*n) + i__ * - a_dim1], &c__1, &tau[i__]); - ei = a[*k + i__ + i__ * a_dim1]; - a[*k + i__ + i__ * a_dim1] = 1.f; - -/* Compute Y(1:n,i) */ - - i__2 = *n - *k - i__ + 1; - sgemv_("No transpose", n, &i__2, &c_b871, &a[(i__ + 1) * a_dim1 + 1], - lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b1101, &y[i__ * - y_dim1 + 1], &c__1); - i__2 = *n - *k - i__ + 1; - i__3 = i__ - 1; - sgemv_("Transpose", &i__2, &i__3, &c_b871, &a[*k + i__ + a_dim1], lda, - &a[*k + i__ + i__ * a_dim1], &c__1, &c_b1101, &t[i__ * - t_dim1 + 1], &c__1); - i__2 = i__ - 1; - sgemv_("No transpose", n, &i__2, &c_b1150, &y[y_offset], ldy, &t[i__ * - t_dim1 + 1], &c__1, &c_b871, &y[i__ * y_dim1 + 1], &c__1); - sscal_(n, &tau[i__], &y[i__ * y_dim1 + 1], &c__1); - -/* Compute T(1:i,i) */ - - i__2 = i__ - 1; - r__1 = -tau[i__]; - sscal_(&i__2, &r__1, &t[i__ * t_dim1 + 1], &c__1); - i__2 = i__ - 1; - strmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt, - &t[i__ * t_dim1 + 1], &c__1) - ; - t[i__ + i__ * t_dim1] = tau[i__]; - -/* L10: */ - } - a[*k + *nb + *nb * a_dim1] = ei; - - return 0; - -/* End of SLAHRD */ - -} /* slahrd_ */ - -/* Subroutine */ int slaln2_(logical *ltrans, integer *na, integer *nw, real * - smin, real *ca, real *a, integer *lda, real *d1, real *d2, real *b, - integer *ldb, real *wr, real *wi, real *x, integer *ldx, real *scale, - real *xnorm, integer *info) -{ - /* Initialized data */ - - static logical cswap[4] = { FALSE_,FALSE_,TRUE_,TRUE_ }; - static logical rswap[4] = { FALSE_,TRUE_,FALSE_,TRUE_ }; - static integer ipivot[16] /* was [4][4] */ = { 1,2,3,4,2,1,4,3,3,4,1,2, - 4,3,2,1 }; - - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset; - real r__1, r__2, r__3, r__4, r__5, r__6; - static real equiv_0[4], equiv_1[4]; - - /* Local variables */ - static integer j; -#define ci (equiv_0) -#define cr (equiv_1) - static real bi1, bi2, br1, br2, xi1, xi2, xr1, xr2, ci21, ci22, cr21, - cr22, li21, csi, ui11, lr21, ui12, ui22; -#define civ (equiv_0) - static real csr, ur11, ur12, ur22; -#define crv (equiv_1) - static real bbnd, cmax, ui11r, ui12s, temp, ur11r, ur12s, u22abs; - static integer icmax; - static real bnorm, cnorm, smini; - extern doublereal slamch_(char *); - static real bignum; - extern /* Subroutine */ int sladiv_(real *, real *, real *, real *, real * - , real *); - static real smlnum; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - SLALN2 solves a system of the form (ca A - w D ) X = s B - or (ca A' - w D) X = s B with possible scaling ("s") and - perturbation of A. (A' means A-transpose.) - - A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA - real diagonal matrix, w is a real or complex value, and X and B are - NA x 1 matrices -- real if w is real, complex if w is complex. NA - may be 1 or 2. - - If w is complex, X and B are represented as NA x 2 matrices, - the first column of each being the real part and the second - being the imaginary part. - - "s" is a scaling factor (.LE. 1), computed by SLALN2, which is - so chosen that X can be computed without overflow. X is further - scaled if necessary to assure that norm(ca A - w D)*norm(X) is less - than overflow. - - If both singular values of (ca A - w D) are less than SMIN, - SMIN*identity will be used instead of (ca A - w D). If only one - singular value is less than SMIN, one element of (ca A - w D) will be - perturbed enough to make the smallest singular value roughly SMIN. - If both singular values are at least SMIN, (ca A - w D) will not be - perturbed. In any case, the perturbation will be at most some small - multiple of max( SMIN, ulp*norm(ca A - w D) ). The singular values - are computed by infinity-norm approximations, and thus will only be - correct to a factor of 2 or so. - - Note: all input quantities are assumed to be smaller than overflow - by a reasonable factor. (See BIGNUM.) - - Arguments - ========== - - LTRANS (input) LOGICAL - =.TRUE.: A-transpose will be used. - =.FALSE.: A will be used (not transposed.) - - NA (input) INTEGER - The size of the matrix A. It may (only) be 1 or 2. - - NW (input) INTEGER - 1 if "w" is real, 2 if "w" is complex. It may only be 1 - or 2. - - SMIN (input) REAL - The desired lower bound on the singular values of A. This - should be a safe distance away from underflow or overflow, - say, between (underflow/machine precision) and (machine - precision * overflow ). (See BIGNUM and ULP.) - - CA (input) REAL - The coefficient c, which A is multiplied by. - - A (input) REAL array, dimension (LDA,NA) - The NA x NA matrix A. - - LDA (input) INTEGER - The leading dimension of A. It must be at least NA. - - D1 (input) REAL - The 1,1 element in the diagonal matrix D. - - D2 (input) REAL - The 2,2 element in the diagonal matrix D. Not used if NW=1. - - B (input) REAL array, dimension (LDB,NW) - The NA x NW matrix B (right-hand side). If NW=2 ("w" is - complex), column 1 contains the real part of B and column 2 - contains the imaginary part. - - LDB (input) INTEGER - The leading dimension of B. It must be at least NA. - - WR (input) REAL - The real part of the scalar "w". - - WI (input) REAL - The imaginary part of the scalar "w". Not used if NW=1. - - X (output) REAL array, dimension (LDX,NW) - The NA x NW matrix X (unknowns), as computed by SLALN2. - If NW=2 ("w" is complex), on exit, column 1 will contain - the real part of X and column 2 will contain the imaginary - part. - - LDX (input) INTEGER - The leading dimension of X. It must be at least NA. - - SCALE (output) REAL - The scale factor that B must be multiplied by to insure - that overflow does not occur when computing X. Thus, - (ca A - w D) X will be SCALE*B, not B (ignoring - perturbations of A.) It will be at most 1. - - XNORM (output) REAL - The infinity-norm of X, when X is regarded as an NA x NW - real matrix. - - INFO (output) INTEGER - An error flag. It will be set to zero if no error occurs, - a negative number if an argument is in error, or a positive - number if ca A - w D had to be perturbed. - The possible values are: - = 0: No error occurred, and (ca A - w D) did not have to be - perturbed. - = 1: (ca A - w D) had to be perturbed to make its smallest - (or only) singular value greater than SMIN. - NOTE: In the interests of speed, this routine does not - check the inputs for errors. - - ===================================================================== -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - x_dim1 = *ldx; - x_offset = 1 + x_dim1; - x -= x_offset; - - /* Function Body */ - -/* Compute BIGNUM */ - - smlnum = 2.f * slamch_("Safe minimum"); - bignum = 1.f / smlnum; - smini = dmax(*smin,smlnum); - -/* Don't check for input errors */ - - *info = 0; - -/* Standard Initializations */ - - *scale = 1.f; - - if (*na == 1) { - -/* 1 x 1 (i.e., scalar) system C X = B */ - - if (*nw == 1) { - -/* - Real 1x1 system. - - C = ca A - w D -*/ - - csr = *ca * a[a_dim1 + 1] - *wr * *d1; - cnorm = dabs(csr); - -/* If | C | < SMINI, use C = SMINI */ - - if (cnorm < smini) { - csr = smini; - cnorm = smini; - *info = 1; - } - -/* Check scaling for X = B / C */ - - bnorm = (r__1 = b[b_dim1 + 1], dabs(r__1)); - if (cnorm < 1.f && bnorm > 1.f) { - if (bnorm > bignum * cnorm) { - *scale = 1.f / bnorm; - } - } - -/* Compute X */ - - x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / csr; - *xnorm = (r__1 = x[x_dim1 + 1], dabs(r__1)); - } else { - -/* - Complex 1x1 system (w is complex) - - C = ca A - w D -*/ - - csr = *ca * a[a_dim1 + 1] - *wr * *d1; - csi = -(*wi) * *d1; - cnorm = dabs(csr) + dabs(csi); - -/* If | C | < SMINI, use C = SMINI */ - - if (cnorm < smini) { - csr = smini; - csi = 0.f; - cnorm = smini; - *info = 1; - } - -/* Check scaling for X = B / C */ - - bnorm = (r__1 = b[b_dim1 + 1], dabs(r__1)) + (r__2 = b[(b_dim1 << - 1) + 1], dabs(r__2)); - if (cnorm < 1.f && bnorm > 1.f) { - if (bnorm > bignum * cnorm) { - *scale = 1.f / bnorm; - } - } - -/* Compute X */ - - r__1 = *scale * b[b_dim1 + 1]; - r__2 = *scale * b[(b_dim1 << 1) + 1]; - sladiv_(&r__1, &r__2, &csr, &csi, &x[x_dim1 + 1], &x[(x_dim1 << 1) - + 1]); - *xnorm = (r__1 = x[x_dim1 + 1], dabs(r__1)) + (r__2 = x[(x_dim1 << - 1) + 1], dabs(r__2)); - } - - } else { - -/* - 2x2 System - - Compute the real part of C = ca A - w D (or ca A' - w D ) -*/ - - cr[0] = *ca * a[a_dim1 + 1] - *wr * *d1; - cr[3] = *ca * a[(a_dim1 << 1) + 2] - *wr * *d2; - if (*ltrans) { - cr[2] = *ca * a[a_dim1 + 2]; - cr[1] = *ca * a[(a_dim1 << 1) + 1]; - } else { - cr[1] = *ca * a[a_dim1 + 2]; - cr[2] = *ca * a[(a_dim1 << 1) + 1]; - } - - if (*nw == 1) { - -/* - Real 2x2 system (w is real) - - Find the largest element in C -*/ - - cmax = 0.f; - icmax = 0; - - for (j = 1; j <= 4; ++j) { - if ((r__1 = crv[j - 1], dabs(r__1)) > cmax) { - cmax = (r__1 = crv[j - 1], dabs(r__1)); - icmax = j; - } -/* L10: */ - } - -/* If norm(C) < SMINI, use SMINI*identity. */ - - if (cmax < smini) { -/* Computing MAX */ - r__3 = (r__1 = b[b_dim1 + 1], dabs(r__1)), r__4 = (r__2 = b[ - b_dim1 + 2], dabs(r__2)); - bnorm = dmax(r__3,r__4); - if (smini < 1.f && bnorm > 1.f) { - if (bnorm > bignum * smini) { - *scale = 1.f / bnorm; - } - } - temp = *scale / smini; - x[x_dim1 + 1] = temp * b[b_dim1 + 1]; - x[x_dim1 + 2] = temp * b[b_dim1 + 2]; - *xnorm = temp * bnorm; - *info = 1; - return 0; - } - -/* Gaussian elimination with complete pivoting. */ - - ur11 = crv[icmax - 1]; - cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; - ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; - cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; - ur11r = 1.f / ur11; - lr21 = ur11r * cr21; - ur22 = cr22 - ur12 * lr21; - -/* If smaller pivot < SMINI, use SMINI */ - - if (dabs(ur22) < smini) { - ur22 = smini; - *info = 1; - } - if (rswap[icmax - 1]) { - br1 = b[b_dim1 + 2]; - br2 = b[b_dim1 + 1]; - } else { - br1 = b[b_dim1 + 1]; - br2 = b[b_dim1 + 2]; - } - br2 -= lr21 * br1; -/* Computing MAX */ - r__2 = (r__1 = br1 * (ur22 * ur11r), dabs(r__1)), r__3 = dabs(br2) - ; - bbnd = dmax(r__2,r__3); - if (bbnd > 1.f && dabs(ur22) < 1.f) { - if (bbnd >= bignum * dabs(ur22)) { - *scale = 1.f / bbnd; - } - } - - xr2 = br2 * *scale / ur22; - xr1 = *scale * br1 * ur11r - xr2 * (ur11r * ur12); - if (cswap[icmax - 1]) { - x[x_dim1 + 1] = xr2; - x[x_dim1 + 2] = xr1; - } else { - x[x_dim1 + 1] = xr1; - x[x_dim1 + 2] = xr2; - } -/* Computing MAX */ - r__1 = dabs(xr1), r__2 = dabs(xr2); - *xnorm = dmax(r__1,r__2); - -/* Further scaling if norm(A) norm(X) > overflow */ - - if (*xnorm > 1.f && cmax > 1.f) { - if (*xnorm > bignum / cmax) { - temp = cmax / bignum; - x[x_dim1 + 1] = temp * x[x_dim1 + 1]; - x[x_dim1 + 2] = temp * x[x_dim1 + 2]; - *xnorm = temp * *xnorm; - *scale = temp * *scale; - } - } - } else { - -/* - Complex 2x2 system (w is complex) - - Find the largest element in C -*/ - - ci[0] = -(*wi) * *d1; - ci[1] = 0.f; - ci[2] = 0.f; - ci[3] = -(*wi) * *d2; - cmax = 0.f; - icmax = 0; - - for (j = 1; j <= 4; ++j) { - if ((r__1 = crv[j - 1], dabs(r__1)) + (r__2 = civ[j - 1], - dabs(r__2)) > cmax) { - cmax = (r__1 = crv[j - 1], dabs(r__1)) + (r__2 = civ[j - - 1], dabs(r__2)); - icmax = j; - } -/* L20: */ - } - -/* If norm(C) < SMINI, use SMINI*identity. */ - - if (cmax < smini) { -/* Computing MAX */ - r__5 = (r__1 = b[b_dim1 + 1], dabs(r__1)) + (r__2 = b[(b_dim1 - << 1) + 1], dabs(r__2)), r__6 = (r__3 = b[b_dim1 + 2], - dabs(r__3)) + (r__4 = b[(b_dim1 << 1) + 2], dabs( - r__4)); - bnorm = dmax(r__5,r__6); - if (smini < 1.f && bnorm > 1.f) { - if (bnorm > bignum * smini) { - *scale = 1.f / bnorm; - } - } - temp = *scale / smini; - x[x_dim1 + 1] = temp * b[b_dim1 + 1]; - x[x_dim1 + 2] = temp * b[b_dim1 + 2]; - x[(x_dim1 << 1) + 1] = temp * b[(b_dim1 << 1) + 1]; - x[(x_dim1 << 1) + 2] = temp * b[(b_dim1 << 1) + 2]; - *xnorm = temp * bnorm; - *info = 1; - return 0; - } - -/* Gaussian elimination with complete pivoting. */ - - ur11 = crv[icmax - 1]; - ui11 = civ[icmax - 1]; - cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; - ci21 = civ[ipivot[(icmax << 2) - 3] - 1]; - ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; - ui12 = civ[ipivot[(icmax << 2) - 2] - 1]; - cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; - ci22 = civ[ipivot[(icmax << 2) - 1] - 1]; - if (icmax == 1 || icmax == 4) { - -/* Code when off-diagonals of pivoted C are real */ - - if (dabs(ur11) > dabs(ui11)) { - temp = ui11 / ur11; -/* Computing 2nd power */ - r__1 = temp; - ur11r = 1.f / (ur11 * (r__1 * r__1 + 1.f)); - ui11r = -temp * ur11r; - } else { - temp = ur11 / ui11; -/* Computing 2nd power */ - r__1 = temp; - ui11r = -1.f / (ui11 * (r__1 * r__1 + 1.f)); - ur11r = -temp * ui11r; - } - lr21 = cr21 * ur11r; - li21 = cr21 * ui11r; - ur12s = ur12 * ur11r; - ui12s = ur12 * ui11r; - ur22 = cr22 - ur12 * lr21; - ui22 = ci22 - ur12 * li21; - } else { - -/* Code when diagonals of pivoted C are real */ - - ur11r = 1.f / ur11; - ui11r = 0.f; - lr21 = cr21 * ur11r; - li21 = ci21 * ur11r; - ur12s = ur12 * ur11r; - ui12s = ui12 * ur11r; - ur22 = cr22 - ur12 * lr21 + ui12 * li21; - ui22 = -ur12 * li21 - ui12 * lr21; - } - u22abs = dabs(ur22) + dabs(ui22); - -/* If smaller pivot < SMINI, use SMINI */ - - if (u22abs < smini) { - ur22 = smini; - ui22 = 0.f; - *info = 1; - } - if (rswap[icmax - 1]) { - br2 = b[b_dim1 + 1]; - br1 = b[b_dim1 + 2]; - bi2 = b[(b_dim1 << 1) + 1]; - bi1 = b[(b_dim1 << 1) + 2]; - } else { - br1 = b[b_dim1 + 1]; - br2 = b[b_dim1 + 2]; - bi1 = b[(b_dim1 << 1) + 1]; - bi2 = b[(b_dim1 << 1) + 2]; - } - br2 = br2 - lr21 * br1 + li21 * bi1; - bi2 = bi2 - li21 * br1 - lr21 * bi1; -/* Computing MAX */ - r__1 = (dabs(br1) + dabs(bi1)) * (u22abs * (dabs(ur11r) + dabs( - ui11r))), r__2 = dabs(br2) + dabs(bi2); - bbnd = dmax(r__1,r__2); - if (bbnd > 1.f && u22abs < 1.f) { - if (bbnd >= bignum * u22abs) { - *scale = 1.f / bbnd; - br1 = *scale * br1; - bi1 = *scale * bi1; - br2 = *scale * br2; - bi2 = *scale * bi2; - } - } - - sladiv_(&br2, &bi2, &ur22, &ui22, &xr2, &xi2); - xr1 = ur11r * br1 - ui11r * bi1 - ur12s * xr2 + ui12s * xi2; - xi1 = ui11r * br1 + ur11r * bi1 - ui12s * xr2 - ur12s * xi2; - if (cswap[icmax - 1]) { - x[x_dim1 + 1] = xr2; - x[x_dim1 + 2] = xr1; - x[(x_dim1 << 1) + 1] = xi2; - x[(x_dim1 << 1) + 2] = xi1; - } else { - x[x_dim1 + 1] = xr1; - x[x_dim1 + 2] = xr2; - x[(x_dim1 << 1) + 1] = xi1; - x[(x_dim1 << 1) + 2] = xi2; - } -/* Computing MAX */ - r__1 = dabs(xr1) + dabs(xi1), r__2 = dabs(xr2) + dabs(xi2); - *xnorm = dmax(r__1,r__2); - -/* Further scaling if norm(A) norm(X) > overflow */ - - if (*xnorm > 1.f && cmax > 1.f) { - if (*xnorm > bignum / cmax) { - temp = cmax / bignum; - x[x_dim1 + 1] = temp * x[x_dim1 + 1]; - x[x_dim1 + 2] = temp * x[x_dim1 + 2]; - x[(x_dim1 << 1) + 1] = temp * x[(x_dim1 << 1) + 1]; - x[(x_dim1 << 1) + 2] = temp * x[(x_dim1 << 1) + 2]; - *xnorm = temp * *xnorm; - *scale = temp * *scale; - } - } - } - } - - return 0; - -/* End of SLALN2 */ - -} /* slaln2_ */ - -#undef crv -#undef civ -#undef cr -#undef ci - - -doublereal slamch_(char *cmach) -{ - /* Initialized data */ - - static logical first = TRUE_; - - /* System generated locals */ - integer i__1; - real ret_val; - - /* Builtin functions */ - double pow_ri(real *, integer *); - - /* Local variables */ - static real t; - static integer it; - static real rnd, eps, base; - static integer beta; - static real emin, prec, emax; - static integer imin, imax; - static logical lrnd; - static real rmin, rmax, rmach; - extern logical lsame_(char *, char *); - static real small, sfmin; - extern /* Subroutine */ int slamc2_(integer *, integer *, logical *, real - *, integer *, real *, integer *, real *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - SLAMCH determines single precision machine parameters. - - Arguments - ========= - - CMACH (input) CHARACTER*1 - Specifies the value to be returned by SLAMCH: - = 'E' or 'e', SLAMCH := eps - = 'S' or 's , SLAMCH := sfmin - = 'B' or 'b', SLAMCH := base - = 'P' or 'p', SLAMCH := eps*base - = 'N' or 'n', SLAMCH := t - = 'R' or 'r', SLAMCH := rnd - = 'M' or 'm', SLAMCH := emin - = 'U' or 'u', SLAMCH := rmin - = 'L' or 'l', SLAMCH := emax - = 'O' or 'o', SLAMCH := rmax - - where - - eps = relative machine precision - sfmin = safe minimum, such that 1/sfmin does not overflow - base = base of the machine - prec = eps*base - t = number of (base) digits in the mantissa - rnd = 1.0 when rounding occurs in addition, 0.0 otherwise - emin = minimum exponent before (gradual) underflow - rmin = underflow threshold - base**(emin-1) - emax = largest exponent before overflow - rmax = overflow threshold - (base**emax)*(1-eps) - - ===================================================================== -*/ - - - if (first) { - first = FALSE_; - slamc2_(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax); - base = (real) beta; - t = (real) it; - if (lrnd) { - rnd = 1.f; - i__1 = 1 - it; - eps = pow_ri(&base, &i__1) / 2; - } else { - rnd = 0.f; - i__1 = 1 - it; - eps = pow_ri(&base, &i__1); - } - prec = eps * base; - emin = (real) imin; - emax = (real) imax; - sfmin = rmin; - small = 1.f / rmax; - if (small >= sfmin) { - -/* - Use SMALL plus a bit, to avoid the possibility of rounding - causing overflow when computing 1/sfmin. -*/ - - sfmin = small * (eps + 1.f); - } - } - - if (lsame_(cmach, "E")) { - rmach = eps; - } else if (lsame_(cmach, "S")) { - rmach = sfmin; - } else if (lsame_(cmach, "B")) { - rmach = base; - } else if (lsame_(cmach, "P")) { - rmach = prec; - } else if (lsame_(cmach, "N")) { - rmach = t; - } else if (lsame_(cmach, "R")) { - rmach = rnd; - } else if (lsame_(cmach, "M")) { - rmach = emin; - } else if (lsame_(cmach, "U")) { - rmach = rmin; - } else if (lsame_(cmach, "L")) { - rmach = emax; - } else if (lsame_(cmach, "O")) { - rmach = rmax; - } - - ret_val = rmach; - return ret_val; - -/* End of SLAMCH */ - -} /* slamch_ */ - - -/* *********************************************************************** */ - -/* Subroutine */ int slamc1_(integer *beta, integer *t, logical *rnd, logical - *ieee1) -{ - /* Initialized data */ - - static logical first = TRUE_; - - /* System generated locals */ - real r__1, r__2; - - /* Local variables */ - static real a, b, c__, f, t1, t2; - static integer lt; - static real one, qtr; - static logical lrnd; - static integer lbeta; - static real savec; - static logical lieee1; - extern doublereal slamc3_(real *, real *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - SLAMC1 determines the machine parameters given by BETA, T, RND, and - IEEE1. - - Arguments - ========= - - BETA (output) INTEGER - The base of the machine. - - T (output) INTEGER - The number of ( BETA ) digits in the mantissa. - - RND (output) LOGICAL - Specifies whether proper rounding ( RND = .TRUE. ) or - chopping ( RND = .FALSE. ) occurs in addition. This may not - be a reliable guide to the way in which the machine performs - its arithmetic. - - IEEE1 (output) LOGICAL - Specifies whether rounding appears to be done in the IEEE - 'round to nearest' style. - - Further Details - =============== - - The routine is based on the routine ENVRON by Malcolm and - incorporates suggestions by Gentleman and Marovich. See - - Malcolm M. A. (1972) Algorithms to reveal properties of - floating-point arithmetic. Comms. of the ACM, 15, 949-951. - - Gentleman W. M. and Marovich S. B. (1974) More on algorithms - that reveal properties of floating point arithmetic units. - Comms. of the ACM, 17, 276-277. - - ===================================================================== -*/ - - - if (first) { - first = FALSE_; - one = 1.f; - -/* - LBETA, LIEEE1, LT and LRND are the local values of BETA, - IEEE1, T and RND. - - Throughout this routine we use the function SLAMC3 to ensure - that relevant values are stored and not held in registers, or - are not affected by optimizers. - - Compute a = 2.0**m with the smallest positive integer m such - that - - fl( a + 1.0 ) = a. -*/ - - a = 1.f; - c__ = 1.f; - -/* + WHILE( C.EQ.ONE )LOOP */ -L10: - if (c__ == one) { - a *= 2; - c__ = slamc3_(&a, &one); - r__1 = -a; - c__ = slamc3_(&c__, &r__1); - goto L10; - } -/* - + END WHILE - - Now compute b = 2.0**m with the smallest positive integer m - such that - - fl( a + b ) .gt. a. -*/ - - b = 1.f; - c__ = slamc3_(&a, &b); - -/* + WHILE( C.EQ.A )LOOP */ -L20: - if (c__ == a) { - b *= 2; - c__ = slamc3_(&a, &b); - goto L20; - } -/* - + END WHILE - - Now compute the base. a and c are neighbouring floating point - numbers in the interval ( beta**t, beta**( t + 1 ) ) and so - their difference is beta. Adding 0.25 to c is to ensure that it - is truncated to beta and not ( beta - 1 ). -*/ - - qtr = one / 4; - savec = c__; - r__1 = -a; - c__ = slamc3_(&c__, &r__1); - lbeta = c__ + qtr; - -/* - Now determine whether rounding or chopping occurs, by adding a - bit less than beta/2 and a bit more than beta/2 to a. -*/ - - b = (real) lbeta; - r__1 = b / 2; - r__2 = -b / 100; - f = slamc3_(&r__1, &r__2); - c__ = slamc3_(&f, &a); - if (c__ == a) { - lrnd = TRUE_; - } else { - lrnd = FALSE_; - } - r__1 = b / 2; - r__2 = b / 100; - f = slamc3_(&r__1, &r__2); - c__ = slamc3_(&f, &a); - if (lrnd && c__ == a) { - lrnd = FALSE_; - } - -/* - Try and decide whether rounding is done in the IEEE 'round to - nearest' style. B/2 is half a unit in the last place of the two - numbers A and SAVEC. Furthermore, A is even, i.e. has last bit - zero, and SAVEC is odd. Thus adding B/2 to A should not change - A, but adding B/2 to SAVEC should change SAVEC. -*/ - - r__1 = b / 2; - t1 = slamc3_(&r__1, &a); - r__1 = b / 2; - t2 = slamc3_(&r__1, &savec); - lieee1 = t1 == a && t2 > savec && lrnd; - -/* - Now find the mantissa, t. It should be the integer part of - log to the base beta of a, however it is safer to determine t - by powering. So we find t as the smallest positive integer for - which - - fl( beta**t + 1.0 ) = 1.0. -*/ - - lt = 0; - a = 1.f; - c__ = 1.f; - -/* + WHILE( C.EQ.ONE )LOOP */ -L30: - if (c__ == one) { - ++lt; - a *= lbeta; - c__ = slamc3_(&a, &one); - r__1 = -a; - c__ = slamc3_(&c__, &r__1); - goto L30; - } -/* + END WHILE */ - - } - - *beta = lbeta; - *t = lt; - *rnd = lrnd; - *ieee1 = lieee1; - return 0; - -/* End of SLAMC1 */ - -} /* slamc1_ */ - - -/* *********************************************************************** */ - -/* Subroutine */ int slamc2_(integer *beta, integer *t, logical *rnd, real * - eps, integer *emin, real *rmin, integer *emax, real *rmax) -{ - /* Initialized data */ - - static logical first = TRUE_; - static logical iwarn = FALSE_; - - /* Format strings */ - static char fmt_9999[] = "(//\002 WARNING. The value EMIN may be incorre" - "ct:-\002,\002 EMIN = \002,i8,/\002 If, after inspection, the va" - "lue EMIN looks\002,\002 acceptable please comment out \002,/\002" - " the IF block as marked within the code of routine\002,\002 SLAM" - "C2,\002,/\002 otherwise supply EMIN explicitly.\002,/)"; - - /* System generated locals */ - integer i__1; - real r__1, r__2, r__3, r__4, r__5; - - /* Builtin functions */ - double pow_ri(real *, integer *); - integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); - - /* Local variables */ - static real a, b, c__; - static integer i__, lt; - static real one, two; - static logical ieee; - static real half; - static logical lrnd; - static real leps, zero; - static integer lbeta; - static real rbase; - static integer lemin, lemax, gnmin; - static real small; - static integer gpmin; - static real third, lrmin, lrmax, sixth; - static logical lieee1; - extern /* Subroutine */ int slamc1_(integer *, integer *, logical *, - logical *); - extern doublereal slamc3_(real *, real *); - extern /* Subroutine */ int slamc4_(integer *, real *, integer *), - slamc5_(integer *, integer *, integer *, logical *, integer *, - real *); - static integer ngnmin, ngpmin; - - /* Fortran I/O blocks */ - static cilist io___2878 = { 0, 6, 0, fmt_9999, 0 }; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - SLAMC2 determines the machine parameters specified in its argument - list. - - Arguments - ========= - - BETA (output) INTEGER - The base of the machine. - - T (output) INTEGER - The number of ( BETA ) digits in the mantissa. - - RND (output) LOGICAL - Specifies whether proper rounding ( RND = .TRUE. ) or - chopping ( RND = .FALSE. ) occurs in addition. This may not - be a reliable guide to the way in which the machine performs - its arithmetic. - - EPS (output) REAL - The smallest positive number such that - - fl( 1.0 - EPS ) .LT. 1.0, - - where fl denotes the computed value. - - EMIN (output) INTEGER - The minimum exponent before (gradual) underflow occurs. - - RMIN (output) REAL - The smallest normalized number for the machine, given by - BASE**( EMIN - 1 ), where BASE is the floating point value - of BETA. - - EMAX (output) INTEGER - The maximum exponent before overflow occurs. - - RMAX (output) REAL - The largest positive number for the machine, given by - BASE**EMAX * ( 1 - EPS ), where BASE is the floating point - value of BETA. - - Further Details - =============== - - The computation of EPS is based on a routine PARANOIA by - W. Kahan of the University of California at Berkeley. - - ===================================================================== -*/ - - - if (first) { - first = FALSE_; - zero = 0.f; - one = 1.f; - two = 2.f; - -/* - LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of - BETA, T, RND, EPS, EMIN and RMIN. - - Throughout this routine we use the function SLAMC3 to ensure - that relevant values are stored and not held in registers, or - are not affected by optimizers. - - SLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. -*/ - - slamc1_(&lbeta, <, &lrnd, &lieee1); - -/* Start to find EPS. */ - - b = (real) lbeta; - i__1 = -lt; - a = pow_ri(&b, &i__1); - leps = a; - -/* Try some tricks to see whether or not this is the correct EPS. */ - - b = two / 3; - half = one / 2; - r__1 = -half; - sixth = slamc3_(&b, &r__1); - third = slamc3_(&sixth, &sixth); - r__1 = -half; - b = slamc3_(&third, &r__1); - b = slamc3_(&b, &sixth); - b = dabs(b); - if (b < leps) { - b = leps; - } - - leps = 1.f; - -/* + WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */ -L10: - if (leps > b && b > zero) { - leps = b; - r__1 = half * leps; -/* Computing 5th power */ - r__3 = two, r__4 = r__3, r__3 *= r__3; -/* Computing 2nd power */ - r__5 = leps; - r__2 = r__4 * (r__3 * r__3) * (r__5 * r__5); - c__ = slamc3_(&r__1, &r__2); - r__1 = -c__; - c__ = slamc3_(&half, &r__1); - b = slamc3_(&half, &c__); - r__1 = -b; - c__ = slamc3_(&half, &r__1); - b = slamc3_(&half, &c__); - goto L10; - } -/* + END WHILE */ - - if (a < leps) { - leps = a; - } - -/* - Computation of EPS complete. - - Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3)). - Keep dividing A by BETA until (gradual) underflow occurs. This - is detected when we cannot recover the previous A. -*/ - - rbase = one / lbeta; - small = one; - for (i__ = 1; i__ <= 3; ++i__) { - r__1 = small * rbase; - small = slamc3_(&r__1, &zero); -/* L20: */ - } - a = slamc3_(&one, &small); - slamc4_(&ngpmin, &one, &lbeta); - r__1 = -one; - slamc4_(&ngnmin, &r__1, &lbeta); - slamc4_(&gpmin, &a, &lbeta); - r__1 = -a; - slamc4_(&gnmin, &r__1, &lbeta); - ieee = FALSE_; - - if (ngpmin == ngnmin && gpmin == gnmin) { - if (ngpmin == gpmin) { - lemin = ngpmin; -/* - ( Non twos-complement machines, no gradual underflow; - e.g., VAX ) -*/ - } else if (gpmin - ngpmin == 3) { - lemin = ngpmin - 1 + lt; - ieee = TRUE_; -/* - ( Non twos-complement machines, with gradual underflow; - e.g., IEEE standard followers ) -*/ - } else { - lemin = min(ngpmin,gpmin); -/* ( A guess; no known machine ) */ - iwarn = TRUE_; - } - - } else if (ngpmin == gpmin && ngnmin == gnmin) { - if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) { - lemin = max(ngpmin,ngnmin); -/* - ( Twos-complement machines, no gradual underflow; - e.g., CYBER 205 ) -*/ - } else { - lemin = min(ngpmin,ngnmin); -/* ( A guess; no known machine ) */ - iwarn = TRUE_; - } - - } else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin) - { - if (gpmin - min(ngpmin,ngnmin) == 3) { - lemin = max(ngpmin,ngnmin) - 1 + lt; -/* - ( Twos-complement machines with gradual underflow; - no known machine ) -*/ - } else { - lemin = min(ngpmin,ngnmin); -/* ( A guess; no known machine ) */ - iwarn = TRUE_; - } - - } else { -/* Computing MIN */ - i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin); - lemin = min(i__1,gnmin); -/* ( A guess; no known machine ) */ - iwarn = TRUE_; - } -/* - ** - Comment out this if block if EMIN is ok -*/ - if (iwarn) { - first = TRUE_; - s_wsfe(&io___2878); - do_fio(&c__1, (char *)&lemin, (ftnlen)sizeof(integer)); - e_wsfe(); - } -/* - ** - - Assume IEEE arithmetic if we found denormalised numbers above, - or if arithmetic seems to round in the IEEE style, determined - in routine SLAMC1. A true IEEE machine should have both things - true; however, faulty machines may have one or the other. -*/ - - ieee = ieee || lieee1; - -/* - Compute RMIN by successive division by BETA. We could compute - RMIN as BASE**( EMIN - 1 ), but some machines underflow during - this computation. -*/ - - lrmin = 1.f; - i__1 = 1 - lemin; - for (i__ = 1; i__ <= i__1; ++i__) { - r__1 = lrmin * rbase; - lrmin = slamc3_(&r__1, &zero); -/* L30: */ - } - -/* Finally, call SLAMC5 to compute EMAX and RMAX. */ - - slamc5_(&lbeta, <, &lemin, &ieee, &lemax, &lrmax); - } - - *beta = lbeta; - *t = lt; - *rnd = lrnd; - *eps = leps; - *emin = lemin; - *rmin = lrmin; - *emax = lemax; - *rmax = lrmax; - - return 0; - - -/* End of SLAMC2 */ - -} /* slamc2_ */ - - -/* *********************************************************************** */ - -doublereal slamc3_(real *a, real *b) -{ - /* System generated locals */ - real ret_val; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - SLAMC3 is intended to force A and B to be stored prior to doing - the addition of A and B , for use in situations where optimizers - might hold one of these in a register. - - Arguments - ========= - - A, B (input) REAL - The values A and B. - - ===================================================================== -*/ - - - ret_val = *a + *b; - - return ret_val; - -/* End of SLAMC3 */ - -} /* slamc3_ */ - - -/* *********************************************************************** */ - -/* Subroutine */ int slamc4_(integer *emin, real *start, integer *base) -{ - /* System generated locals */ - integer i__1; - real r__1; - - /* Local variables */ - static real a; - static integer i__; - static real b1, b2, c1, c2, d1, d2, one, zero, rbase; - extern doublereal slamc3_(real *, real *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - SLAMC4 is a service routine for SLAMC2. - - Arguments - ========= - - EMIN (output) EMIN - The minimum exponent before (gradual) underflow, computed by - setting A = START and dividing by BASE until the previous A - can not be recovered. - - START (input) REAL - The starting point for determining EMIN. - - BASE (input) INTEGER - The base of the machine. - - ===================================================================== -*/ - - - a = *start; - one = 1.f; - rbase = one / *base; - zero = 0.f; - *emin = 1; - r__1 = a * rbase; - b1 = slamc3_(&r__1, &zero); - c1 = a; - c2 = a; - d1 = a; - d2 = a; -/* - + WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. - $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP -*/ -L10: - if (c1 == a && c2 == a && d1 == a && d2 == a) { - --(*emin); - a = b1; - r__1 = a / *base; - b1 = slamc3_(&r__1, &zero); - r__1 = b1 * *base; - c1 = slamc3_(&r__1, &zero); - d1 = zero; - i__1 = *base; - for (i__ = 1; i__ <= i__1; ++i__) { - d1 += b1; -/* L20: */ - } - r__1 = a * rbase; - b2 = slamc3_(&r__1, &zero); - r__1 = b2 / rbase; - c2 = slamc3_(&r__1, &zero); - d2 = zero; - i__1 = *base; - for (i__ = 1; i__ <= i__1; ++i__) { - d2 += b2; -/* L30: */ - } - goto L10; - } -/* + END WHILE */ - - return 0; - -/* End of SLAMC4 */ - -} /* slamc4_ */ - - -/* *********************************************************************** */ - -/* Subroutine */ int slamc5_(integer *beta, integer *p, integer *emin, - logical *ieee, integer *emax, real *rmax) -{ - /* System generated locals */ - integer i__1; - real r__1; - - /* Local variables */ - static integer i__; - static real y, z__; - static integer try__, lexp; - static real oldy; - static integer uexp, nbits; - extern doublereal slamc3_(real *, real *); - static real recbas; - static integer exbits, expsum; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - SLAMC5 attempts to compute RMAX, the largest machine floating-point - number, without overflow. It assumes that EMAX + abs(EMIN) sum - approximately to a power of 2. It will fail on machines where this - assumption does not hold, for example, the Cyber 205 (EMIN = -28625, - EMAX = 28718). It will also fail if the value supplied for EMIN is - too large (i.e. too close to zero), probably with overflow. - - Arguments - ========= - - BETA (input) INTEGER - The base of floating-point arithmetic. - - P (input) INTEGER - The number of base BETA digits in the mantissa of a - floating-point value. - - EMIN (input) INTEGER - The minimum exponent before (gradual) underflow. - - IEEE (input) LOGICAL - A logical flag specifying whether or not the arithmetic - system is thought to comply with the IEEE standard. - - EMAX (output) INTEGER - The largest exponent before overflow - - RMAX (output) REAL - The largest machine floating-point number. - - ===================================================================== - - - First compute LEXP and UEXP, two powers of 2 that bound - abs(EMIN). We then assume that EMAX + abs(EMIN) will sum - approximately to the bound that is closest to abs(EMIN). - (EMAX is the exponent of the required number RMAX). -*/ - - lexp = 1; - exbits = 1; -L10: - try__ = lexp << 1; - if (try__ <= -(*emin)) { - lexp = try__; - ++exbits; - goto L10; - } - if (lexp == -(*emin)) { - uexp = lexp; - } else { - uexp = try__; - ++exbits; - } - -/* - Now -LEXP is less than or equal to EMIN, and -UEXP is greater - than or equal to EMIN. EXBITS is the number of bits needed to - store the exponent. -*/ - - if (uexp + *emin > -lexp - *emin) { - expsum = lexp << 1; - } else { - expsum = uexp << 1; - } - -/* - EXPSUM is the exponent range, approximately equal to - EMAX - EMIN + 1 . -*/ - - *emax = expsum + *emin - 1; - nbits = exbits + 1 + *p; - -/* - NBITS is the total number of bits needed to store a - floating-point number. -*/ - - if (nbits % 2 == 1 && *beta == 2) { - -/* - Either there are an odd number of bits used to store a - floating-point number, which is unlikely, or some bits are - not used in the representation of numbers, which is possible, - (e.g. Cray machines) or the mantissa has an implicit bit, - (e.g. IEEE machines, Dec Vax machines), which is perhaps the - most likely. We have to assume the last alternative. - If this is true, then we need to reduce EMAX by one because - there must be some way of representing zero in an implicit-bit - system. On machines like Cray, we are reducing EMAX by one - unnecessarily. -*/ - - --(*emax); - } - - if (*ieee) { - -/* - Assume we are on an IEEE machine which reserves one exponent - for infinity and NaN. -*/ - - --(*emax); - } - -/* - Now create RMAX, the largest machine number, which should - be equal to (1.0 - BETA**(-P)) * BETA**EMAX . - - First compute 1.0 - BETA**(-P), being careful that the - result is less than 1.0 . -*/ - - recbas = 1.f / *beta; - z__ = *beta - 1.f; - y = 0.f; - i__1 = *p; - for (i__ = 1; i__ <= i__1; ++i__) { - z__ *= recbas; - if (y < 1.f) { - oldy = y; - } - y = slamc3_(&y, &z__); -/* L20: */ - } - if (y >= 1.f) { - y = oldy; - } - -/* Now multiply by BETA**EMAX to get RMAX. */ - - i__1 = *emax; - for (i__ = 1; i__ <= i__1; ++i__) { - r__1 = y * *beta; - y = slamc3_(&r__1, &c_b1101); -/* L30: */ - } - - *rmax = y; - return 0; - -/* End of SLAMC5 */ - -} /* slamc5_ */ - -/* Subroutine */ int slamrg_(integer *n1, integer *n2, real *a, integer * - strd1, integer *strd2, integer *index) -{ - /* System generated locals */ - integer i__1; - - /* Local variables */ - static integer i__, ind1, ind2, n1sv, n2sv; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - SLAMRG will create a permutation list which will merge the elements - of A (which is composed of two independently sorted sets) into a - single set which is sorted in ascending order. - - Arguments - ========= - - N1 (input) INTEGER - N2 (input) INTEGER - These arguements contain the respective lengths of the two - sorted lists to be merged. - - A (input) REAL array, dimension (N1+N2) - The first N1 elements of A contain a list of numbers which - are sorted in either ascending or descending order. Likewise - for the final N2 elements. - - STRD1 (input) INTEGER - STRD2 (input) INTEGER - These are the strides to be taken through the array A. - Allowable strides are 1 and -1. They indicate whether a - subset of A is sorted in ascending (STRDx = 1) or descending - (STRDx = -1) order. - - INDEX (output) INTEGER array, dimension (N1+N2) - On exit this array will contain a permutation such that - if B( I ) = A( INDEX( I ) ) for I=1,N1+N2, then B will be - sorted in ascending order. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --index; - --a; - - /* Function Body */ - n1sv = *n1; - n2sv = *n2; - if (*strd1 > 0) { - ind1 = 1; - } else { - ind1 = *n1; - } - if (*strd2 > 0) { - ind2 = *n1 + 1; - } else { - ind2 = *n1 + *n2; - } - i__ = 1; -/* while ( (N1SV > 0) & (N2SV > 0) ) */ -L10: - if (n1sv > 0 && n2sv > 0) { - if (a[ind1] <= a[ind2]) { - index[i__] = ind1; - ++i__; - ind1 += *strd1; - --n1sv; - } else { - index[i__] = ind2; - ++i__; - ind2 += *strd2; - --n2sv; - } - goto L10; - } -/* end while */ - if (n1sv == 0) { - i__1 = n2sv; - for (n1sv = 1; n1sv <= i__1; ++n1sv) { - index[i__] = ind2; - ++i__; - ind2 += *strd2; -/* L20: */ - } - } else { -/* N2SV .EQ. 0 */ - i__1 = n1sv; - for (n2sv = 1; n2sv <= i__1; ++n2sv) { - index[i__] = ind1; - ++i__; - ind1 += *strd1; -/* L30: */ - } - } - - return 0; - -/* End of SLAMRG */ - -} /* slamrg_ */ - -doublereal slange_(char *norm, integer *m, integer *n, real *a, integer *lda, - real *work) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - real ret_val, r__1, r__2, r__3; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__, j; - static real sum, scale; - extern logical lsame_(char *, char *); - static real value; - extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, - real *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - SLANGE returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - real matrix A. - - Description - =========== - - SLANGE returns the value - - SLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in SLANGE as described - above. - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. When M = 0, - SLANGE is set to zero. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. When N = 0, - SLANGE is set to zero. - - A (input) REAL array, dimension (LDA,N) - The m by n matrix A. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(M,1). - - WORK (workspace) REAL array, dimension (LWORK), - where LWORK >= M when NORM = 'I'; otherwise, WORK is not - referenced. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --work; - - /* Function Body */ - if (min(*m,*n) == 0) { - value = 0.f; - } else if (lsame_(norm, "M")) { - -/* Find max(abs(A(i,j))). */ - - value = 0.f; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - r__2 = value, r__3 = (r__1 = a[i__ + j * a_dim1], dabs(r__1)); - value = dmax(r__2,r__3); -/* L10: */ - } -/* L20: */ - } - } else if (lsame_(norm, "O") || *(unsigned char *) - norm == '1') { - -/* Find norm1(A). */ - - value = 0.f; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = 0.f; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - sum += (r__1 = a[i__ + j * a_dim1], dabs(r__1)); -/* L30: */ - } - value = dmax(value,sum); -/* L40: */ - } - } else if (lsame_(norm, "I")) { - -/* Find normI(A). */ - - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.f; -/* L50: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - work[i__] += (r__1 = a[i__ + j * a_dim1], dabs(r__1)); -/* L60: */ - } -/* L70: */ - } - value = 0.f; - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - r__1 = value, r__2 = work[i__]; - value = dmax(r__1,r__2); -/* L80: */ - } - } else if (lsame_(norm, "F") || lsame_(norm, "E")) { - -/* Find normF(A). */ - - scale = 0.f; - sum = 1.f; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - slassq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum); -/* L90: */ - } - value = scale * sqrt(sum); - } - - ret_val = value; - return ret_val; - -/* End of SLANGE */ - -} /* slange_ */ - -doublereal slanhs_(char *norm, integer *n, real *a, integer *lda, real *work) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - real ret_val, r__1, r__2, r__3; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__, j; - static real sum, scale; - extern logical lsame_(char *, char *); - static real value; - extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, - real *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - SLANHS returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - Hessenberg matrix A. - - Description - =========== - - SLANHS returns the value - - SLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in SLANHS as described - above. - - N (input) INTEGER - The order of the matrix A. N >= 0. When N = 0, SLANHS is - set to zero. - - A (input) REAL array, dimension (LDA,N) - The n by n upper Hessenberg matrix A; the part of A below the - first sub-diagonal is not referenced. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(N,1). - - WORK (workspace) REAL array, dimension (LWORK), - where LWORK >= N when NORM = 'I'; otherwise, WORK is not - referenced. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --work; - - /* Function Body */ - if (*n == 0) { - value = 0.f; - } else if (lsame_(norm, "M")) { - -/* Find max(abs(A(i,j))). */ - - value = 0.f; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - r__2 = value, r__3 = (r__1 = a[i__ + j * a_dim1], dabs(r__1)); - value = dmax(r__2,r__3); -/* L10: */ - } -/* L20: */ - } - } else if (lsame_(norm, "O") || *(unsigned char *) - norm == '1') { - -/* Find norm1(A). */ - - value = 0.f; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = 0.f; -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - sum += (r__1 = a[i__ + j * a_dim1], dabs(r__1)); -/* L30: */ - } - value = dmax(value,sum); -/* L40: */ - } - } else if (lsame_(norm, "I")) { - -/* Find normI(A). */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.f; -/* L50: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - work[i__] += (r__1 = a[i__ + j * a_dim1], dabs(r__1)); -/* L60: */ - } -/* L70: */ - } - value = 0.f; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - r__1 = value, r__2 = work[i__]; - value = dmax(r__1,r__2); -/* L80: */ - } - } else if (lsame_(norm, "F") || lsame_(norm, "E")) { - -/* Find normF(A). */ - - scale = 0.f; - sum = 1.f; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - slassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); -/* L90: */ - } - value = scale * sqrt(sum); - } - - ret_val = value; - return ret_val; - -/* End of SLANHS */ - -} /* slanhs_ */ - -doublereal slanst_(char *norm, integer *n, real *d__, real *e) -{ - /* System generated locals */ - integer i__1; - real ret_val, r__1, r__2, r__3, r__4, r__5; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__; - static real sum, scale; - extern logical lsame_(char *, char *); - static real anorm; - extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, - real *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - SLANST returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - real symmetric tridiagonal matrix A. - - Description - =========== - - SLANST returns the value - - SLANST = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in SLANST as described - above. - - N (input) INTEGER - The order of the matrix A. N >= 0. When N = 0, SLANST is - set to zero. - - D (input) REAL array, dimension (N) - The diagonal elements of A. - - E (input) REAL array, dimension (N-1) - The (n-1) sub-diagonal or super-diagonal elements of A. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --e; - --d__; - - /* Function Body */ - if (*n <= 0) { - anorm = 0.f; - } else if (lsame_(norm, "M")) { - -/* Find max(abs(A(i,j))). */ - - anorm = (r__1 = d__[*n], dabs(r__1)); - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - r__2 = anorm, r__3 = (r__1 = d__[i__], dabs(r__1)); - anorm = dmax(r__2,r__3); -/* Computing MAX */ - r__2 = anorm, r__3 = (r__1 = e[i__], dabs(r__1)); - anorm = dmax(r__2,r__3); -/* L10: */ - } - } else if (lsame_(norm, "O") || *(unsigned char *) - norm == '1' || lsame_(norm, "I")) { - -/* Find norm1(A). */ - - if (*n == 1) { - anorm = dabs(d__[1]); - } else { -/* Computing MAX */ - r__3 = dabs(d__[1]) + dabs(e[1]), r__4 = (r__1 = e[*n - 1], dabs( - r__1)) + (r__2 = d__[*n], dabs(r__2)); - anorm = dmax(r__3,r__4); - i__1 = *n - 1; - for (i__ = 2; i__ <= i__1; ++i__) { -/* Computing MAX */ - r__4 = anorm, r__5 = (r__1 = d__[i__], dabs(r__1)) + (r__2 = - e[i__], dabs(r__2)) + (r__3 = e[i__ - 1], dabs(r__3)); - anorm = dmax(r__4,r__5); -/* L20: */ - } - } - } else if (lsame_(norm, "F") || lsame_(norm, "E")) { - -/* Find normF(A). */ - - scale = 0.f; - sum = 1.f; - if (*n > 1) { - i__1 = *n - 1; - slassq_(&i__1, &e[1], &c__1, &scale, &sum); - sum *= 2; - } - slassq_(n, &d__[1], &c__1, &scale, &sum); - anorm = scale * sqrt(sum); - } - - ret_val = anorm; - return ret_val; - -/* End of SLANST */ - -} /* slanst_ */ - -doublereal slansy_(char *norm, char *uplo, integer *n, real *a, integer *lda, - real *work) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - real ret_val, r__1, r__2, r__3; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__, j; - static real sum, absa, scale; - extern logical lsame_(char *, char *); - static real value; - extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, - real *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - SLANSY returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - real symmetric matrix A. - - Description - =========== - - SLANSY returns the value - - SLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in SLANSY as described - above. - - UPLO (input) CHARACTER*1 - Specifies whether the upper or lower triangular part of the - symmetric matrix A is to be referenced. - = 'U': Upper triangular part of A is referenced - = 'L': Lower triangular part of A is referenced - - N (input) INTEGER - The order of the matrix A. N >= 0. When N = 0, SLANSY is - set to zero. - - A (input) REAL array, dimension (LDA,N) - The symmetric matrix A. If UPLO = 'U', the leading n by n - upper triangular part of A contains the upper triangular part - of the matrix A, and the strictly lower triangular part of A - is not referenced. If UPLO = 'L', the leading n by n lower - triangular part of A contains the lower triangular part of - the matrix A, and the strictly upper triangular part of A is - not referenced. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(N,1). - - WORK (workspace) REAL array, dimension (LWORK), - where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, - WORK is not referenced. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --work; - - /* Function Body */ - if (*n == 0) { - value = 0.f; - } else if (lsame_(norm, "M")) { - -/* Find max(abs(A(i,j))). */ - - value = 0.f; - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - r__2 = value, r__3 = (r__1 = a[i__ + j * a_dim1], dabs( - r__1)); - value = dmax(r__2,r__3); -/* L10: */ - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { -/* Computing MAX */ - r__2 = value, r__3 = (r__1 = a[i__ + j * a_dim1], dabs( - r__1)); - value = dmax(r__2,r__3); -/* L30: */ - } -/* L40: */ - } - } - } else if (lsame_(norm, "I") || lsame_(norm, "O") || *(unsigned char *)norm == '1') { - -/* Find normI(A) ( = norm1(A), since A is symmetric). */ - - value = 0.f; - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = 0.f; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - absa = (r__1 = a[i__ + j * a_dim1], dabs(r__1)); - sum += absa; - work[i__] += absa; -/* L50: */ - } - work[j] = sum + (r__1 = a[j + j * a_dim1], dabs(r__1)); -/* L60: */ - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - r__1 = value, r__2 = work[i__]; - value = dmax(r__1,r__2); -/* L70: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.f; -/* L80: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = work[j] + (r__1 = a[j + j * a_dim1], dabs(r__1)); - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - absa = (r__1 = a[i__ + j * a_dim1], dabs(r__1)); - sum += absa; - work[i__] += absa; -/* L90: */ - } - value = dmax(value,sum); -/* L100: */ - } - } - } else if (lsame_(norm, "F") || lsame_(norm, "E")) { - -/* Find normF(A). */ - - scale = 0.f; - sum = 1.f; - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 2; j <= i__1; ++j) { - i__2 = j - 1; - slassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); -/* L110: */ - } - } else { - i__1 = *n - 1; - for (j = 1; j <= i__1; ++j) { - i__2 = *n - j; - slassq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum); -/* L120: */ - } - } - sum *= 2; - i__1 = *lda + 1; - slassq_(n, &a[a_offset], &i__1, &scale, &sum); - value = scale * sqrt(sum); - } - - ret_val = value; - return ret_val; - -/* End of SLANSY */ - -} /* slansy_ */ - -/* Subroutine */ int slanv2_(real *a, real *b, real *c__, real *d__, real * - rt1r, real *rt1i, real *rt2r, real *rt2i, real *cs, real *sn) -{ - /* System generated locals */ - real r__1, r__2; - - /* Builtin functions */ - double r_sign(real *, real *), sqrt(doublereal); - - /* Local variables */ - static real p, z__, aa, bb, cc, dd, cs1, sn1, sab, sac, eps, tau, temp, - scale, bcmax, bcmis, sigma; - extern doublereal slapy2_(real *, real *), slamch_(char *); - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SLANV2 computes the Schur factorization of a real 2-by-2 nonsymmetric - matrix in standard form: - - [ A B ] = [ CS -SN ] [ AA BB ] [ CS SN ] - [ C D ] [ SN CS ] [ CC DD ] [-SN CS ] - - where either - 1) CC = 0 so that AA and DD are real eigenvalues of the matrix, or - 2) AA = DD and BB*CC < 0, so that AA + or - sqrt(BB*CC) are complex - conjugate eigenvalues. - - Arguments - ========= - - A (input/output) REAL - B (input/output) REAL - C (input/output) REAL - D (input/output) REAL - On entry, the elements of the input matrix. - On exit, they are overwritten by the elements of the - standardised Schur form. - - RT1R (output) REAL - RT1I (output) REAL - RT2R (output) REAL - RT2I (output) REAL - The real and imaginary parts of the eigenvalues. If the - eigenvalues are a complex conjugate pair, RT1I > 0. - - CS (output) REAL - SN (output) REAL - Parameters of the rotation matrix. - - Further Details - =============== - - Modified by V. Sima, Research Institute for Informatics, Bucharest, - Romania, to reduce the risk of cancellation errors, - when computing real eigenvalues, and to ensure, if possible, that - abs(RT1R) >= abs(RT2R). - - ===================================================================== -*/ - - - eps = slamch_("P"); - if (*c__ == 0.f) { - *cs = 1.f; - *sn = 0.f; - goto L10; - - } else if (*b == 0.f) { - -/* Swap rows and columns */ - - *cs = 0.f; - *sn = 1.f; - temp = *d__; - *d__ = *a; - *a = temp; - *b = -(*c__); - *c__ = 0.f; - goto L10; - } else if (*a - *d__ == 0.f && r_sign(&c_b871, b) != r_sign(&c_b871, c__)) - { - *cs = 1.f; - *sn = 0.f; - goto L10; - } else { - - temp = *a - *d__; - p = temp * .5f; -/* Computing MAX */ - r__1 = dabs(*b), r__2 = dabs(*c__); - bcmax = dmax(r__1,r__2); -/* Computing MIN */ - r__1 = dabs(*b), r__2 = dabs(*c__); - bcmis = dmin(r__1,r__2) * r_sign(&c_b871, b) * r_sign(&c_b871, c__); -/* Computing MAX */ - r__1 = dabs(p); - scale = dmax(r__1,bcmax); - z__ = p / scale * p + bcmax / scale * bcmis; - -/* - If Z is of the order of the machine accuracy, postpone the - decision on the nature of eigenvalues -*/ - - if (z__ >= eps * 4.f) { - -/* Real eigenvalues. Compute A and D. */ - - r__1 = sqrt(scale) * sqrt(z__); - z__ = p + r_sign(&r__1, &p); - *a = *d__ + z__; - *d__ -= bcmax / z__ * bcmis; - -/* Compute B and the rotation matrix */ - - tau = slapy2_(c__, &z__); - *cs = z__ / tau; - *sn = *c__ / tau; - *b -= *c__; - *c__ = 0.f; - } else { - -/* - Complex eigenvalues, or real (almost) equal eigenvalues. - Make diagonal elements equal. -*/ - - sigma = *b + *c__; - tau = slapy2_(&sigma, &temp); - *cs = sqrt((dabs(sigma) / tau + 1.f) * .5f); - *sn = -(p / (tau * *cs)) * r_sign(&c_b871, &sigma); - -/* - Compute [ AA BB ] = [ A B ] [ CS -SN ] - [ CC DD ] [ C D ] [ SN CS ] -*/ - - aa = *a * *cs + *b * *sn; - bb = -(*a) * *sn + *b * *cs; - cc = *c__ * *cs + *d__ * *sn; - dd = -(*c__) * *sn + *d__ * *cs; - -/* - Compute [ A B ] = [ CS SN ] [ AA BB ] - [ C D ] [-SN CS ] [ CC DD ] -*/ - - *a = aa * *cs + cc * *sn; - *b = bb * *cs + dd * *sn; - *c__ = -aa * *sn + cc * *cs; - *d__ = -bb * *sn + dd * *cs; - - temp = (*a + *d__) * .5f; - *a = temp; - *d__ = temp; - - if (*c__ != 0.f) { - if (*b != 0.f) { - if (r_sign(&c_b871, b) == r_sign(&c_b871, c__)) { - -/* Real eigenvalues: reduce to upper triangular form */ - - sab = sqrt((dabs(*b))); - sac = sqrt((dabs(*c__))); - r__1 = sab * sac; - p = r_sign(&r__1, c__); - tau = 1.f / sqrt((r__1 = *b + *c__, dabs(r__1))); - *a = temp + p; - *d__ = temp - p; - *b -= *c__; - *c__ = 0.f; - cs1 = sab * tau; - sn1 = sac * tau; - temp = *cs * cs1 - *sn * sn1; - *sn = *cs * sn1 + *sn * cs1; - *cs = temp; - } - } else { - *b = -(*c__); - *c__ = 0.f; - temp = *cs; - *cs = -(*sn); - *sn = temp; - } - } - } - - } - -L10: - -/* Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I). */ - - *rt1r = *a; - *rt2r = *d__; - if (*c__ == 0.f) { - *rt1i = 0.f; - *rt2i = 0.f; - } else { - *rt1i = sqrt((dabs(*b))) * sqrt((dabs(*c__))); - *rt2i = -(*rt1i); - } - return 0; - -/* End of SLANV2 */ - -} /* slanv2_ */ - -doublereal slapy2_(real *x, real *y) -{ - /* System generated locals */ - real ret_val, r__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static real w, z__, xabs, yabs; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - SLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary - overflow. - - Arguments - ========= - - X (input) REAL - Y (input) REAL - X and Y specify the values x and y. - - ===================================================================== -*/ - - - xabs = dabs(*x); - yabs = dabs(*y); - w = dmax(xabs,yabs); - z__ = dmin(xabs,yabs); - if (z__ == 0.f) { - ret_val = w; - } else { -/* Computing 2nd power */ - r__1 = z__ / w; - ret_val = w * sqrt(r__1 * r__1 + 1.f); - } - return ret_val; - -/* End of SLAPY2 */ - -} /* slapy2_ */ - -doublereal slapy3_(real *x, real *y, real *z__) -{ - /* System generated locals */ - real ret_val, r__1, r__2, r__3; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static real w, xabs, yabs, zabs; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - SLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause - unnecessary overflow. - - Arguments - ========= - - X (input) REAL - Y (input) REAL - Z (input) REAL - X, Y and Z specify the values x, y and z. - - ===================================================================== -*/ - - - xabs = dabs(*x); - yabs = dabs(*y); - zabs = dabs(*z__); -/* Computing MAX */ - r__1 = max(xabs,yabs); - w = dmax(r__1,zabs); - if (w == 0.f) { - ret_val = 0.f; - } else { -/* Computing 2nd power */ - r__1 = xabs / w; -/* Computing 2nd power */ - r__2 = yabs / w; -/* Computing 2nd power */ - r__3 = zabs / w; - ret_val = w * sqrt(r__1 * r__1 + r__2 * r__2 + r__3 * r__3); - } - return ret_val; - -/* End of SLAPY3 */ - -} /* slapy3_ */ - -/* Subroutine */ int slarf_(char *side, integer *m, integer *n, real *v, - integer *incv, real *tau, real *c__, integer *ldc, real *work) -{ - /* System generated locals */ - integer c_dim1, c_offset; - real r__1; - - /* Local variables */ - extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, - integer *, real *, integer *, real *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, - real *, integer *, real *, integer *, real *, real *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - SLARF applies a real elementary reflector H to a real m by n matrix - C, from either the left or the right. H is represented in the form - - H = I - tau * v * v' - - where tau is a real scalar and v is a real vector. - - If tau = 0, then H is taken to be the unit matrix. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': form H * C - = 'R': form C * H - - M (input) INTEGER - The number of rows of the matrix C. - - N (input) INTEGER - The number of columns of the matrix C. - - V (input) REAL array, dimension - (1 + (M-1)*abs(INCV)) if SIDE = 'L' - or (1 + (N-1)*abs(INCV)) if SIDE = 'R' - The vector v in the representation of H. V is not used if - TAU = 0. - - INCV (input) INTEGER - The increment between elements of v. INCV <> 0. - - TAU (input) REAL - The value tau in the representation of H. - - C (input/output) REAL array, dimension (LDC,N) - On entry, the m by n matrix C. - On exit, C is overwritten by the matrix H * C if SIDE = 'L', - or C * H if SIDE = 'R'. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace) REAL array, dimension - (N) if SIDE = 'L' - or (M) if SIDE = 'R' - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --v; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - if (lsame_(side, "L")) { - -/* Form H * C */ - - if (*tau != 0.f) { - -/* w := C' * v */ - - sgemv_("Transpose", m, n, &c_b871, &c__[c_offset], ldc, &v[1], - incv, &c_b1101, &work[1], &c__1); - -/* C := C - v * w' */ - - r__1 = -(*tau); - sger_(m, n, &r__1, &v[1], incv, &work[1], &c__1, &c__[c_offset], - ldc); - } - } else { - -/* Form C * H */ - - if (*tau != 0.f) { - -/* w := C * v */ - - sgemv_("No transpose", m, n, &c_b871, &c__[c_offset], ldc, &v[1], - incv, &c_b1101, &work[1], &c__1); - -/* C := C - w * v' */ - - r__1 = -(*tau); - sger_(m, n, &r__1, &work[1], &c__1, &v[1], incv, &c__[c_offset], - ldc); - } - } - return 0; - -/* End of SLARF */ - -} /* slarf_ */ - -/* Subroutine */ int slarfb_(char *side, char *trans, char *direct, char * - storev, integer *m, integer *n, integer *k, real *v, integer *ldv, - real *t, integer *ldt, real *c__, integer *ldc, real *work, integer * - ldwork) -{ - /* System generated locals */ - integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, - work_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *), scopy_(integer *, real *, - integer *, real *, integer *), strmm_(char *, char *, char *, - char *, integer *, integer *, real *, real *, integer *, real *, - integer *); - static char transt[1]; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - SLARFB applies a real block reflector H or its transpose H' to a - real m by n matrix C, from either the left or the right. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply H or H' from the Left - = 'R': apply H or H' from the Right - - TRANS (input) CHARACTER*1 - = 'N': apply H (No transpose) - = 'T': apply H' (Transpose) - - DIRECT (input) CHARACTER*1 - Indicates how H is formed from a product of elementary - reflectors - = 'F': H = H(1) H(2) . . . H(k) (Forward) - = 'B': H = H(k) . . . H(2) H(1) (Backward) - - STOREV (input) CHARACTER*1 - Indicates how the vectors which define the elementary - reflectors are stored: - = 'C': Columnwise - = 'R': Rowwise - - M (input) INTEGER - The number of rows of the matrix C. - - N (input) INTEGER - The number of columns of the matrix C. - - K (input) INTEGER - The order of the matrix T (= the number of elementary - reflectors whose product defines the block reflector). - - V (input) REAL array, dimension - (LDV,K) if STOREV = 'C' - (LDV,M) if STOREV = 'R' and SIDE = 'L' - (LDV,N) if STOREV = 'R' and SIDE = 'R' - The matrix V. See further details. - - LDV (input) INTEGER - The leading dimension of the array V. - If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); - if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); - if STOREV = 'R', LDV >= K. - - T (input) REAL array, dimension (LDT,K) - The triangular k by k matrix T in the representation of the - block reflector. - - LDT (input) INTEGER - The leading dimension of the array T. LDT >= K. - - C (input/output) REAL array, dimension (LDC,N) - On entry, the m by n matrix C. - On exit, C is overwritten by H*C or H'*C or C*H or C*H'. - - LDC (input) INTEGER - The leading dimension of the array C. LDA >= max(1,M). - - WORK (workspace) REAL array, dimension (LDWORK,K) - - LDWORK (input) INTEGER - The leading dimension of the array WORK. - If SIDE = 'L', LDWORK >= max(1,N); - if SIDE = 'R', LDWORK >= max(1,M). - - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - v_dim1 = *ldv; - v_offset = 1 + v_dim1; - v -= v_offset; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - work_dim1 = *ldwork; - work_offset = 1 + work_dim1; - work -= work_offset; - - /* Function Body */ - if (*m <= 0 || *n <= 0) { - return 0; - } - - if (lsame_(trans, "N")) { - *(unsigned char *)transt = 'T'; - } else { - *(unsigned char *)transt = 'N'; - } - - if (lsame_(storev, "C")) { - - if (lsame_(direct, "F")) { - -/* - Let V = ( V1 ) (first K rows) - ( V2 ) - where V1 is unit lower triangular. -*/ - - if (lsame_(side, "L")) { - -/* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) - - W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) - - W := C1' -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - scopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], - &c__1); -/* L10: */ - } - -/* W := W * V1 */ - - strmm_("Right", "Lower", "No transpose", "Unit", n, k, & - c_b871, &v[v_offset], ldv, &work[work_offset], ldwork); - if (*m > *k) { - -/* W := W + C2'*V2 */ - - i__1 = *m - *k; - sgemm_("Transpose", "No transpose", n, k, &i__1, &c_b871, - &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + v_dim1], - ldv, &c_b871, &work[work_offset], ldwork); - } - -/* W := W * T' or W * T */ - - strmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b871, & - t[t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - V * W' */ - - if (*m > *k) { - -/* C2 := C2 - V2 * W' */ - - i__1 = *m - *k; - sgemm_("No transpose", "Transpose", &i__1, n, k, &c_b1150, - &v[*k + 1 + v_dim1], ldv, &work[work_offset], - ldwork, &c_b871, &c__[*k + 1 + c_dim1], ldc); - } - -/* W := W * V1' */ - - strmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b871, & - v[v_offset], ldv, &work[work_offset], ldwork); - -/* C1 := C1 - W' */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; -/* L20: */ - } -/* L30: */ - } - - } else if (lsame_(side, "R")) { - -/* - Form C * H or C * H' where C = ( C1 C2 ) - - W := C * V = (C1*V1 + C2*V2) (stored in WORK) - - W := C1 -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - scopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * - work_dim1 + 1], &c__1); -/* L40: */ - } - -/* W := W * V1 */ - - strmm_("Right", "Lower", "No transpose", "Unit", m, k, & - c_b871, &v[v_offset], ldv, &work[work_offset], ldwork); - if (*n > *k) { - -/* W := W + C2 * V2 */ - - i__1 = *n - *k; - sgemm_("No transpose", "No transpose", m, k, &i__1, & - c_b871, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + - 1 + v_dim1], ldv, &c_b871, &work[work_offset], - ldwork); - } - -/* W := W * T or W * T' */ - - strmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b871, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - W * V' */ - - if (*n > *k) { - -/* C2 := C2 - W * V2' */ - - i__1 = *n - *k; - sgemm_("No transpose", "Transpose", m, &i__1, k, &c_b1150, - &work[work_offset], ldwork, &v[*k + 1 + v_dim1], - ldv, &c_b871, &c__[(*k + 1) * c_dim1 + 1], ldc); - } - -/* W := W * V1' */ - - strmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b871, & - v[v_offset], ldv, &work[work_offset], ldwork); - -/* C1 := C1 - W */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; -/* L50: */ - } -/* L60: */ - } - } - - } else { - -/* - Let V = ( V1 ) - ( V2 ) (last K rows) - where V2 is unit upper triangular. -*/ - - if (lsame_(side, "L")) { - -/* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) - - W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) - - W := C2' -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - scopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * - work_dim1 + 1], &c__1); -/* L70: */ - } - -/* W := W * V2 */ - - strmm_("Right", "Upper", "No transpose", "Unit", n, k, & - c_b871, &v[*m - *k + 1 + v_dim1], ldv, &work[ - work_offset], ldwork); - if (*m > *k) { - -/* W := W + C1'*V1 */ - - i__1 = *m - *k; - sgemm_("Transpose", "No transpose", n, k, &i__1, &c_b871, - &c__[c_offset], ldc, &v[v_offset], ldv, &c_b871, & - work[work_offset], ldwork); - } - -/* W := W * T' or W * T */ - - strmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b871, & - t[t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - V * W' */ - - if (*m > *k) { - -/* C1 := C1 - V1 * W' */ - - i__1 = *m - *k; - sgemm_("No transpose", "Transpose", &i__1, n, k, &c_b1150, - &v[v_offset], ldv, &work[work_offset], ldwork, & - c_b871, &c__[c_offset], ldc); - } - -/* W := W * V2' */ - - strmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b871, & - v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], - ldwork); - -/* C2 := C2 - W' */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[*m - *k + j + i__ * c_dim1] -= work[i__ + j * - work_dim1]; -/* L80: */ - } -/* L90: */ - } - - } else if (lsame_(side, "R")) { - -/* - Form C * H or C * H' where C = ( C1 C2 ) - - W := C * V = (C1*V1 + C2*V2) (stored in WORK) - - W := C2 -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - scopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ - j * work_dim1 + 1], &c__1); -/* L100: */ - } - -/* W := W * V2 */ - - strmm_("Right", "Upper", "No transpose", "Unit", m, k, & - c_b871, &v[*n - *k + 1 + v_dim1], ldv, &work[ - work_offset], ldwork); - if (*n > *k) { - -/* W := W + C1 * V1 */ - - i__1 = *n - *k; - sgemm_("No transpose", "No transpose", m, k, &i__1, & - c_b871, &c__[c_offset], ldc, &v[v_offset], ldv, & - c_b871, &work[work_offset], ldwork); - } - -/* W := W * T or W * T' */ - - strmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b871, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - W * V' */ - - if (*n > *k) { - -/* C1 := C1 - W * V1' */ - - i__1 = *n - *k; - sgemm_("No transpose", "Transpose", m, &i__1, k, &c_b1150, - &work[work_offset], ldwork, &v[v_offset], ldv, & - c_b871, &c__[c_offset], ldc); - } - -/* W := W * V2' */ - - strmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b871, & - v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], - ldwork); - -/* C2 := C2 - W */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + (*n - *k + j) * c_dim1] -= work[i__ + j * - work_dim1]; -/* L110: */ - } -/* L120: */ - } - } - } - - } else if (lsame_(storev, "R")) { - - if (lsame_(direct, "F")) { - -/* - Let V = ( V1 V2 ) (V1: first K columns) - where V1 is unit upper triangular. -*/ - - if (lsame_(side, "L")) { - -/* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) - - W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) - - W := C1' -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - scopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], - &c__1); -/* L130: */ - } - -/* W := W * V1' */ - - strmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b871, & - v[v_offset], ldv, &work[work_offset], ldwork); - if (*m > *k) { - -/* W := W + C2'*V2' */ - - i__1 = *m - *k; - sgemm_("Transpose", "Transpose", n, k, &i__1, &c_b871, & - c__[*k + 1 + c_dim1], ldc, &v[(*k + 1) * v_dim1 + - 1], ldv, &c_b871, &work[work_offset], ldwork); - } - -/* W := W * T' or W * T */ - - strmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b871, & - t[t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - V' * W' */ - - if (*m > *k) { - -/* C2 := C2 - V2' * W' */ - - i__1 = *m - *k; - sgemm_("Transpose", "Transpose", &i__1, n, k, &c_b1150, & - v[(*k + 1) * v_dim1 + 1], ldv, &work[work_offset], - ldwork, &c_b871, &c__[*k + 1 + c_dim1], ldc); - } - -/* W := W * V1 */ - - strmm_("Right", "Upper", "No transpose", "Unit", n, k, & - c_b871, &v[v_offset], ldv, &work[work_offset], ldwork); - -/* C1 := C1 - W' */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; -/* L140: */ - } -/* L150: */ - } - - } else if (lsame_(side, "R")) { - -/* - Form C * H or C * H' where C = ( C1 C2 ) - - W := C * V' = (C1*V1' + C2*V2') (stored in WORK) - - W := C1 -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - scopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * - work_dim1 + 1], &c__1); -/* L160: */ - } - -/* W := W * V1' */ - - strmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b871, & - v[v_offset], ldv, &work[work_offset], ldwork); - if (*n > *k) { - -/* W := W + C2 * V2' */ - - i__1 = *n - *k; - sgemm_("No transpose", "Transpose", m, k, &i__1, &c_b871, - &c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k + 1) * - v_dim1 + 1], ldv, &c_b871, &work[work_offset], - ldwork); - } - -/* W := W * T or W * T' */ - - strmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b871, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - W * V */ - - if (*n > *k) { - -/* C2 := C2 - W * V2 */ - - i__1 = *n - *k; - sgemm_("No transpose", "No transpose", m, &i__1, k, & - c_b1150, &work[work_offset], ldwork, &v[(*k + 1) * - v_dim1 + 1], ldv, &c_b871, &c__[(*k + 1) * - c_dim1 + 1], ldc); - } - -/* W := W * V1 */ - - strmm_("Right", "Upper", "No transpose", "Unit", m, k, & - c_b871, &v[v_offset], ldv, &work[work_offset], ldwork); - -/* C1 := C1 - W */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; -/* L170: */ - } -/* L180: */ - } - - } - - } else { - -/* - Let V = ( V1 V2 ) (V2: last K columns) - where V2 is unit lower triangular. -*/ - - if (lsame_(side, "L")) { - -/* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) - - W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) - - W := C2' -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - scopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * - work_dim1 + 1], &c__1); -/* L190: */ - } - -/* W := W * V2' */ - - strmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b871, & - v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[work_offset] - , ldwork); - if (*m > *k) { - -/* W := W + C1'*V1' */ - - i__1 = *m - *k; - sgemm_("Transpose", "Transpose", n, k, &i__1, &c_b871, & - c__[c_offset], ldc, &v[v_offset], ldv, &c_b871, & - work[work_offset], ldwork); - } - -/* W := W * T' or W * T */ - - strmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b871, & - t[t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - V' * W' */ - - if (*m > *k) { - -/* C1 := C1 - V1' * W' */ - - i__1 = *m - *k; - sgemm_("Transpose", "Transpose", &i__1, n, k, &c_b1150, & - v[v_offset], ldv, &work[work_offset], ldwork, & - c_b871, &c__[c_offset], ldc) - ; - } - -/* W := W * V2 */ - - strmm_("Right", "Lower", "No transpose", "Unit", n, k, & - c_b871, &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); - -/* C2 := C2 - W' */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[*m - *k + j + i__ * c_dim1] -= work[i__ + j * - work_dim1]; -/* L200: */ - } -/* L210: */ - } - - } else if (lsame_(side, "R")) { - -/* - Form C * H or C * H' where C = ( C1 C2 ) - - W := C * V' = (C1*V1' + C2*V2') (stored in WORK) - - W := C2 -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - scopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ - j * work_dim1 + 1], &c__1); -/* L220: */ - } - -/* W := W * V2' */ - - strmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b871, & - v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[work_offset] - , ldwork); - if (*n > *k) { - -/* W := W + C1 * V1' */ - - i__1 = *n - *k; - sgemm_("No transpose", "Transpose", m, k, &i__1, &c_b871, - &c__[c_offset], ldc, &v[v_offset], ldv, &c_b871, & - work[work_offset], ldwork); - } - -/* W := W * T or W * T' */ - - strmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b871, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - W * V */ - - if (*n > *k) { - -/* C1 := C1 - W * V1 */ - - i__1 = *n - *k; - sgemm_("No transpose", "No transpose", m, &i__1, k, & - c_b1150, &work[work_offset], ldwork, &v[v_offset], - ldv, &c_b871, &c__[c_offset], ldc); - } - -/* W := W * V2 */ - - strmm_("Right", "Lower", "No transpose", "Unit", m, k, & - c_b871, &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); - -/* C1 := C1 - W */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + (*n - *k + j) * c_dim1] -= work[i__ + j * - work_dim1]; -/* L230: */ - } -/* L240: */ - } - - } - - } - } - - return 0; - -/* End of SLARFB */ - -} /* slarfb_ */ - -/* Subroutine */ int slarfg_(integer *n, real *alpha, real *x, integer *incx, - real *tau) -{ - /* System generated locals */ - integer i__1; - real r__1; - - /* Builtin functions */ - double r_sign(real *, real *); - - /* Local variables */ - static integer j, knt; - static real beta; - extern doublereal snrm2_(integer *, real *, integer *); - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); - static real xnorm; - extern doublereal slapy2_(real *, real *), slamch_(char *); - static real safmin, rsafmn; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - SLARFG generates a real elementary reflector H of order n, such - that - - H * ( alpha ) = ( beta ), H' * H = I. - ( x ) ( 0 ) - - where alpha and beta are scalars, and x is an (n-1)-element real - vector. H is represented in the form - - H = I - tau * ( 1 ) * ( 1 v' ) , - ( v ) - - where tau is a real scalar and v is a real (n-1)-element - vector. - - If the elements of x are all zero, then tau = 0 and H is taken to be - the unit matrix. - - Otherwise 1 <= tau <= 2. - - Arguments - ========= - - N (input) INTEGER - The order of the elementary reflector. - - ALPHA (input/output) REAL - On entry, the value alpha. - On exit, it is overwritten with the value beta. - - X (input/output) REAL array, dimension - (1+(N-2)*abs(INCX)) - On entry, the vector x. - On exit, it is overwritten with the vector v. - - INCX (input) INTEGER - The increment between elements of X. INCX > 0. - - TAU (output) REAL - The value tau. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --x; - - /* Function Body */ - if (*n <= 1) { - *tau = 0.f; - return 0; - } - - i__1 = *n - 1; - xnorm = snrm2_(&i__1, &x[1], incx); - - if (xnorm == 0.f) { - -/* H = I */ - - *tau = 0.f; - } else { - -/* general case */ - - r__1 = slapy2_(alpha, &xnorm); - beta = -r_sign(&r__1, alpha); - safmin = slamch_("S") / slamch_("E"); - if (dabs(beta) < safmin) { - -/* XNORM, BETA may be inaccurate; scale X and recompute them */ - - rsafmn = 1.f / safmin; - knt = 0; -L10: - ++knt; - i__1 = *n - 1; - sscal_(&i__1, &rsafmn, &x[1], incx); - beta *= rsafmn; - *alpha *= rsafmn; - if (dabs(beta) < safmin) { - goto L10; - } - -/* New BETA is at most 1, at least SAFMIN */ - - i__1 = *n - 1; - xnorm = snrm2_(&i__1, &x[1], incx); - r__1 = slapy2_(alpha, &xnorm); - beta = -r_sign(&r__1, alpha); - *tau = (beta - *alpha) / beta; - i__1 = *n - 1; - r__1 = 1.f / (*alpha - beta); - sscal_(&i__1, &r__1, &x[1], incx); - -/* If ALPHA is subnormal, it may lose relative accuracy */ - - *alpha = beta; - i__1 = knt; - for (j = 1; j <= i__1; ++j) { - *alpha *= safmin; -/* L20: */ - } - } else { - *tau = (beta - *alpha) / beta; - i__1 = *n - 1; - r__1 = 1.f / (*alpha - beta); - sscal_(&i__1, &r__1, &x[1], incx); - *alpha = beta; - } - } - - return 0; - -/* End of SLARFG */ - -} /* slarfg_ */ - -/* Subroutine */ int slarft_(char *direct, char *storev, integer *n, integer * - k, real *v, integer *ldv, real *tau, real *t, integer *ldt) -{ - /* System generated locals */ - integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3; - real r__1; - - /* Local variables */ - static integer i__, j; - static real vii; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, - real *, integer *, real *, integer *, real *, real *, integer *), strmv_(char *, char *, char *, integer *, real *, - integer *, real *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - SLARFT forms the triangular factor T of a real block reflector H - of order n, which is defined as a product of k elementary reflectors. - - If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; - - If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. - - If STOREV = 'C', the vector which defines the elementary reflector - H(i) is stored in the i-th column of the array V, and - - H = I - V * T * V' - - If STOREV = 'R', the vector which defines the elementary reflector - H(i) is stored in the i-th row of the array V, and - - H = I - V' * T * V - - Arguments - ========= - - DIRECT (input) CHARACTER*1 - Specifies the order in which the elementary reflectors are - multiplied to form the block reflector: - = 'F': H = H(1) H(2) . . . H(k) (Forward) - = 'B': H = H(k) . . . H(2) H(1) (Backward) - - STOREV (input) CHARACTER*1 - Specifies how the vectors which define the elementary - reflectors are stored (see also Further Details): - = 'C': columnwise - = 'R': rowwise - - N (input) INTEGER - The order of the block reflector H. N >= 0. - - K (input) INTEGER - The order of the triangular factor T (= the number of - elementary reflectors). K >= 1. - - V (input/output) REAL array, dimension - (LDV,K) if STOREV = 'C' - (LDV,N) if STOREV = 'R' - The matrix V. See further details. - - LDV (input) INTEGER - The leading dimension of the array V. - If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. - - TAU (input) REAL array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i). - - T (output) REAL array, dimension (LDT,K) - The k by k triangular factor T of the block reflector. - If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is - lower triangular. The rest of the array is not used. - - LDT (input) INTEGER - The leading dimension of the array T. LDT >= K. - - Further Details - =============== - - The shape of the matrix V and the storage of the vectors which define - the H(i) is best illustrated by the following example with n = 5 and - k = 3. The elements equal to 1 are not stored; the corresponding - array elements are modified but restored on exit. The rest of the - array is not used. - - DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': - - V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) - ( v1 1 ) ( 1 v2 v2 v2 ) - ( v1 v2 1 ) ( 1 v3 v3 ) - ( v1 v2 v3 ) - ( v1 v2 v3 ) - - DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': - - V = ( v1 v2 v3 ) V = ( v1 v1 1 ) - ( v1 v2 v3 ) ( v2 v2 v2 1 ) - ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) - ( 1 v3 ) - ( 1 ) - - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - v_dim1 = *ldv; - v_offset = 1 + v_dim1; - v -= v_offset; - --tau; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - - /* Function Body */ - if (*n == 0) { - return 0; - } - - if (lsame_(direct, "F")) { - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - if (tau[i__] == 0.f) { - -/* H(i) = I */ - - i__2 = i__; - for (j = 1; j <= i__2; ++j) { - t[j + i__ * t_dim1] = 0.f; -/* L10: */ - } - } else { - -/* general case */ - - vii = v[i__ + i__ * v_dim1]; - v[i__ + i__ * v_dim1] = 1.f; - if (lsame_(storev, "C")) { - -/* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */ - - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - r__1 = -tau[i__]; - sgemv_("Transpose", &i__2, &i__3, &r__1, &v[i__ + v_dim1], - ldv, &v[i__ + i__ * v_dim1], &c__1, &c_b1101, &t[ - i__ * t_dim1 + 1], &c__1); - } else { - -/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */ - - i__2 = i__ - 1; - i__3 = *n - i__ + 1; - r__1 = -tau[i__]; - sgemv_("No transpose", &i__2, &i__3, &r__1, &v[i__ * - v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, & - c_b1101, &t[i__ * t_dim1 + 1], &c__1); - } - v[i__ + i__ * v_dim1] = vii; - -/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ - - i__2 = i__ - 1; - strmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ - t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); - t[i__ + i__ * t_dim1] = tau[i__]; - } -/* L20: */ - } - } else { - for (i__ = *k; i__ >= 1; --i__) { - if (tau[i__] == 0.f) { - -/* H(i) = I */ - - i__1 = *k; - for (j = i__; j <= i__1; ++j) { - t[j + i__ * t_dim1] = 0.f; -/* L30: */ - } - } else { - -/* general case */ - - if (i__ < *k) { - if (lsame_(storev, "C")) { - vii = v[*n - *k + i__ + i__ * v_dim1]; - v[*n - *k + i__ + i__ * v_dim1] = 1.f; - -/* - T(i+1:k,i) := - - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) -*/ - - i__1 = *n - *k + i__; - i__2 = *k - i__; - r__1 = -tau[i__]; - sgemv_("Transpose", &i__1, &i__2, &r__1, &v[(i__ + 1) - * v_dim1 + 1], ldv, &v[i__ * v_dim1 + 1], & - c__1, &c_b1101, &t[i__ + 1 + i__ * t_dim1], & - c__1); - v[*n - *k + i__ + i__ * v_dim1] = vii; - } else { - vii = v[i__ + (*n - *k + i__) * v_dim1]; - v[i__ + (*n - *k + i__) * v_dim1] = 1.f; - -/* - T(i+1:k,i) := - - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' -*/ - - i__1 = *k - i__; - i__2 = *n - *k + i__; - r__1 = -tau[i__]; - sgemv_("No transpose", &i__1, &i__2, &r__1, &v[i__ + - 1 + v_dim1], ldv, &v[i__ + v_dim1], ldv, & - c_b1101, &t[i__ + 1 + i__ * t_dim1], &c__1); - v[i__ + (*n - *k + i__) * v_dim1] = vii; - } - -/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ - - i__1 = *k - i__; - strmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ - + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * - t_dim1], &c__1) - ; - } - t[i__ + i__ * t_dim1] = tau[i__]; - } -/* L40: */ - } - } - return 0; - -/* End of SLARFT */ - -} /* slarft_ */ - -/* Subroutine */ int slarfx_(char *side, integer *m, integer *n, real *v, - real *tau, real *c__, integer *ldc, real *work) -{ - /* System generated locals */ - integer c_dim1, c_offset, i__1; - real r__1; - - /* Local variables */ - static integer j; - static real t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, v5, v6, - v7, v8, v9, t10, v10, sum; - extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, - integer *, real *, integer *, real *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, - real *, integer *, real *, integer *, real *, real *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - SLARFX applies a real elementary reflector H to a real m by n - matrix C, from either the left or the right. H is represented in the - form - - H = I - tau * v * v' - - where tau is a real scalar and v is a real vector. - - If tau = 0, then H is taken to be the unit matrix - - This version uses inline code if H has order < 11. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': form H * C - = 'R': form C * H - - M (input) INTEGER - The number of rows of the matrix C. - - N (input) INTEGER - The number of columns of the matrix C. - - V (input) REAL array, dimension (M) if SIDE = 'L' - or (N) if SIDE = 'R' - The vector v in the representation of H. - - TAU (input) REAL - The value tau in the representation of H. - - C (input/output) REAL array, dimension (LDC,N) - On entry, the m by n matrix C. - On exit, C is overwritten by the matrix H * C if SIDE = 'L', - or C * H if SIDE = 'R'. - - LDC (input) INTEGER - The leading dimension of the array C. LDA >= (1,M). - - WORK (workspace) REAL array, dimension - (N) if SIDE = 'L' - or (M) if SIDE = 'R' - WORK is not referenced if H has order < 11. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --v; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - if (*tau == 0.f) { - return 0; - } - if (lsame_(side, "L")) { - -/* Form H * C, where H has order m. */ - - switch (*m) { - case 1: goto L10; - case 2: goto L30; - case 3: goto L50; - case 4: goto L70; - case 5: goto L90; - case 6: goto L110; - case 7: goto L130; - case 8: goto L150; - case 9: goto L170; - case 10: goto L190; - } - -/* - Code for general M - - w := C'*v -*/ - - sgemv_("Transpose", m, n, &c_b871, &c__[c_offset], ldc, &v[1], &c__1, - &c_b1101, &work[1], &c__1); - -/* C := C - tau * v * w' */ - - r__1 = -(*tau); - sger_(m, n, &r__1, &v[1], &c__1, &work[1], &c__1, &c__[c_offset], ldc) - ; - goto L410; -L10: - -/* Special code for 1 x 1 Householder */ - - t1 = 1.f - *tau * v[1] * v[1]; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - c__[j * c_dim1 + 1] = t1 * c__[j * c_dim1 + 1]; -/* L20: */ - } - goto L410; -L30: - -/* Special code for 2 x 2 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; -/* L40: */ - } - goto L410; -L50: - -/* Special code for 3 x 3 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; -/* L60: */ - } - goto L410; -L70: - -/* Special code for 4 x 4 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; - c__[j * c_dim1 + 4] -= sum * t4; -/* L80: */ - } - goto L410; -L90: - -/* Special code for 5 x 5 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ - j * c_dim1 + 5]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; - c__[j * c_dim1 + 4] -= sum * t4; - c__[j * c_dim1 + 5] -= sum * t5; -/* L100: */ - } - goto L410; -L110: - -/* Special code for 6 x 6 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ - j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; - c__[j * c_dim1 + 4] -= sum * t4; - c__[j * c_dim1 + 5] -= sum * t5; - c__[j * c_dim1 + 6] -= sum * t6; -/* L120: */ - } - goto L410; -L130: - -/* Special code for 7 x 7 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - v7 = v[7]; - t7 = *tau * v7; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ - j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * - c_dim1 + 7]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; - c__[j * c_dim1 + 4] -= sum * t4; - c__[j * c_dim1 + 5] -= sum * t5; - c__[j * c_dim1 + 6] -= sum * t6; - c__[j * c_dim1 + 7] -= sum * t7; -/* L140: */ - } - goto L410; -L150: - -/* Special code for 8 x 8 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - v7 = v[7]; - t7 = *tau * v7; - v8 = v[8]; - t8 = *tau * v8; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ - j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * - c_dim1 + 7] + v8 * c__[j * c_dim1 + 8]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; - c__[j * c_dim1 + 4] -= sum * t4; - c__[j * c_dim1 + 5] -= sum * t5; - c__[j * c_dim1 + 6] -= sum * t6; - c__[j * c_dim1 + 7] -= sum * t7; - c__[j * c_dim1 + 8] -= sum * t8; -/* L160: */ - } - goto L410; -L170: - -/* Special code for 9 x 9 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - v7 = v[7]; - t7 = *tau * v7; - v8 = v[8]; - t8 = *tau * v8; - v9 = v[9]; - t9 = *tau * v9; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ - j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * - c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j * - c_dim1 + 9]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; - c__[j * c_dim1 + 4] -= sum * t4; - c__[j * c_dim1 + 5] -= sum * t5; - c__[j * c_dim1 + 6] -= sum * t6; - c__[j * c_dim1 + 7] -= sum * t7; - c__[j * c_dim1 + 8] -= sum * t8; - c__[j * c_dim1 + 9] -= sum * t9; -/* L180: */ - } - goto L410; -L190: - -/* Special code for 10 x 10 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - v7 = v[7]; - t7 = *tau * v7; - v8 = v[8]; - t8 = *tau * v8; - v9 = v[9]; - t9 = *tau * v9; - v10 = v[10]; - t10 = *tau * v10; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ - j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * - c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j * - c_dim1 + 9] + v10 * c__[j * c_dim1 + 10]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; - c__[j * c_dim1 + 4] -= sum * t4; - c__[j * c_dim1 + 5] -= sum * t5; - c__[j * c_dim1 + 6] -= sum * t6; - c__[j * c_dim1 + 7] -= sum * t7; - c__[j * c_dim1 + 8] -= sum * t8; - c__[j * c_dim1 + 9] -= sum * t9; - c__[j * c_dim1 + 10] -= sum * t10; -/* L200: */ - } - goto L410; - } else { - -/* Form C * H, where H has order n. */ - - switch (*n) { - case 1: goto L210; - case 2: goto L230; - case 3: goto L250; - case 4: goto L270; - case 5: goto L290; - case 6: goto L310; - case 7: goto L330; - case 8: goto L350; - case 9: goto L370; - case 10: goto L390; - } - -/* - Code for general N - - w := C * v -*/ - - sgemv_("No transpose", m, n, &c_b871, &c__[c_offset], ldc, &v[1], & - c__1, &c_b1101, &work[1], &c__1); - -/* C := C - tau * w * v' */ - - r__1 = -(*tau); - sger_(m, n, &r__1, &work[1], &c__1, &v[1], &c__1, &c__[c_offset], ldc) - ; - goto L410; -L210: - -/* Special code for 1 x 1 Householder */ - - t1 = 1.f - *tau * v[1] * v[1]; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - c__[j + c_dim1] = t1 * c__[j + c_dim1]; -/* L220: */ - } - goto L410; -L230: - -/* Special code for 2 x 2 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)]; - c__[j + c_dim1] -= sum * t1; - c__[j + (c_dim1 << 1)] -= sum * t2; -/* L240: */ - } - goto L410; -L250: - -/* Special code for 3 x 3 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * - c__[j + c_dim1 * 3]; - c__[j + c_dim1] -= sum * t1; - c__[j + (c_dim1 << 1)] -= sum * t2; - c__[j + c_dim1 * 3] -= sum * t3; -/* L260: */ - } - goto L410; -L270: - -/* Special code for 4 x 4 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * - c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)]; - c__[j + c_dim1] -= sum * t1; - c__[j + (c_dim1 << 1)] -= sum * t2; - c__[j + c_dim1 * 3] -= sum * t3; - c__[j + (c_dim1 << 2)] -= sum * t4; -/* L280: */ - } - goto L410; -L290: - -/* Special code for 5 x 5 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * - c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * - c__[j + c_dim1 * 5]; - c__[j + c_dim1] -= sum * t1; - c__[j + (c_dim1 << 1)] -= sum * t2; - c__[j + c_dim1 * 3] -= sum * t3; - c__[j + (c_dim1 << 2)] -= sum * t4; - c__[j + c_dim1 * 5] -= sum * t5; -/* L300: */ - } - goto L410; -L310: - -/* Special code for 6 x 6 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * - c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * - c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6]; - c__[j + c_dim1] -= sum * t1; - c__[j + (c_dim1 << 1)] -= sum * t2; - c__[j + c_dim1 * 3] -= sum * t3; - c__[j + (c_dim1 << 2)] -= sum * t4; - c__[j + c_dim1 * 5] -= sum * t5; - c__[j + c_dim1 * 6] -= sum * t6; -/* L320: */ - } - goto L410; -L330: - -/* Special code for 7 x 7 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - v7 = v[7]; - t7 = *tau * v7; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * - c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * - c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ - j + c_dim1 * 7]; - c__[j + c_dim1] -= sum * t1; - c__[j + (c_dim1 << 1)] -= sum * t2; - c__[j + c_dim1 * 3] -= sum * t3; - c__[j + (c_dim1 << 2)] -= sum * t4; - c__[j + c_dim1 * 5] -= sum * t5; - c__[j + c_dim1 * 6] -= sum * t6; - c__[j + c_dim1 * 7] -= sum * t7; -/* L340: */ - } - goto L410; -L350: - -/* Special code for 8 x 8 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - v7 = v[7]; - t7 = *tau * v7; - v8 = v[8]; - t8 = *tau * v8; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * - c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * - c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ - j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)]; - c__[j + c_dim1] -= sum * t1; - c__[j + (c_dim1 << 1)] -= sum * t2; - c__[j + c_dim1 * 3] -= sum * t3; - c__[j + (c_dim1 << 2)] -= sum * t4; - c__[j + c_dim1 * 5] -= sum * t5; - c__[j + c_dim1 * 6] -= sum * t6; - c__[j + c_dim1 * 7] -= sum * t7; - c__[j + (c_dim1 << 3)] -= sum * t8; -/* L360: */ - } - goto L410; -L370: - -/* Special code for 9 x 9 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - v7 = v[7]; - t7 = *tau * v7; - v8 = v[8]; - t8 = *tau * v8; - v9 = v[9]; - t9 = *tau * v9; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * - c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * - c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ - j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)] + v9 * c__[ - j + c_dim1 * 9]; - c__[j + c_dim1] -= sum * t1; - c__[j + (c_dim1 << 1)] -= sum * t2; - c__[j + c_dim1 * 3] -= sum * t3; - c__[j + (c_dim1 << 2)] -= sum * t4; - c__[j + c_dim1 * 5] -= sum * t5; - c__[j + c_dim1 * 6] -= sum * t6; - c__[j + c_dim1 * 7] -= sum * t7; - c__[j + (c_dim1 << 3)] -= sum * t8; - c__[j + c_dim1 * 9] -= sum * t9; -/* L380: */ - } - goto L410; -L390: - -/* Special code for 10 x 10 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - v7 = v[7]; - t7 = *tau * v7; - v8 = v[8]; - t8 = *tau * v8; - v9 = v[9]; - t9 = *tau * v9; - v10 = v[10]; - t10 = *tau * v10; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * - c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * - c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ - j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)] + v9 * c__[ - j + c_dim1 * 9] + v10 * c__[j + c_dim1 * 10]; - c__[j + c_dim1] -= sum * t1; - c__[j + (c_dim1 << 1)] -= sum * t2; - c__[j + c_dim1 * 3] -= sum * t3; - c__[j + (c_dim1 << 2)] -= sum * t4; - c__[j + c_dim1 * 5] -= sum * t5; - c__[j + c_dim1 * 6] -= sum * t6; - c__[j + c_dim1 * 7] -= sum * t7; - c__[j + (c_dim1 << 3)] -= sum * t8; - c__[j + c_dim1 * 9] -= sum * t9; - c__[j + c_dim1 * 10] -= sum * t10; -/* L400: */ - } - goto L410; - } -L410: - return 0; - -/* End of SLARFX */ - -} /* slarfx_ */ - -/* Subroutine */ int slartg_(real *f, real *g, real *cs, real *sn, real *r__) -{ - /* Initialized data */ - - static logical first = TRUE_; - - /* System generated locals */ - integer i__1; - real r__1, r__2; - - /* Builtin functions */ - double log(doublereal), pow_ri(real *, integer *), sqrt(doublereal); - - /* Local variables */ - static integer i__; - static real f1, g1, eps, scale; - static integer count; - static real safmn2, safmx2; - extern doublereal slamch_(char *); - static real safmin; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - SLARTG generate a plane rotation so that - - [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1. - [ -SN CS ] [ G ] [ 0 ] - - This is a slower, more accurate version of the BLAS1 routine SROTG, - with the following other differences: - F and G are unchanged on return. - If G=0, then CS=1 and SN=0. - If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any - floating point operations (saves work in SBDSQR when - there are zeros on the diagonal). - - If F exceeds G in magnitude, CS will be positive. - - Arguments - ========= - - F (input) REAL - The first component of vector to be rotated. - - G (input) REAL - The second component of vector to be rotated. - - CS (output) REAL - The cosine of the rotation. - - SN (output) REAL - The sine of the rotation. - - R (output) REAL - The nonzero component of the rotated vector. - - ===================================================================== -*/ - - - if (first) { - first = FALSE_; - safmin = slamch_("S"); - eps = slamch_("E"); - r__1 = slamch_("B"); - i__1 = (integer) (log(safmin / eps) / log(slamch_("B")) / - 2.f); - safmn2 = pow_ri(&r__1, &i__1); - safmx2 = 1.f / safmn2; - } - if (*g == 0.f) { - *cs = 1.f; - *sn = 0.f; - *r__ = *f; - } else if (*f == 0.f) { - *cs = 0.f; - *sn = 1.f; - *r__ = *g; - } else { - f1 = *f; - g1 = *g; -/* Computing MAX */ - r__1 = dabs(f1), r__2 = dabs(g1); - scale = dmax(r__1,r__2); - if (scale >= safmx2) { - count = 0; -L10: - ++count; - f1 *= safmn2; - g1 *= safmn2; -/* Computing MAX */ - r__1 = dabs(f1), r__2 = dabs(g1); - scale = dmax(r__1,r__2); - if (scale >= safmx2) { - goto L10; - } -/* Computing 2nd power */ - r__1 = f1; -/* Computing 2nd power */ - r__2 = g1; - *r__ = sqrt(r__1 * r__1 + r__2 * r__2); - *cs = f1 / *r__; - *sn = g1 / *r__; - i__1 = count; - for (i__ = 1; i__ <= i__1; ++i__) { - *r__ *= safmx2; -/* L20: */ - } - } else if (scale <= safmn2) { - count = 0; -L30: - ++count; - f1 *= safmx2; - g1 *= safmx2; -/* Computing MAX */ - r__1 = dabs(f1), r__2 = dabs(g1); - scale = dmax(r__1,r__2); - if (scale <= safmn2) { - goto L30; - } -/* Computing 2nd power */ - r__1 = f1; -/* Computing 2nd power */ - r__2 = g1; - *r__ = sqrt(r__1 * r__1 + r__2 * r__2); - *cs = f1 / *r__; - *sn = g1 / *r__; - i__1 = count; - for (i__ = 1; i__ <= i__1; ++i__) { - *r__ *= safmn2; -/* L40: */ - } - } else { -/* Computing 2nd power */ - r__1 = f1; -/* Computing 2nd power */ - r__2 = g1; - *r__ = sqrt(r__1 * r__1 + r__2 * r__2); - *cs = f1 / *r__; - *sn = g1 / *r__; - } - if (dabs(*f) > dabs(*g) && *cs < 0.f) { - *cs = -(*cs); - *sn = -(*sn); - *r__ = -(*r__); - } - } - return 0; - -/* End of SLARTG */ - -} /* slartg_ */ - -/* Subroutine */ int slas2_(real *f, real *g, real *h__, real *ssmin, real * - ssmax) -{ - /* System generated locals */ - real r__1, r__2; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static real c__, fa, ga, ha, as, at, au, fhmn, fhmx; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - SLAS2 computes the singular values of the 2-by-2 matrix - [ F G ] - [ 0 H ]. - On return, SSMIN is the smaller singular value and SSMAX is the - larger singular value. - - Arguments - ========= - - F (input) REAL - The (1,1) element of the 2-by-2 matrix. - - G (input) REAL - The (1,2) element of the 2-by-2 matrix. - - H (input) REAL - The (2,2) element of the 2-by-2 matrix. - - SSMIN (output) REAL - The smaller singular value. - - SSMAX (output) REAL - The larger singular value. - - Further Details - =============== - - Barring over/underflow, all output quantities are correct to within - a few units in the last place (ulps), even in the absence of a guard - digit in addition/subtraction. - - In IEEE arithmetic, the code works correctly if one matrix element is - infinite. - - Overflow will not occur unless the largest singular value itself - overflows, or is within a few ulps of overflow. (On machines with - partial overflow, like the Cray, overflow may occur if the largest - singular value is within a factor of 2 of overflow.) - - Underflow is harmless if underflow is gradual. Otherwise, results - may correspond to a matrix modified by perturbations of size near - the underflow threshold. - - ==================================================================== -*/ - - - fa = dabs(*f); - ga = dabs(*g); - ha = dabs(*h__); - fhmn = dmin(fa,ha); - fhmx = dmax(fa,ha); - if (fhmn == 0.f) { - *ssmin = 0.f; - if (fhmx == 0.f) { - *ssmax = ga; - } else { -/* Computing 2nd power */ - r__1 = dmin(fhmx,ga) / dmax(fhmx,ga); - *ssmax = dmax(fhmx,ga) * sqrt(r__1 * r__1 + 1.f); - } - } else { - if (ga < fhmx) { - as = fhmn / fhmx + 1.f; - at = (fhmx - fhmn) / fhmx; -/* Computing 2nd power */ - r__1 = ga / fhmx; - au = r__1 * r__1; - c__ = 2.f / (sqrt(as * as + au) + sqrt(at * at + au)); - *ssmin = fhmn * c__; - *ssmax = fhmx / c__; - } else { - au = fhmx / ga; - if (au == 0.f) { - -/* - Avoid possible harmful underflow if exponent range - asymmetric (true SSMIN may not underflow even if - AU underflows) -*/ - - *ssmin = fhmn * fhmx / ga; - *ssmax = ga; - } else { - as = fhmn / fhmx + 1.f; - at = (fhmx - fhmn) / fhmx; -/* Computing 2nd power */ - r__1 = as * au; -/* Computing 2nd power */ - r__2 = at * au; - c__ = 1.f / (sqrt(r__1 * r__1 + 1.f) + sqrt(r__2 * r__2 + 1.f) - ); - *ssmin = fhmn * c__ * au; - *ssmin += *ssmin; - *ssmax = ga / (c__ + c__); - } - } - } - return 0; - -/* End of SLAS2 */ - -} /* slas2_ */ - -/* Subroutine */ int slascl_(char *type__, integer *kl, integer *ku, real * - cfrom, real *cto, integer *m, integer *n, real *a, integer *lda, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - - /* Local variables */ - static integer i__, j, k1, k2, k3, k4; - static real mul, cto1; - static logical done; - static real ctoc; - extern logical lsame_(char *, char *); - static integer itype; - static real cfrom1; - extern doublereal slamch_(char *); - static real cfromc; - extern /* Subroutine */ int xerbla_(char *, integer *); - static real bignum, smlnum; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - SLASCL multiplies the M by N real matrix A by the real scalar - CTO/CFROM. This is done without over/underflow as long as the final - result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that - A may be full, upper triangular, lower triangular, upper Hessenberg, - or banded. - - Arguments - ========= - - TYPE (input) CHARACTER*1 - TYPE indices the storage type of the input matrix. - = 'G': A is a full matrix. - = 'L': A is a lower triangular matrix. - = 'U': A is an upper triangular matrix. - = 'H': A is an upper Hessenberg matrix. - = 'B': A is a symmetric band matrix with lower bandwidth KL - and upper bandwidth KU and with the only the lower - half stored. - = 'Q': A is a symmetric band matrix with lower bandwidth KL - and upper bandwidth KU and with the only the upper - half stored. - = 'Z': A is a band matrix with lower bandwidth KL and upper - bandwidth KU. - - KL (input) INTEGER - The lower bandwidth of A. Referenced only if TYPE = 'B', - 'Q' or 'Z'. - - KU (input) INTEGER - The upper bandwidth of A. Referenced only if TYPE = 'B', - 'Q' or 'Z'. - - CFROM (input) REAL - CTO (input) REAL - The matrix A is multiplied by CTO/CFROM. A(I,J) is computed - without over/underflow if the final result CTO*A(I,J)/CFROM - can be represented without over/underflow. CFROM must be - nonzero. - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) REAL array, dimension (LDA,M) - The matrix to be multiplied by CTO/CFROM. See TYPE for the - storage type. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - INFO (output) INTEGER - 0 - successful exit - <0 - if INFO = -i, the i-th argument had an illegal value. - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - - if (lsame_(type__, "G")) { - itype = 0; - } else if (lsame_(type__, "L")) { - itype = 1; - } else if (lsame_(type__, "U")) { - itype = 2; - } else if (lsame_(type__, "H")) { - itype = 3; - } else if (lsame_(type__, "B")) { - itype = 4; - } else if (lsame_(type__, "Q")) { - itype = 5; - } else if (lsame_(type__, "Z")) { - itype = 6; - } else { - itype = -1; - } - - if (itype == -1) { - *info = -1; - } else if (*cfrom == 0.f) { - *info = -4; - } else if (*m < 0) { - *info = -6; - } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { - *info = -7; - } else if (itype <= 3 && *lda < max(1,*m)) { - *info = -9; - } else if (itype >= 4) { -/* Computing MAX */ - i__1 = *m - 1; - if (*kl < 0 || *kl > max(i__1,0)) { - *info = -2; - } else /* if(complicated condition) */ { -/* Computing MAX */ - i__1 = *n - 1; - if (*ku < 0 || *ku > max(i__1,0) || (itype == 4 || itype == 5) && - *kl != *ku) { - *info = -3; - } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < * - ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) { - *info = -9; - } - } - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLASCL", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0 || *m == 0) { - return 0; - } - -/* Get machine parameters */ - - smlnum = slamch_("S"); - bignum = 1.f / smlnum; - - cfromc = *cfrom; - ctoc = *cto; - -L10: - cfrom1 = cfromc * smlnum; - cto1 = ctoc / bignum; - if (dabs(cfrom1) > dabs(ctoc) && ctoc != 0.f) { - mul = smlnum; - done = FALSE_; - cfromc = cfrom1; - } else if (dabs(cto1) > dabs(cfromc)) { - mul = bignum; - done = FALSE_; - ctoc = cto1; - } else { - mul = ctoc / cfromc; - done = TRUE_; - } - - if (itype == 0) { - -/* Full matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] *= mul; -/* L20: */ - } -/* L30: */ - } - - } else if (itype == 1) { - -/* Lower triangular matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = j; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] *= mul; -/* L40: */ - } -/* L50: */ - } - - } else if (itype == 2) { - -/* Upper triangular matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = min(j,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] *= mul; -/* L60: */ - } -/* L70: */ - } - - } else if (itype == 3) { - -/* Upper Hessenberg matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = j + 1; - i__2 = min(i__3,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] *= mul; -/* L80: */ - } -/* L90: */ - } - - } else if (itype == 4) { - -/* Lower half of a symmetric band matrix */ - - k3 = *kl + 1; - k4 = *n + 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = k3, i__4 = k4 - j; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] *= mul; -/* L100: */ - } -/* L110: */ - } - - } else if (itype == 5) { - -/* Upper half of a symmetric band matrix */ - - k1 = *ku + 2; - k3 = *ku + 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MAX */ - i__2 = k1 - j; - i__3 = k3; - for (i__ = max(i__2,1); i__ <= i__3; ++i__) { - a[i__ + j * a_dim1] *= mul; -/* L120: */ - } -/* L130: */ - } - - } else if (itype == 6) { - -/* Band matrix */ - - k1 = *kl + *ku + 2; - k2 = *kl + 1; - k3 = (*kl << 1) + *ku + 1; - k4 = *kl + *ku + 1 + *m; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MAX */ - i__3 = k1 - j; -/* Computing MIN */ - i__4 = k3, i__5 = k4 - j; - i__2 = min(i__4,i__5); - for (i__ = max(i__3,k2); i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] *= mul; -/* L140: */ - } -/* L150: */ - } - - } - - if (! done) { - goto L10; - } - - return 0; - -/* End of SLASCL */ - -} /* slascl_ */ - -/* Subroutine */ int slasd0_(integer *n, integer *sqre, real *d__, real *e, - real *u, integer *ldu, real *vt, integer *ldvt, integer *smlsiz, - integer *iwork, real *work, integer *info) -{ - /* System generated locals */ - integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; - - /* Builtin functions */ - integer pow_ii(integer *, integer *); - - /* Local variables */ - static integer i__, j, m, i1, ic, lf, nd, ll, nl, nr, im1, ncc, nlf, nrf, - iwk, lvl, ndb1, nlp1, nrp1; - static real beta; - static integer idxq, nlvl; - static real alpha; - static integer inode, ndiml, idxqc, ndimr, itemp, sqrei; - extern /* Subroutine */ int slasd1_(integer *, integer *, integer *, real - *, real *, real *, real *, integer *, real *, integer *, integer * - , integer *, real *, integer *), xerbla_(char *, integer *), slasdq_(char *, integer *, integer *, integer *, integer - *, integer *, real *, real *, real *, integer *, real *, integer * - , real *, integer *, real *, integer *), slasdt_(integer * - , integer *, integer *, integer *, integer *, integer *, integer * - ); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - Using a divide and conquer approach, SLASD0 computes the singular - value decomposition (SVD) of a real upper bidiagonal N-by-M - matrix B with diagonal D and offdiagonal E, where M = N + SQRE. - The algorithm computes orthogonal matrices U and VT such that - B = U * S * VT. The singular values S are overwritten on D. - - A related subroutine, SLASDA, computes only the singular values, - and optionally, the singular vectors in compact form. - - Arguments - ========= - - N (input) INTEGER - On entry, the row dimension of the upper bidiagonal matrix. - This is also the dimension of the main diagonal array D. - - SQRE (input) INTEGER - Specifies the column dimension of the bidiagonal matrix. - = 0: The bidiagonal matrix has column dimension M = N; - = 1: The bidiagonal matrix has column dimension M = N+1; - - D (input/output) REAL array, dimension (N) - On entry D contains the main diagonal of the bidiagonal - matrix. - On exit D, if INFO = 0, contains its singular values. - - E (input) REAL array, dimension (M-1) - Contains the subdiagonal entries of the bidiagonal matrix. - On exit, E has been destroyed. - - U (output) REAL array, dimension at least (LDQ, N) - On exit, U contains the left singular vectors. - - LDU (input) INTEGER - On entry, leading dimension of U. - - VT (output) REAL array, dimension at least (LDVT, M) - On exit, VT' contains the right singular vectors. - - LDVT (input) INTEGER - On entry, leading dimension of VT. - - SMLSIZ (input) INTEGER - On entry, maximum size of the subproblems at the - bottom of the computation tree. - - IWORK INTEGER work array. - Dimension must be at least (8 * N) - - WORK REAL work array. - Dimension must be at least (3 * M**2 + 2 * M) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an singular value did not converge - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - --iwork; - --work; - - /* Function Body */ - *info = 0; - - if (*n < 0) { - *info = -1; - } else if (*sqre < 0 || *sqre > 1) { - *info = -2; - } - - m = *n + *sqre; - - if (*ldu < *n) { - *info = -6; - } else if (*ldvt < m) { - *info = -8; - } else if (*smlsiz < 3) { - *info = -9; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLASD0", &i__1); - return 0; - } - -/* If the input matrix is too small, call SLASDQ to find the SVD. */ - - if (*n <= *smlsiz) { - slasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset], - ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], info); - return 0; - } - -/* Set up the computation tree. */ - - inode = 1; - ndiml = inode + *n; - ndimr = ndiml + *n; - idxq = ndimr + *n; - iwk = idxq + *n; - slasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], - smlsiz); - -/* - For the nodes on bottom level of the tree, solve - their subproblems by SLASDQ. -*/ - - ndb1 = (nd + 1) / 2; - ncc = 0; - i__1 = nd; - for (i__ = ndb1; i__ <= i__1; ++i__) { - -/* - IC : center row of each node - NL : number of rows of left subproblem - NR : number of rows of right subproblem - NLF: starting row of the left subproblem - NRF: starting row of the right subproblem -*/ - - i1 = i__ - 1; - ic = iwork[inode + i1]; - nl = iwork[ndiml + i1]; - nlp1 = nl + 1; - nr = iwork[ndimr + i1]; - nrp1 = nr + 1; - nlf = ic - nl; - nrf = ic + 1; - sqrei = 1; - slasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], &vt[ - nlf + nlf * vt_dim1], ldvt, &u[nlf + nlf * u_dim1], ldu, &u[ - nlf + nlf * u_dim1], ldu, &work[1], info); - if (*info != 0) { - return 0; - } - itemp = idxq + nlf - 2; - i__2 = nl; - for (j = 1; j <= i__2; ++j) { - iwork[itemp + j] = j; -/* L10: */ - } - if (i__ == nd) { - sqrei = *sqre; - } else { - sqrei = 1; - } - nrp1 = nr + sqrei; - slasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], &vt[ - nrf + nrf * vt_dim1], ldvt, &u[nrf + nrf * u_dim1], ldu, &u[ - nrf + nrf * u_dim1], ldu, &work[1], info); - if (*info != 0) { - return 0; - } - itemp = idxq + ic; - i__2 = nr; - for (j = 1; j <= i__2; ++j) { - iwork[itemp + j - 1] = j; -/* L20: */ - } -/* L30: */ - } - -/* Now conquer each subproblem bottom-up. */ - - for (lvl = nlvl; lvl >= 1; --lvl) { - -/* - Find the first node LF and last node LL on the - current level LVL. -*/ - - if (lvl == 1) { - lf = 1; - ll = 1; - } else { - i__1 = lvl - 1; - lf = pow_ii(&c__2, &i__1); - ll = (lf << 1) - 1; - } - i__1 = ll; - for (i__ = lf; i__ <= i__1; ++i__) { - im1 = i__ - 1; - ic = iwork[inode + im1]; - nl = iwork[ndiml + im1]; - nr = iwork[ndimr + im1]; - nlf = ic - nl; - if (*sqre == 0 && i__ == ll) { - sqrei = *sqre; - } else { - sqrei = 1; - } - idxqc = idxq + nlf - 1; - alpha = d__[ic]; - beta = e[ic]; - slasd1_(&nl, &nr, &sqrei, &d__[nlf], &alpha, &beta, &u[nlf + nlf * - u_dim1], ldu, &vt[nlf + nlf * vt_dim1], ldvt, &iwork[ - idxqc], &iwork[iwk], &work[1], info); - if (*info != 0) { - return 0; - } -/* L40: */ - } -/* L50: */ - } - - return 0; - -/* End of SLASD0 */ - -} /* slasd0_ */ - -/* Subroutine */ int slasd1_(integer *nl, integer *nr, integer *sqre, real * - d__, real *alpha, real *beta, real *u, integer *ldu, real *vt, - integer *ldvt, integer *idxq, integer *iwork, real *work, integer * - info) -{ - /* System generated locals */ - integer u_dim1, u_offset, vt_dim1, vt_offset, i__1; - real r__1, r__2; - - /* Local variables */ - static integer i__, k, m, n, n1, n2, iq, iz, iu2, ldq, idx, ldu2, ivt2, - idxc, idxp, ldvt2; - extern /* Subroutine */ int slasd2_(integer *, integer *, integer *, - integer *, real *, real *, real *, real *, real *, integer *, - real *, integer *, real *, real *, integer *, real *, integer *, - integer *, integer *, integer *, integer *, integer *, integer *), - slasd3_(integer *, integer *, integer *, integer *, real *, real - *, integer *, real *, real *, integer *, real *, integer *, real * - , integer *, real *, integer *, integer *, integer *, real *, - integer *); - static integer isigma; - extern /* Subroutine */ int xerbla_(char *, integer *), slascl_( - char *, integer *, integer *, real *, real *, integer *, integer * - , real *, integer *, integer *), slamrg_(integer *, - integer *, real *, integer *, integer *, integer *); - static real orgnrm; - static integer coltyp; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B, - where N = NL + NR + 1 and M = N + SQRE. SLASD1 is called from SLASD0. - - A related subroutine SLASD7 handles the case in which the singular - values (and the singular vectors in factored form) are desired. - - SLASD1 computes the SVD as follows: - - ( D1(in) 0 0 0 ) - B = U(in) * ( Z1' a Z2' b ) * VT(in) - ( 0 0 D2(in) 0 ) - - = U(out) * ( D(out) 0) * VT(out) - - where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M - with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros - elsewhere; and the entry b is empty if SQRE = 0. - - The left singular vectors of the original matrix are stored in U, and - the transpose of the right singular vectors are stored in VT, and the - singular values are in D. The algorithm consists of three stages: - - The first stage consists of deflating the size of the problem - when there are multiple singular values or when there are zeros in - the Z vector. For each such occurence the dimension of the - secular equation problem is reduced by one. This stage is - performed by the routine SLASD2. - - The second stage consists of calculating the updated - singular values. This is done by finding the square roots of the - roots of the secular equation via the routine SLASD4 (as called - by SLASD3). This routine also calculates the singular vectors of - the current problem. - - The final stage consists of computing the updated singular vectors - directly using the updated singular values. The singular vectors - for the current problem are multiplied with the singular vectors - from the overall problem. - - Arguments - ========= - - NL (input) INTEGER - The row dimension of the upper block. NL >= 1. - - NR (input) INTEGER - The row dimension of the lower block. NR >= 1. - - SQRE (input) INTEGER - = 0: the lower block is an NR-by-NR square matrix. - = 1: the lower block is an NR-by-(NR+1) rectangular matrix. - - The bidiagonal matrix has row dimension N = NL + NR + 1, - and column dimension M = N + SQRE. - - D (input/output) REAL array, - dimension (N = NL+NR+1). - On entry D(1:NL,1:NL) contains the singular values of the - upper block; and D(NL+2:N) contains the singular values of - the lower block. On exit D(1:N) contains the singular values - of the modified matrix. - - ALPHA (input) REAL - Contains the diagonal element associated with the added row. - - BETA (input) REAL - Contains the off-diagonal element associated with the added - row. - - U (input/output) REAL array, dimension(LDU,N) - On entry U(1:NL, 1:NL) contains the left singular vectors of - the upper block; U(NL+2:N, NL+2:N) contains the left singular - vectors of the lower block. On exit U contains the left - singular vectors of the bidiagonal matrix. - - LDU (input) INTEGER - The leading dimension of the array U. LDU >= max( 1, N ). - - VT (input/output) REAL array, dimension(LDVT,M) - where M = N + SQRE. - On entry VT(1:NL+1, 1:NL+1)' contains the right singular - vectors of the upper block; VT(NL+2:M, NL+2:M)' contains - the right singular vectors of the lower block. On exit - VT' contains the right singular vectors of the - bidiagonal matrix. - - LDVT (input) INTEGER - The leading dimension of the array VT. LDVT >= max( 1, M ). - - IDXQ (output) INTEGER array, dimension(N) - This contains the permutation which will reintegrate the - subproblem just solved back into sorted order, i.e. - D( IDXQ( I = 1, N ) ) will be in ascending order. - - IWORK (workspace) INTEGER array, dimension( 4 * N ) - - WORK (workspace) REAL array, dimension( 3*M**2 + 2*M ) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an singular value did not converge - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - --idxq; - --iwork; - --work; - - /* Function Body */ - *info = 0; - - if (*nl < 1) { - *info = -1; - } else if (*nr < 1) { - *info = -2; - } else if (*sqre < 0 || *sqre > 1) { - *info = -3; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLASD1", &i__1); - return 0; - } - - n = *nl + *nr + 1; - m = n + *sqre; - -/* - The following values are for bookkeeping purposes only. They are - integer pointers which indicate the portion of the workspace - used by a particular array in SLASD2 and SLASD3. -*/ - - ldu2 = n; - ldvt2 = m; - - iz = 1; - isigma = iz + m; - iu2 = isigma + n; - ivt2 = iu2 + ldu2 * n; - iq = ivt2 + ldvt2 * m; - - idx = 1; - idxc = idx + n; - coltyp = idxc + n; - idxp = coltyp + n; - -/* - Scale. - - Computing MAX -*/ - r__1 = dabs(*alpha), r__2 = dabs(*beta); - orgnrm = dmax(r__1,r__2); - d__[*nl + 1] = 0.f; - i__1 = n; - for (i__ = 1; i__ <= i__1; ++i__) { - if ((r__1 = d__[i__], dabs(r__1)) > orgnrm) { - orgnrm = (r__1 = d__[i__], dabs(r__1)); - } -/* L10: */ - } - slascl_("G", &c__0, &c__0, &orgnrm, &c_b871, &n, &c__1, &d__[1], &n, info); - *alpha /= orgnrm; - *beta /= orgnrm; - -/* Deflate singular values. */ - - slasd2_(nl, nr, sqre, &k, &d__[1], &work[iz], alpha, beta, &u[u_offset], - ldu, &vt[vt_offset], ldvt, &work[isigma], &work[iu2], &ldu2, & - work[ivt2], &ldvt2, &iwork[idxp], &iwork[idx], &iwork[idxc], & - idxq[1], &iwork[coltyp], info); - -/* Solve Secular Equation and update singular vectors. */ - - ldq = k; - slasd3_(nl, nr, sqre, &k, &d__[1], &work[iq], &ldq, &work[isigma], &u[ - u_offset], ldu, &work[iu2], &ldu2, &vt[vt_offset], ldvt, &work[ - ivt2], &ldvt2, &iwork[idxc], &iwork[coltyp], &work[iz], info); - if (*info != 0) { - return 0; - } - -/* Unscale. */ - - slascl_("G", &c__0, &c__0, &c_b871, &orgnrm, &n, &c__1, &d__[1], &n, info); - -/* Prepare the IDXQ sorting permutation. */ - - n1 = k; - n2 = n - k; - slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); - - return 0; - -/* End of SLASD1 */ - -} /* slasd1_ */ - -/* Subroutine */ int slasd2_(integer *nl, integer *nr, integer *sqre, integer - *k, real *d__, real *z__, real *alpha, real *beta, real *u, integer * - ldu, real *vt, integer *ldvt, real *dsigma, real *u2, integer *ldu2, - real *vt2, integer *ldvt2, integer *idxp, integer *idx, integer *idxc, - integer *idxq, integer *coltyp, integer *info) -{ - /* System generated locals */ - integer u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, - vt2_dim1, vt2_offset, i__1; - real r__1, r__2; - - /* Local variables */ - static real c__; - static integer i__, j, m, n; - static real s; - static integer k2; - static real z1; - static integer ct, jp; - static real eps, tau, tol; - static integer psm[4], nlp1, nlp2, idxi, idxj, ctot[4]; - extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, - integer *, real *, real *); - static integer idxjp, jprev; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *); - extern doublereal slapy2_(real *, real *), slamch_(char *); - extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_( - integer *, integer *, real *, integer *, integer *, integer *); - static real hlftol; - extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, - integer *, real *, integer *), slaset_(char *, integer *, - integer *, real *, real *, real *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - October 31, 1999 - - - Purpose - ======= - - SLASD2 merges the two sets of singular values together into a single - sorted set. Then it tries to deflate the size of the problem. - There are two ways in which deflation can occur: when two or more - singular values are close together or if there is a tiny entry in the - Z vector. For each such occurrence the order of the related secular - equation problem is reduced by one. - - SLASD2 is called from SLASD1. - - Arguments - ========= - - NL (input) INTEGER - The row dimension of the upper block. NL >= 1. - - NR (input) INTEGER - The row dimension of the lower block. NR >= 1. - - SQRE (input) INTEGER - = 0: the lower block is an NR-by-NR square matrix. - = 1: the lower block is an NR-by-(NR+1) rectangular matrix. - - The bidiagonal matrix has N = NL + NR + 1 rows and - M = N + SQRE >= N columns. - - K (output) INTEGER - Contains the dimension of the non-deflated matrix, - This is the order of the related secular equation. 1 <= K <=N. - - D (input/output) REAL array, dimension(N) - On entry D contains the singular values of the two submatrices - to be combined. On exit D contains the trailing (N-K) updated - singular values (those which were deflated) sorted into - increasing order. - - ALPHA (input) REAL - Contains the diagonal element associated with the added row. - - BETA (input) REAL - Contains the off-diagonal element associated with the added - row. - - U (input/output) REAL array, dimension(LDU,N) - On entry U contains the left singular vectors of two - submatrices in the two square blocks with corners at (1,1), - (NL, NL), and (NL+2, NL+2), (N,N). - On exit U contains the trailing (N-K) updated left singular - vectors (those which were deflated) in its last N-K columns. - - LDU (input) INTEGER - The leading dimension of the array U. LDU >= N. - - Z (output) REAL array, dimension(N) - On exit Z contains the updating row vector in the secular - equation. - - DSIGMA (output) REAL array, dimension (N) - Contains a copy of the diagonal elements (K-1 singular values - and one zero) in the secular equation. - - U2 (output) REAL array, dimension(LDU2,N) - Contains a copy of the first K-1 left singular vectors which - will be used by SLASD3 in a matrix multiply (SGEMM) to solve - for the new left singular vectors. U2 is arranged into four - blocks. The first block contains a column with 1 at NL+1 and - zero everywhere else; the second block contains non-zero - entries only at and above NL; the third contains non-zero - entries only below NL+1; and the fourth is dense. - - LDU2 (input) INTEGER - The leading dimension of the array U2. LDU2 >= N. - - VT (input/output) REAL array, dimension(LDVT,M) - On entry VT' contains the right singular vectors of two - submatrices in the two square blocks with corners at (1,1), - (NL+1, NL+1), and (NL+2, NL+2), (M,M). - On exit VT' contains the trailing (N-K) updated right singular - vectors (those which were deflated) in its last N-K columns. - In case SQRE =1, the last row of VT spans the right null - space. - - LDVT (input) INTEGER - The leading dimension of the array VT. LDVT >= M. - - VT2 (output) REAL array, dimension(LDVT2,N) - VT2' contains a copy of the first K right singular vectors - which will be used by SLASD3 in a matrix multiply (SGEMM) to - solve for the new right singular vectors. VT2 is arranged into - three blocks. The first block contains a row that corresponds - to the special 0 diagonal element in SIGMA; the second block - contains non-zeros only at and before NL +1; the third block - contains non-zeros only at and after NL +2. - - LDVT2 (input) INTEGER - The leading dimension of the array VT2. LDVT2 >= M. - - IDXP (workspace) INTEGER array, dimension(N) - This will contain the permutation used to place deflated - values of D at the end of the array. On output IDXP(2:K) - points to the nondeflated D-values and IDXP(K+1:N) - points to the deflated singular values. - - IDX (workspace) INTEGER array, dimension(N) - This will contain the permutation used to sort the contents of - D into ascending order. - - IDXC (output) INTEGER array, dimension(N) - This will contain the permutation used to arrange the columns - of the deflated U matrix into three groups: the first group - contains non-zero entries only at and above NL, the second - contains non-zero entries only below NL+2, and the third is - dense. - - COLTYP (workspace/output) INTEGER array, dimension(N) - As workspace, this will contain a label which will indicate - which of the following types a column in the U2 matrix or a - row in the VT2 matrix is: - 1 : non-zero in the upper half only - 2 : non-zero in the lower half only - 3 : dense - 4 : deflated - - On exit, it is an array of dimension 4, with COLTYP(I) being - the dimension of the I-th type columns. - - IDXQ (input) INTEGER array, dimension(N) - This contains the permutation which separately sorts the two - sub-problems in D into ascending order. Note that entries in - the first hlaf of this permutation must first be moved one - position backward; and entries in the second half - must first have NL+1 added to their values. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --z__; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - --dsigma; - u2_dim1 = *ldu2; - u2_offset = 1 + u2_dim1; - u2 -= u2_offset; - vt2_dim1 = *ldvt2; - vt2_offset = 1 + vt2_dim1; - vt2 -= vt2_offset; - --idxp; - --idx; - --idxc; - --idxq; - --coltyp; - - /* Function Body */ - *info = 0; - - if (*nl < 1) { - *info = -1; - } else if (*nr < 1) { - *info = -2; - } else if (*sqre != 1 && *sqre != 0) { - *info = -3; - } - - n = *nl + *nr + 1; - m = n + *sqre; - - if (*ldu < n) { - *info = -10; - } else if (*ldvt < m) { - *info = -12; - } else if (*ldu2 < n) { - *info = -15; - } else if (*ldvt2 < m) { - *info = -17; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLASD2", &i__1); - return 0; - } - - nlp1 = *nl + 1; - nlp2 = *nl + 2; - -/* - Generate the first part of the vector Z; and move the singular - values in the first part of D one position backward. -*/ - - z1 = *alpha * vt[nlp1 + nlp1 * vt_dim1]; - z__[1] = z1; - for (i__ = *nl; i__ >= 1; --i__) { - z__[i__ + 1] = *alpha * vt[i__ + nlp1 * vt_dim1]; - d__[i__ + 1] = d__[i__]; - idxq[i__ + 1] = idxq[i__] + 1; -/* L10: */ - } - -/* Generate the second part of the vector Z. */ - - i__1 = m; - for (i__ = nlp2; i__ <= i__1; ++i__) { - z__[i__] = *beta * vt[i__ + nlp2 * vt_dim1]; -/* L20: */ - } - -/* Initialize some reference arrays. */ - - i__1 = nlp1; - for (i__ = 2; i__ <= i__1; ++i__) { - coltyp[i__] = 1; -/* L30: */ - } - i__1 = n; - for (i__ = nlp2; i__ <= i__1; ++i__) { - coltyp[i__] = 2; -/* L40: */ - } - -/* Sort the singular values into increasing order */ - - i__1 = n; - for (i__ = nlp2; i__ <= i__1; ++i__) { - idxq[i__] += nlp1; -/* L50: */ - } - -/* - DSIGMA, IDXC, IDXC, and the first column of U2 - are used as storage space. -*/ - - i__1 = n; - for (i__ = 2; i__ <= i__1; ++i__) { - dsigma[i__] = d__[idxq[i__]]; - u2[i__ + u2_dim1] = z__[idxq[i__]]; - idxc[i__] = coltyp[idxq[i__]]; -/* L60: */ - } - - slamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); - - i__1 = n; - for (i__ = 2; i__ <= i__1; ++i__) { - idxi = idx[i__] + 1; - d__[i__] = dsigma[idxi]; - z__[i__] = u2[idxi + u2_dim1]; - coltyp[i__] = idxc[idxi]; -/* L70: */ - } - -/* Calculate the allowable deflation tolerance */ - - eps = slamch_("Epsilon"); -/* Computing MAX */ - r__1 = dabs(*alpha), r__2 = dabs(*beta); - tol = dmax(r__1,r__2); -/* Computing MAX */ - r__2 = (r__1 = d__[n], dabs(r__1)); - tol = eps * 8.f * dmax(r__2,tol); - -/* - There are 2 kinds of deflation -- first a value in the z-vector - is small, second two (or more) singular values are very close - together (their difference is small). - - If the value in the z-vector is small, we simply permute the - array so that the corresponding singular value is moved to the - end. - - If two values in the D-vector are close, we perform a two-sided - rotation designed to make one of the corresponding z-vector - entries zero, and then permute the array so that the deflated - singular value is moved to the end. - - If there are multiple singular values then the problem deflates. - Here the number of equal singular values are found. As each equal - singular value is found, an elementary reflector is computed to - rotate the corresponding singular subspace so that the - corresponding components of Z are zero in this new basis. -*/ - - *k = 1; - k2 = n + 1; - i__1 = n; - for (j = 2; j <= i__1; ++j) { - if ((r__1 = z__[j], dabs(r__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - idxp[k2] = j; - coltyp[j] = 4; - if (j == n) { - goto L120; - } - } else { - jprev = j; - goto L90; - } -/* L80: */ - } -L90: - j = jprev; -L100: - ++j; - if (j > n) { - goto L110; - } - if ((r__1 = z__[j], dabs(r__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - idxp[k2] = j; - coltyp[j] = 4; - } else { - -/* Check if singular values are close enough to allow deflation. */ - - if ((r__1 = d__[j] - d__[jprev], dabs(r__1)) <= tol) { - -/* Deflation is possible. */ - - s = z__[jprev]; - c__ = z__[j]; - -/* - Find sqrt(a**2+b**2) without overflow or - destructive underflow. -*/ - - tau = slapy2_(&c__, &s); - c__ /= tau; - s = -s / tau; - z__[j] = tau; - z__[jprev] = 0.f; - -/* - Apply back the Givens rotation to the left and right - singular vector matrices. -*/ - - idxjp = idxq[idx[jprev] + 1]; - idxj = idxq[idx[j] + 1]; - if (idxjp <= nlp1) { - --idxjp; - } - if (idxj <= nlp1) { - --idxj; - } - srot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], & - c__1, &c__, &s); - srot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, & - c__, &s); - if (coltyp[j] != coltyp[jprev]) { - coltyp[j] = 3; - } - coltyp[jprev] = 4; - --k2; - idxp[k2] = jprev; - jprev = j; - } else { - ++(*k); - u2[*k + u2_dim1] = z__[jprev]; - dsigma[*k] = d__[jprev]; - idxp[*k] = jprev; - jprev = j; - } - } - goto L100; -L110: - -/* Record the last singular value. */ - - ++(*k); - u2[*k + u2_dim1] = z__[jprev]; - dsigma[*k] = d__[jprev]; - idxp[*k] = jprev; - -L120: - -/* - Count up the total number of the various types of columns, then - form a permutation which positions the four column types into - four groups of uniform structure (although one or more of these - groups may be empty). -*/ - - for (j = 1; j <= 4; ++j) { - ctot[j - 1] = 0; -/* L130: */ - } - i__1 = n; - for (j = 2; j <= i__1; ++j) { - ct = coltyp[j]; - ++ctot[ct - 1]; -/* L140: */ - } - -/* PSM(*) = Position in SubMatrix (of types 1 through 4) */ - - psm[0] = 2; - psm[1] = ctot[0] + 2; - psm[2] = psm[1] + ctot[1]; - psm[3] = psm[2] + ctot[2]; - -/* - Fill out the IDXC array so that the permutation which it induces - will place all type-1 columns first, all type-2 columns next, - then all type-3's, and finally all type-4's, starting from the - second column. This applies similarly to the rows of VT. -*/ - - i__1 = n; - for (j = 2; j <= i__1; ++j) { - jp = idxp[j]; - ct = coltyp[jp]; - idxc[psm[ct - 1]] = j; - ++psm[ct - 1]; -/* L150: */ - } - -/* - Sort the singular values and corresponding singular vectors into - DSIGMA, U2, and VT2 respectively. The singular values/vectors - which were not deflated go into the first K slots of DSIGMA, U2, - and VT2 respectively, while those which were deflated go into the - last N - K slots, except that the first column/row will be treated - separately. -*/ - - i__1 = n; - for (j = 2; j <= i__1; ++j) { - jp = idxp[j]; - dsigma[j] = d__[jp]; - idxj = idxq[idx[idxp[idxc[j]]] + 1]; - if (idxj <= nlp1) { - --idxj; - } - scopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1); - scopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2); -/* L160: */ - } - -/* Determine DSIGMA(1), DSIGMA(2) and Z(1) */ - - dsigma[1] = 0.f; - hlftol = tol / 2.f; - if (dabs(dsigma[2]) <= hlftol) { - dsigma[2] = hlftol; - } - if (m > n) { - z__[1] = slapy2_(&z1, &z__[m]); - if (z__[1] <= tol) { - c__ = 1.f; - s = 0.f; - z__[1] = tol; - } else { - c__ = z1 / z__[1]; - s = z__[m] / z__[1]; - } - } else { - if (dabs(z1) <= tol) { - z__[1] = tol; - } else { - z__[1] = z1; - } - } - -/* Move the rest of the updating row to Z. */ - - i__1 = *k - 1; - scopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1); - -/* - Determine the first column of U2, the first row of VT2 and the - last row of VT. -*/ - - slaset_("A", &n, &c__1, &c_b1101, &c_b1101, &u2[u2_offset], ldu2); - u2[nlp1 + u2_dim1] = 1.f; - if (m > n) { - i__1 = nlp1; - for (i__ = 1; i__ <= i__1; ++i__) { - vt[m + i__ * vt_dim1] = -s * vt[nlp1 + i__ * vt_dim1]; - vt2[i__ * vt2_dim1 + 1] = c__ * vt[nlp1 + i__ * vt_dim1]; -/* L170: */ - } - i__1 = m; - for (i__ = nlp2; i__ <= i__1; ++i__) { - vt2[i__ * vt2_dim1 + 1] = s * vt[m + i__ * vt_dim1]; - vt[m + i__ * vt_dim1] = c__ * vt[m + i__ * vt_dim1]; -/* L180: */ - } - } else { - scopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2); - } - if (m > n) { - scopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2); - } - -/* - The deflated singular values and their corresponding vectors go - into the back of D, U, and V respectively. -*/ - - if (n > *k) { - i__1 = n - *k; - scopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); - i__1 = n - *k; - slacpy_("A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1) - * u_dim1 + 1], ldu); - i__1 = n - *k; - slacpy_("A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 + - vt_dim1], ldvt); - } - -/* Copy CTOT into COLTYP for referencing in SLASD3. */ - - for (j = 1; j <= 4; ++j) { - coltyp[j] = ctot[j - 1]; -/* L190: */ - } - - return 0; - -/* End of SLASD2 */ - -} /* slasd2_ */ - -/* Subroutine */ int slasd3_(integer *nl, integer *nr, integer *sqre, integer - *k, real *d__, real *q, integer *ldq, real *dsigma, real *u, integer * - ldu, real *u2, integer *ldu2, real *vt, integer *ldvt, real *vt2, - integer *ldvt2, integer *idxc, integer *ctot, real *z__, integer * - info) -{ - /* System generated locals */ - integer q_dim1, q_offset, u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, - vt_offset, vt2_dim1, vt2_offset, i__1, i__2; - real r__1, r__2; - - /* Builtin functions */ - double sqrt(doublereal), r_sign(real *, real *); - - /* Local variables */ - static integer i__, j, m, n, jc; - static real rho; - static integer nlp1, nlp2, nrp1; - static real temp; - extern doublereal snrm2_(integer *, real *, integer *); - static integer ctemp; - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *); - static integer ktemp; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *); - extern doublereal slamc3_(real *, real *); - extern /* Subroutine */ int slasd4_(integer *, integer *, real *, real *, - real *, real *, real *, real *, integer *), xerbla_(char *, - integer *), slascl_(char *, integer *, integer *, real *, - real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, - real *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - October 31, 1999 - - - Purpose - ======= - - SLASD3 finds all the square roots of the roots of the secular - equation, as defined by the values in D and Z. It makes the - appropriate calls to SLASD4 and then updates the singular - vectors by matrix multiplication. - - This code makes very mild assumptions about floating point - arithmetic. It will work on machines with a guard digit in - add/subtract, or on those binary machines without guard digits - which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. - It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. - - SLASD3 is called from SLASD1. - - Arguments - ========= - - NL (input) INTEGER - The row dimension of the upper block. NL >= 1. - - NR (input) INTEGER - The row dimension of the lower block. NR >= 1. - - SQRE (input) INTEGER - = 0: the lower block is an NR-by-NR square matrix. - = 1: the lower block is an NR-by-(NR+1) rectangular matrix. - - The bidiagonal matrix has N = NL + NR + 1 rows and - M = N + SQRE >= N columns. - - K (input) INTEGER - The size of the secular equation, 1 =< K = < N. - - D (output) REAL array, dimension(K) - On exit the square roots of the roots of the secular equation, - in ascending order. - - Q (workspace) REAL array, - dimension at least (LDQ,K). - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= K. - - DSIGMA (input) REAL array, dimension(K) - The first K elements of this array contain the old roots - of the deflated updating problem. These are the poles - of the secular equation. - - U (input) REAL array, dimension (LDU, N) - The last N - K columns of this matrix contain the deflated - left singular vectors. - - LDU (input) INTEGER - The leading dimension of the array U. LDU >= N. - - U2 (input) REAL array, dimension (LDU2, N) - The first K columns of this matrix contain the non-deflated - left singular vectors for the split problem. - - LDU2 (input) INTEGER - The leading dimension of the array U2. LDU2 >= N. - - VT (input) REAL array, dimension (LDVT, M) - The last M - K columns of VT' contain the deflated - right singular vectors. - - LDVT (input) INTEGER - The leading dimension of the array VT. LDVT >= N. - - VT2 (input) REAL array, dimension (LDVT2, N) - The first K columns of VT2' contain the non-deflated - right singular vectors for the split problem. - - LDVT2 (input) INTEGER - The leading dimension of the array VT2. LDVT2 >= N. - - IDXC (input) INTEGER array, dimension ( N ) - The permutation used to arrange the columns of U (and rows of - VT) into three groups: the first group contains non-zero - entries only at and above (or before) NL +1; the second - contains non-zero entries only at and below (or after) NL+2; - and the third is dense. The first column of U and the row of - VT are treated separately, however. - - The rows of the singular vectors found by SLASD4 - must be likewise permuted before the matrix multiplies can - take place. - - CTOT (input) INTEGER array, dimension ( 4 ) - A count of the total number of the various types of columns - in U (or rows in VT), as described in IDXC. The fourth column - type is any column which has been deflated. - - Z (input) REAL array, dimension (K) - The first K elements of this array contain the components - of the deflation-adjusted updating row vector. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an singular value did not converge - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --dsigma; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - u2_dim1 = *ldu2; - u2_offset = 1 + u2_dim1; - u2 -= u2_offset; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - vt2_dim1 = *ldvt2; - vt2_offset = 1 + vt2_dim1; - vt2 -= vt2_offset; - --idxc; - --ctot; - --z__; - - /* Function Body */ - *info = 0; - - if (*nl < 1) { - *info = -1; - } else if (*nr < 1) { - *info = -2; - } else if (*sqre != 1 && *sqre != 0) { - *info = -3; - } - - n = *nl + *nr + 1; - m = n + *sqre; - nlp1 = *nl + 1; - nlp2 = *nl + 2; - - if (*k < 1 || *k > n) { - *info = -4; - } else if (*ldq < *k) { - *info = -7; - } else if (*ldu < n) { - *info = -10; - } else if (*ldu2 < n) { - *info = -12; - } else if (*ldvt < m) { - *info = -14; - } else if (*ldvt2 < m) { - *info = -16; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLASD3", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*k == 1) { - d__[1] = dabs(z__[1]); - scopy_(&m, &vt2[vt2_dim1 + 1], ldvt2, &vt[vt_dim1 + 1], ldvt); - if (z__[1] > 0.f) { - scopy_(&n, &u2[u2_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1); - } else { - i__1 = n; - for (i__ = 1; i__ <= i__1; ++i__) { - u[i__ + u_dim1] = -u2[i__ + u2_dim1]; -/* L10: */ - } - } - return 0; - } - -/* - Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can - be computed with high relative accuracy (barring over/underflow). - This is a problem on machines without a guard digit in - add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). - The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), - which on any of these machines zeros out the bottommost - bit of DSIGMA(I) if it is 1; this makes the subsequent - subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation - occurs. On binary machines with a guard digit (almost all - machines) it does not change DSIGMA(I) at all. On hexadecimal - and decimal machines with a guard digit, it slightly - changes the bottommost bits of DSIGMA(I). It does not account - for hexadecimal or decimal machines without guard digits - (we know of none). We use a subroutine call to compute - 2*DLAMBDA(I) to prevent optimizing compilers from eliminating - this code. -*/ - - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - dsigma[i__] = slamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; -/* L20: */ - } - -/* Keep a copy of Z. */ - - scopy_(k, &z__[1], &c__1, &q[q_offset], &c__1); - -/* Normalize Z. */ - - rho = snrm2_(k, &z__[1], &c__1); - slascl_("G", &c__0, &c__0, &rho, &c_b871, k, &c__1, &z__[1], k, info); - rho *= rho; - -/* Find the new singular values. */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - slasd4_(k, &j, &dsigma[1], &z__[1], &u[j * u_dim1 + 1], &rho, &d__[j], - &vt[j * vt_dim1 + 1], info); - -/* If the zero finder fails, the computation is terminated. */ - - if (*info != 0) { - return 0; - } -/* L30: */ - } - -/* Compute updated Z. */ - - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - z__[i__] = u[i__ + *k * u_dim1] * vt[i__ + *k * vt_dim1]; - i__2 = i__ - 1; - for (j = 1; j <= i__2; ++j) { - z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[ - i__] - dsigma[j]) / (dsigma[i__] + dsigma[j]); -/* L40: */ - } - i__2 = *k - 1; - for (j = i__; j <= i__2; ++j) { - z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[ - i__] - dsigma[j + 1]) / (dsigma[i__] + dsigma[j + 1]); -/* L50: */ - } - r__2 = sqrt((r__1 = z__[i__], dabs(r__1))); - z__[i__] = r_sign(&r__2, &q[i__ + q_dim1]); -/* L60: */ - } - -/* - Compute left singular vectors of the modified diagonal matrix, - and store related information for the right singular vectors. -*/ - - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - vt[i__ * vt_dim1 + 1] = z__[1] / u[i__ * u_dim1 + 1] / vt[i__ * - vt_dim1 + 1]; - u[i__ * u_dim1 + 1] = -1.f; - i__2 = *k; - for (j = 2; j <= i__2; ++j) { - vt[j + i__ * vt_dim1] = z__[j] / u[j + i__ * u_dim1] / vt[j + i__ - * vt_dim1]; - u[j + i__ * u_dim1] = dsigma[j] * vt[j + i__ * vt_dim1]; -/* L70: */ - } - temp = snrm2_(k, &u[i__ * u_dim1 + 1], &c__1); - q[i__ * q_dim1 + 1] = u[i__ * u_dim1 + 1] / temp; - i__2 = *k; - for (j = 2; j <= i__2; ++j) { - jc = idxc[j]; - q[j + i__ * q_dim1] = u[jc + i__ * u_dim1] / temp; -/* L80: */ - } -/* L90: */ - } - -/* Update the left singular vector matrix. */ - - if (*k == 2) { - sgemm_("N", "N", &n, k, k, &c_b871, &u2[u2_offset], ldu2, &q[q_offset] - , ldq, &c_b1101, &u[u_offset], ldu); - goto L100; - } - if (ctot[1] > 0) { - sgemm_("N", "N", nl, k, &ctot[1], &c_b871, &u2[(u2_dim1 << 1) + 1], - ldu2, &q[q_dim1 + 2], ldq, &c_b1101, &u[u_dim1 + 1], ldu); - if (ctot[3] > 0) { - ktemp = ctot[1] + 2 + ctot[2]; - sgemm_("N", "N", nl, k, &ctot[3], &c_b871, &u2[ktemp * u2_dim1 + - 1], ldu2, &q[ktemp + q_dim1], ldq, &c_b871, &u[u_dim1 + 1] - , ldu); - } - } else if (ctot[3] > 0) { - ktemp = ctot[1] + 2 + ctot[2]; - sgemm_("N", "N", nl, k, &ctot[3], &c_b871, &u2[ktemp * u2_dim1 + 1], - ldu2, &q[ktemp + q_dim1], ldq, &c_b1101, &u[u_dim1 + 1], ldu); - } else { - slacpy_("F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu); - } - scopy_(k, &q[q_dim1 + 1], ldq, &u[nlp1 + u_dim1], ldu); - ktemp = ctot[1] + 2; - ctemp = ctot[2] + ctot[3]; - sgemm_("N", "N", nr, k, &ctemp, &c_b871, &u2[nlp2 + ktemp * u2_dim1], - ldu2, &q[ktemp + q_dim1], ldq, &c_b1101, &u[nlp2 + u_dim1], ldu); - -/* Generate the right singular vectors. */ - -L100: - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - temp = snrm2_(k, &vt[i__ * vt_dim1 + 1], &c__1); - q[i__ + q_dim1] = vt[i__ * vt_dim1 + 1] / temp; - i__2 = *k; - for (j = 2; j <= i__2; ++j) { - jc = idxc[j]; - q[i__ + j * q_dim1] = vt[jc + i__ * vt_dim1] / temp; -/* L110: */ - } -/* L120: */ - } - -/* Update the right singular vector matrix. */ - - if (*k == 2) { - sgemm_("N", "N", k, &m, k, &c_b871, &q[q_offset], ldq, &vt2[ - vt2_offset], ldvt2, &c_b1101, &vt[vt_offset], ldvt); - return 0; - } - ktemp = ctot[1] + 1; - sgemm_("N", "N", k, &nlp1, &ktemp, &c_b871, &q[q_dim1 + 1], ldq, &vt2[ - vt2_dim1 + 1], ldvt2, &c_b1101, &vt[vt_dim1 + 1], ldvt); - ktemp = ctot[1] + 2 + ctot[2]; - if (ktemp <= *ldvt2) { - sgemm_("N", "N", k, &nlp1, &ctot[3], &c_b871, &q[ktemp * q_dim1 + 1], - ldq, &vt2[ktemp + vt2_dim1], ldvt2, &c_b871, &vt[vt_dim1 + 1], - ldvt); - } - - ktemp = ctot[1] + 1; - nrp1 = *nr + *sqre; - if (ktemp > 1) { - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - q[i__ + ktemp * q_dim1] = q[i__ + q_dim1]; -/* L130: */ - } - i__1 = m; - for (i__ = nlp2; i__ <= i__1; ++i__) { - vt2[ktemp + i__ * vt2_dim1] = vt2[i__ * vt2_dim1 + 1]; -/* L140: */ - } - } - ctemp = ctot[2] + 1 + ctot[3]; - sgemm_("N", "N", k, &nrp1, &ctemp, &c_b871, &q[ktemp * q_dim1 + 1], ldq, & - vt2[ktemp + nlp2 * vt2_dim1], ldvt2, &c_b1101, &vt[nlp2 * vt_dim1 - + 1], ldvt); - - return 0; - -/* End of SLASD3 */ - -} /* slasd3_ */ - -/* Subroutine */ int slasd4_(integer *n, integer *i__, real *d__, real *z__, - real *delta, real *rho, real *sigma, real *work, integer *info) -{ - /* System generated locals */ - integer i__1; - real r__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static real a, b, c__; - static integer j; - static real w, dd[3]; - static integer ii; - static real dw, zz[3]; - static integer ip1; - static real eta, phi, eps, tau, psi; - static integer iim1, iip1; - static real dphi, dpsi; - static integer iter; - static real temp, prew, sg2lb, sg2ub, temp1, temp2, dtiim, delsq, dtiip; - static integer niter; - static real dtisq; - static logical swtch; - static real dtnsq; - extern /* Subroutine */ int slaed6_(integer *, logical *, real *, real *, - real *, real *, real *, integer *); - static real delsq2; - extern /* Subroutine */ int slasd5_(integer *, real *, real *, real *, - real *, real *, real *); - static real dtnsq1; - static logical swtch3; - extern doublereal slamch_(char *); - static logical orgati; - static real erretm, dtipsq, rhoinv; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - October 31, 1999 - - - Purpose - ======= - - This subroutine computes the square root of the I-th updated - eigenvalue of a positive symmetric rank-one modification to - a positive diagonal matrix whose entries are given as the squares - of the corresponding entries in the array d, and that - - 0 <= D(i) < D(j) for i < j - - and that RHO > 0. This is arranged by the calling routine, and is - no loss in generality. The rank-one modified system is thus - - diag( D ) * diag( D ) + RHO * Z * Z_transpose. - - where we assume the Euclidean norm of Z is 1. - - The method consists of approximating the rational functions in the - secular equation by simpler interpolating rational functions. - - Arguments - ========= - - N (input) INTEGER - The length of all arrays. - - I (input) INTEGER - The index of the eigenvalue to be computed. 1 <= I <= N. - - D (input) REAL array, dimension ( N ) - The original eigenvalues. It is assumed that they are in - order, 0 <= D(I) < D(J) for I < J. - - Z (input) REAL array, dimension ( N ) - The components of the updating vector. - - DELTA (output) REAL array, dimension ( N ) - If N .ne. 1, DELTA contains (D(j) - sigma_I) in its j-th - component. If N = 1, then DELTA(1) = 1. The vector DELTA - contains the information necessary to construct the - (singular) eigenvectors. - - RHO (input) REAL - The scalar in the symmetric updating formula. - - SIGMA (output) REAL - The computed lambda_I, the I-th updated eigenvalue. - - WORK (workspace) REAL array, dimension ( N ) - If N .ne. 1, WORK contains (D(j) + sigma_I) in its j-th - component. If N = 1, then WORK( 1 ) = 1. - - INFO (output) INTEGER - = 0: successful exit - > 0: if INFO = 1, the updating process failed. - - Internal Parameters - =================== - - Logical variable ORGATI (origin-at-i?) is used for distinguishing - whether D(i) or D(i+1) is treated as the origin. - - ORGATI = .true. origin at i - ORGATI = .false. origin at i+1 - - Logical variable SWTCH3 (switch-for-3-poles?) is for noting - if we are working with THREE poles! - - MAXIT is the maximum number of iterations allowed for each - eigenvalue. - - Further Details - =============== - - Based on contributions by - Ren-Cang Li, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Since this routine is called in an inner loop, we do no argument - checking. - - Quick return for N=1 and 2. -*/ - - /* Parameter adjustments */ - --work; - --delta; - --z__; - --d__; - - /* Function Body */ - *info = 0; - if (*n == 1) { - -/* Presumably, I=1 upon entry */ - - *sigma = sqrt(d__[1] * d__[1] + *rho * z__[1] * z__[1]); - delta[1] = 1.f; - work[1] = 1.f; - return 0; - } - if (*n == 2) { - slasd5_(i__, &d__[1], &z__[1], &delta[1], rho, sigma, &work[1]); - return 0; - } - -/* Compute machine epsilon */ - - eps = slamch_("Epsilon"); - rhoinv = 1.f / *rho; - -/* The case I = N */ - - if (*i__ == *n) { - -/* Initialize some basic variables */ - - ii = *n - 1; - niter = 1; - -/* Calculate initial guess */ - - temp = *rho / 2.f; - -/* - If ||Z||_2 is not one, then TEMP should be set to - RHO * ||Z||_2^2 / TWO -*/ - - temp1 = temp / (d__[*n] + sqrt(d__[*n] * d__[*n] + temp)); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - work[j] = d__[j] + d__[*n] + temp1; - delta[j] = d__[j] - d__[*n] - temp1; -/* L10: */ - } - - psi = 0.f; - i__1 = *n - 2; - for (j = 1; j <= i__1; ++j) { - psi += z__[j] * z__[j] / (delta[j] * work[j]); -/* L20: */ - } - - c__ = rhoinv + psi; - w = c__ + z__[ii] * z__[ii] / (delta[ii] * work[ii]) + z__[*n] * z__[* - n] / (delta[*n] * work[*n]); - - if (w <= 0.f) { - temp1 = sqrt(d__[*n] * d__[*n] + *rho); - temp = z__[*n - 1] * z__[*n - 1] / ((d__[*n - 1] + temp1) * (d__[* - n] - d__[*n - 1] + *rho / (d__[*n] + temp1))) + z__[*n] * - z__[*n] / *rho; - -/* - The following TAU is to approximate - SIGMA_n^2 - D( N )*D( N ) -*/ - - if (c__ <= temp) { - tau = *rho; - } else { - delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]); - a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[* - n]; - b = z__[*n] * z__[*n] * delsq; - if (a < 0.f) { - tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a); - } else { - tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f); - } - } - -/* - It can be proved that - D(N)^2+RHO/2 <= SIGMA_n^2 < D(N)^2+TAU <= D(N)^2+RHO -*/ - - } else { - delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]); - a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]; - b = z__[*n] * z__[*n] * delsq; - -/* - The following TAU is to approximate - SIGMA_n^2 - D( N )*D( N ) -*/ - - if (a < 0.f) { - tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a); - } else { - tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f); - } - -/* - It can be proved that - D(N)^2 < D(N)^2+TAU < SIGMA(N)^2 < D(N)^2+RHO/2 -*/ - - } - -/* The following ETA is to approximate SIGMA_n - D( N ) */ - - eta = tau / (d__[*n] + sqrt(d__[*n] * d__[*n] + tau)); - - *sigma = d__[*n] + eta; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] = d__[j] - d__[*i__] - eta; - work[j] = d__[j] + d__[*i__] + eta; -/* L30: */ - } - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.f; - psi = 0.f; - erretm = 0.f; - i__1 = ii; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / (delta[j] * work[j]); - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L40: */ - } - erretm = dabs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - temp = z__[*n] / (delta[*n] * work[*n]); - phi = z__[*n] * temp; - dphi = temp * temp; - erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * ( - dpsi + dphi); - - w = rhoinv + phi + psi; - -/* Test for convergence */ - - if (dabs(w) <= eps * erretm) { - goto L240; - } - -/* Calculate the new step */ - - ++niter; - dtnsq1 = work[*n - 1] * delta[*n - 1]; - dtnsq = work[*n] * delta[*n]; - c__ = w - dtnsq1 * dpsi - dtnsq * dphi; - a = (dtnsq + dtnsq1) * w - dtnsq * dtnsq1 * (dpsi + dphi); - b = dtnsq * dtnsq1 * w; - if (c__ < 0.f) { - c__ = dabs(c__); - } - if (c__ == 0.f) { - eta = *rho - *sigma * *sigma; - } else if (a >= 0.f) { - eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / ( - c__ * 2.f); - } else { - eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs( - r__1)))); - } - -/* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. -*/ - - if (w * eta > 0.f) { - eta = -w / (dpsi + dphi); - } - temp = eta - dtnsq; - if (temp > *rho) { - eta = *rho + dtnsq; - } - - tau += eta; - eta /= *sigma + sqrt(eta + *sigma * *sigma); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] -= eta; - work[j] += eta; -/* L50: */ - } - - *sigma += eta; - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.f; - psi = 0.f; - erretm = 0.f; - i__1 = ii; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / (work[j] * delta[j]); - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L60: */ - } - erretm = dabs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - temp = z__[*n] / (work[*n] * delta[*n]); - phi = z__[*n] * temp; - dphi = temp * temp; - erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * ( - dpsi + dphi); - - w = rhoinv + phi + psi; - -/* Main loop to update the values of the array DELTA */ - - iter = niter + 1; - - for (niter = iter; niter <= 20; ++niter) { - -/* Test for convergence */ - - if (dabs(w) <= eps * erretm) { - goto L240; - } - -/* Calculate the new step */ - - dtnsq1 = work[*n - 1] * delta[*n - 1]; - dtnsq = work[*n] * delta[*n]; - c__ = w - dtnsq1 * dpsi - dtnsq * dphi; - a = (dtnsq + dtnsq1) * w - dtnsq1 * dtnsq * (dpsi + dphi); - b = dtnsq1 * dtnsq * w; - if (a >= 0.f) { - eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / - (c__ * 2.f); - } else { - eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs( - r__1)))); - } - -/* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. -*/ - - if (w * eta > 0.f) { - eta = -w / (dpsi + dphi); - } - temp = eta - dtnsq; - if (temp <= 0.f) { - eta /= 2.f; - } - - tau += eta; - eta /= *sigma + sqrt(eta + *sigma * *sigma); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] -= eta; - work[j] += eta; -/* L70: */ - } - - *sigma += eta; - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.f; - psi = 0.f; - erretm = 0.f; - i__1 = ii; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / (work[j] * delta[j]); - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L80: */ - } - erretm = dabs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - temp = z__[*n] / (work[*n] * delta[*n]); - phi = z__[*n] * temp; - dphi = temp * temp; - erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * - (dpsi + dphi); - - w = rhoinv + phi + psi; -/* L90: */ - } - -/* Return with INFO = 1, NITER = MAXIT and not converged */ - - *info = 1; - goto L240; - -/* End for the case I = N */ - - } else { - -/* The case for I < N */ - - niter = 1; - ip1 = *i__ + 1; - -/* Calculate initial guess */ - - delsq = (d__[ip1] - d__[*i__]) * (d__[ip1] + d__[*i__]); - delsq2 = delsq / 2.f; - temp = delsq2 / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + delsq2)); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - work[j] = d__[j] + d__[*i__] + temp; - delta[j] = d__[j] - d__[*i__] - temp; -/* L100: */ - } - - psi = 0.f; - i__1 = *i__ - 1; - for (j = 1; j <= i__1; ++j) { - psi += z__[j] * z__[j] / (work[j] * delta[j]); -/* L110: */ - } - - phi = 0.f; - i__1 = *i__ + 2; - for (j = *n; j >= i__1; --j) { - phi += z__[j] * z__[j] / (work[j] * delta[j]); -/* L120: */ - } - c__ = rhoinv + psi + phi; - w = c__ + z__[*i__] * z__[*i__] / (work[*i__] * delta[*i__]) + z__[ - ip1] * z__[ip1] / (work[ip1] * delta[ip1]); - - if (w > 0.f) { - -/* - d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2 - - We choose d(i) as origin. -*/ - - orgati = TRUE_; - sg2lb = 0.f; - sg2ub = delsq2; - a = c__ * delsq + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; - b = z__[*i__] * z__[*i__] * delsq; - if (a > 0.f) { - tau = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( - r__1)))); - } else { - tau = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / - (c__ * 2.f); - } - -/* - TAU now is an estimation of SIGMA^2 - D( I )^2. The - following, however, is the corresponding estimation of - SIGMA - D( I ). -*/ - - eta = tau / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + tau)); - } else { - -/* - (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2 - - We choose d(i+1) as origin. -*/ - - orgati = FALSE_; - sg2lb = -delsq2; - sg2ub = 0.f; - a = c__ * delsq - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; - b = z__[ip1] * z__[ip1] * delsq; - if (a < 0.f) { - tau = b * 2.f / (a - sqrt((r__1 = a * a + b * 4.f * c__, dabs( - r__1)))); - } else { - tau = -(a + sqrt((r__1 = a * a + b * 4.f * c__, dabs(r__1)))) - / (c__ * 2.f); - } - -/* - TAU now is an estimation of SIGMA^2 - D( IP1 )^2. The - following, however, is the corresponding estimation of - SIGMA - D( IP1 ). -*/ - - eta = tau / (d__[ip1] + sqrt((r__1 = d__[ip1] * d__[ip1] + tau, - dabs(r__1)))); - } - - if (orgati) { - ii = *i__; - *sigma = d__[*i__] + eta; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - work[j] = d__[j] + d__[*i__] + eta; - delta[j] = d__[j] - d__[*i__] - eta; -/* L130: */ - } - } else { - ii = *i__ + 1; - *sigma = d__[ip1] + eta; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - work[j] = d__[j] + d__[ip1] + eta; - delta[j] = d__[j] - d__[ip1] - eta; -/* L140: */ - } - } - iim1 = ii - 1; - iip1 = ii + 1; - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.f; - psi = 0.f; - erretm = 0.f; - i__1 = iim1; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / (work[j] * delta[j]); - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L150: */ - } - erretm = dabs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - dphi = 0.f; - phi = 0.f; - i__1 = iip1; - for (j = *n; j >= i__1; --j) { - temp = z__[j] / (work[j] * delta[j]); - phi += z__[j] * temp; - dphi += temp * temp; - erretm += phi; -/* L160: */ - } - - w = rhoinv + phi + psi; - -/* - W is the value of the secular function with - its ii-th element removed. -*/ - - swtch3 = FALSE_; - if (orgati) { - if (w < 0.f) { - swtch3 = TRUE_; - } - } else { - if (w > 0.f) { - swtch3 = TRUE_; - } - } - if (ii == 1 || ii == *n) { - swtch3 = FALSE_; - } - - temp = z__[ii] / (work[ii] * delta[ii]); - dw = dpsi + dphi + temp * temp; - temp = z__[ii] * temp; - w += temp; - erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f - + dabs(tau) * dw; - -/* Test for convergence */ - - if (dabs(w) <= eps * erretm) { - goto L240; - } - - if (w <= 0.f) { - sg2lb = dmax(sg2lb,tau); - } else { - sg2ub = dmin(sg2ub,tau); - } - -/* Calculate the new step */ - - ++niter; - if (! swtch3) { - dtipsq = work[ip1] * delta[ip1]; - dtisq = work[*i__] * delta[*i__]; - if (orgati) { -/* Computing 2nd power */ - r__1 = z__[*i__] / dtisq; - c__ = w - dtipsq * dw + delsq * (r__1 * r__1); - } else { -/* Computing 2nd power */ - r__1 = z__[ip1] / dtipsq; - c__ = w - dtisq * dw - delsq * (r__1 * r__1); - } - a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; - b = dtipsq * dtisq * w; - if (c__ == 0.f) { - if (a == 0.f) { - if (orgati) { - a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * (dpsi + - dphi); - } else { - a = z__[ip1] * z__[ip1] + dtisq * dtisq * (dpsi + - dphi); - } - } - eta = b / a; - } else if (a <= 0.f) { - eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / - (c__ * 2.f); - } else { - eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( - r__1)))); - } - } else { - -/* Interpolation using THREE most relevant poles */ - - dtiim = work[iim1] * delta[iim1]; - dtiip = work[iip1] * delta[iip1]; - temp = rhoinv + psi + phi; - if (orgati) { - temp1 = z__[iim1] / dtiim; - temp1 *= temp1; - c__ = temp - dtiip * (dpsi + dphi) - (d__[iim1] - d__[iip1]) * - (d__[iim1] + d__[iip1]) * temp1; - zz[0] = z__[iim1] * z__[iim1]; - if (dpsi < temp1) { - zz[2] = dtiip * dtiip * dphi; - } else { - zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi); - } - } else { - temp1 = z__[iip1] / dtiip; - temp1 *= temp1; - c__ = temp - dtiim * (dpsi + dphi) - (d__[iip1] - d__[iim1]) * - (d__[iim1] + d__[iip1]) * temp1; - if (dphi < temp1) { - zz[0] = dtiim * dtiim * dpsi; - } else { - zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1)); - } - zz[2] = z__[iip1] * z__[iip1]; - } - zz[1] = z__[ii] * z__[ii]; - dd[0] = dtiim; - dd[1] = delta[ii] * work[ii]; - dd[2] = dtiip; - slaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); - if (*info != 0) { - goto L240; - } - } - -/* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. -*/ - - if (w * eta >= 0.f) { - eta = -w / dw; - } - if (orgati) { - temp1 = work[*i__] * delta[*i__]; - temp = eta - temp1; - } else { - temp1 = work[ip1] * delta[ip1]; - temp = eta - temp1; - } - if (temp > sg2ub || temp < sg2lb) { - if (w < 0.f) { - eta = (sg2ub - tau) / 2.f; - } else { - eta = (sg2lb - tau) / 2.f; - } - } - - tau += eta; - eta /= *sigma + sqrt(*sigma * *sigma + eta); - - prew = w; - - *sigma += eta; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - work[j] += eta; - delta[j] -= eta; -/* L170: */ - } - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.f; - psi = 0.f; - erretm = 0.f; - i__1 = iim1; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / (work[j] * delta[j]); - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L180: */ - } - erretm = dabs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - dphi = 0.f; - phi = 0.f; - i__1 = iip1; - for (j = *n; j >= i__1; --j) { - temp = z__[j] / (work[j] * delta[j]); - phi += z__[j] * temp; - dphi += temp * temp; - erretm += phi; -/* L190: */ - } - - temp = z__[ii] / (work[ii] * delta[ii]); - dw = dpsi + dphi + temp * temp; - temp = z__[ii] * temp; - w = rhoinv + phi + psi + temp; - erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f - + dabs(tau) * dw; - - if (w <= 0.f) { - sg2lb = dmax(sg2lb,tau); - } else { - sg2ub = dmin(sg2ub,tau); - } - - swtch = FALSE_; - if (orgati) { - if (-w > dabs(prew) / 10.f) { - swtch = TRUE_; - } - } else { - if (w > dabs(prew) / 10.f) { - swtch = TRUE_; - } - } - -/* Main loop to update the values of the array DELTA and WORK */ - - iter = niter + 1; - - for (niter = iter; niter <= 20; ++niter) { - -/* Test for convergence */ - - if (dabs(w) <= eps * erretm) { - goto L240; - } - -/* Calculate the new step */ - - if (! swtch3) { - dtipsq = work[ip1] * delta[ip1]; - dtisq = work[*i__] * delta[*i__]; - if (! swtch) { - if (orgati) { -/* Computing 2nd power */ - r__1 = z__[*i__] / dtisq; - c__ = w - dtipsq * dw + delsq * (r__1 * r__1); - } else { -/* Computing 2nd power */ - r__1 = z__[ip1] / dtipsq; - c__ = w - dtisq * dw - delsq * (r__1 * r__1); - } - } else { - temp = z__[ii] / (work[ii] * delta[ii]); - if (orgati) { - dpsi += temp * temp; - } else { - dphi += temp * temp; - } - c__ = w - dtisq * dpsi - dtipsq * dphi; - } - a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; - b = dtipsq * dtisq * w; - if (c__ == 0.f) { - if (a == 0.f) { - if (! swtch) { - if (orgati) { - a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * - (dpsi + dphi); - } else { - a = z__[ip1] * z__[ip1] + dtisq * dtisq * ( - dpsi + dphi); - } - } else { - a = dtisq * dtisq * dpsi + dtipsq * dtipsq * dphi; - } - } - eta = b / a; - } else if (a <= 0.f) { - eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)) - )) / (c__ * 2.f); - } else { - eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, - dabs(r__1)))); - } - } else { - -/* Interpolation using THREE most relevant poles */ - - dtiim = work[iim1] * delta[iim1]; - dtiip = work[iip1] * delta[iip1]; - temp = rhoinv + psi + phi; - if (swtch) { - c__ = temp - dtiim * dpsi - dtiip * dphi; - zz[0] = dtiim * dtiim * dpsi; - zz[2] = dtiip * dtiip * dphi; - } else { - if (orgati) { - temp1 = z__[iim1] / dtiim; - temp1 *= temp1; - temp2 = (d__[iim1] - d__[iip1]) * (d__[iim1] + d__[ - iip1]) * temp1; - c__ = temp - dtiip * (dpsi + dphi) - temp2; - zz[0] = z__[iim1] * z__[iim1]; - if (dpsi < temp1) { - zz[2] = dtiip * dtiip * dphi; - } else { - zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi); - } - } else { - temp1 = z__[iip1] / dtiip; - temp1 *= temp1; - temp2 = (d__[iip1] - d__[iim1]) * (d__[iim1] + d__[ - iip1]) * temp1; - c__ = temp - dtiim * (dpsi + dphi) - temp2; - if (dphi < temp1) { - zz[0] = dtiim * dtiim * dpsi; - } else { - zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1)); - } - zz[2] = z__[iip1] * z__[iip1]; - } - } - dd[0] = dtiim; - dd[1] = delta[ii] * work[ii]; - dd[2] = dtiip; - slaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); - if (*info != 0) { - goto L240; - } - } - -/* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. -*/ - - if (w * eta >= 0.f) { - eta = -w / dw; - } - if (orgati) { - temp1 = work[*i__] * delta[*i__]; - temp = eta - temp1; - } else { - temp1 = work[ip1] * delta[ip1]; - temp = eta - temp1; - } - if (temp > sg2ub || temp < sg2lb) { - if (w < 0.f) { - eta = (sg2ub - tau) / 2.f; - } else { - eta = (sg2lb - tau) / 2.f; - } - } - - tau += eta; - eta /= *sigma + sqrt(*sigma * *sigma + eta); - - *sigma += eta; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - work[j] += eta; - delta[j] -= eta; -/* L200: */ - } - - prew = w; - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.f; - psi = 0.f; - erretm = 0.f; - i__1 = iim1; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / (work[j] * delta[j]); - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L210: */ - } - erretm = dabs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - dphi = 0.f; - phi = 0.f; - i__1 = iip1; - for (j = *n; j >= i__1; --j) { - temp = z__[j] / (work[j] * delta[j]); - phi += z__[j] * temp; - dphi += temp * temp; - erretm += phi; -/* L220: */ - } - - temp = z__[ii] / (work[ii] * delta[ii]); - dw = dpsi + dphi + temp * temp; - temp = z__[ii] * temp; - w = rhoinv + phi + psi + temp; - erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * - 3.f + dabs(tau) * dw; - if (w * prew > 0.f && dabs(w) > dabs(prew) / 10.f) { - swtch = ! swtch; - } - - if (w <= 0.f) { - sg2lb = dmax(sg2lb,tau); - } else { - sg2ub = dmin(sg2ub,tau); - } - -/* L230: */ - } - -/* Return with INFO = 1, NITER = MAXIT and not converged */ - - *info = 1; - - } - -L240: - return 0; - -/* End of SLASD4 */ - -} /* slasd4_ */ - -/* Subroutine */ int slasd5_(integer *i__, real *d__, real *z__, real *delta, - real *rho, real *dsigma, real *work) -{ - /* System generated locals */ - real r__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static real b, c__, w, del, tau, delsq; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 - - - Purpose - ======= - - This subroutine computes the square root of the I-th eigenvalue - of a positive symmetric rank-one modification of a 2-by-2 diagonal - matrix - - diag( D ) * diag( D ) + RHO * Z * transpose(Z) . - - The diagonal entries in the array D are assumed to satisfy - - 0 <= D(i) < D(j) for i < j . - - We also assume RHO > 0 and that the Euclidean norm of the vector - Z is one. - - Arguments - ========= - - I (input) INTEGER - The index of the eigenvalue to be computed. I = 1 or I = 2. - - D (input) REAL array, dimension ( 2 ) - The original eigenvalues. We assume 0 <= D(1) < D(2). - - Z (input) REAL array, dimension ( 2 ) - The components of the updating vector. - - DELTA (output) REAL array, dimension ( 2 ) - Contains (D(j) - lambda_I) in its j-th component. - The vector DELTA contains the information necessary - to construct the eigenvectors. - - RHO (input) REAL - The scalar in the symmetric updating formula. - - DSIGMA (output) REAL - The computed lambda_I, the I-th updated eigenvalue. - - WORK (workspace) REAL array, dimension ( 2 ) - WORK contains (D(j) + sigma_I) in its j-th component. - - Further Details - =============== - - Based on contributions by - Ren-Cang Li, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --work; - --delta; - --z__; - --d__; - - /* Function Body */ - del = d__[2] - d__[1]; - delsq = del * (d__[2] + d__[1]); - if (*i__ == 1) { - w = *rho * 4.f * (z__[2] * z__[2] / (d__[1] + d__[2] * 3.f) - z__[1] * - z__[1] / (d__[1] * 3.f + d__[2])) / del + 1.f; - if (w > 0.f) { - b = delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); - c__ = *rho * z__[1] * z__[1] * delsq; - -/* - B > ZERO, always - - The following TAU is DSIGMA * DSIGMA - D( 1 ) * D( 1 ) -*/ - - tau = c__ * 2.f / (b + sqrt((r__1 = b * b - c__ * 4.f, dabs(r__1)) - )); - -/* The following TAU is DSIGMA - D( 1 ) */ - - tau /= d__[1] + sqrt(d__[1] * d__[1] + tau); - *dsigma = d__[1] + tau; - delta[1] = -tau; - delta[2] = del - tau; - work[1] = d__[1] * 2.f + tau; - work[2] = d__[1] + tau + d__[2]; -/* - DELTA( 1 ) = -Z( 1 ) / TAU - DELTA( 2 ) = Z( 2 ) / ( DEL-TAU ) -*/ - } else { - b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); - c__ = *rho * z__[2] * z__[2] * delsq; - -/* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */ - - if (b > 0.f) { - tau = c__ * -2.f / (b + sqrt(b * b + c__ * 4.f)); - } else { - tau = (b - sqrt(b * b + c__ * 4.f)) / 2.f; - } - -/* The following TAU is DSIGMA - D( 2 ) */ - - tau /= d__[2] + sqrt((r__1 = d__[2] * d__[2] + tau, dabs(r__1))); - *dsigma = d__[2] + tau; - delta[1] = -(del + tau); - delta[2] = -tau; - work[1] = d__[1] + tau + d__[2]; - work[2] = d__[2] * 2.f + tau; -/* - DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) - DELTA( 2 ) = -Z( 2 ) / TAU -*/ - } -/* - TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) - DELTA( 1 ) = DELTA( 1 ) / TEMP - DELTA( 2 ) = DELTA( 2 ) / TEMP -*/ - } else { - -/* Now I=2 */ - - b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); - c__ = *rho * z__[2] * z__[2] * delsq; - -/* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */ - - if (b > 0.f) { - tau = (b + sqrt(b * b + c__ * 4.f)) / 2.f; - } else { - tau = c__ * 2.f / (-b + sqrt(b * b + c__ * 4.f)); - } - -/* The following TAU is DSIGMA - D( 2 ) */ - - tau /= d__[2] + sqrt(d__[2] * d__[2] + tau); - *dsigma = d__[2] + tau; - delta[1] = -(del + tau); - delta[2] = -tau; - work[1] = d__[1] + tau + d__[2]; - work[2] = d__[2] * 2.f + tau; -/* - DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) - DELTA( 2 ) = -Z( 2 ) / TAU - TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) - DELTA( 1 ) = DELTA( 1 ) / TEMP - DELTA( 2 ) = DELTA( 2 ) / TEMP -*/ - } - return 0; - -/* End of SLASD5 */ - -} /* slasd5_ */ - -/* Subroutine */ int slasd6_(integer *icompq, integer *nl, integer *nr, - integer *sqre, real *d__, real *vf, real *vl, real *alpha, real *beta, - integer *idxq, integer *perm, integer *givptr, integer *givcol, - integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real * - difl, real *difr, real *z__, integer *k, real *c__, real *s, real * - work, integer *iwork, integer *info) -{ - /* System generated locals */ - integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, - poles_dim1, poles_offset, i__1; - real r__1, r__2; - - /* Local variables */ - static integer i__, m, n, n1, n2, iw, idx, idxc, idxp, ivfw, ivlw; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *), slasd7_(integer *, integer *, integer *, integer *, - integer *, real *, real *, real *, real *, real *, real *, real *, - real *, real *, real *, integer *, integer *, integer *, integer - *, integer *, integer *, integer *, real *, integer *, real *, - real *, integer *), slasd8_(integer *, integer *, real *, real *, - real *, real *, real *, real *, integer *, real *, real *, - integer *); - static integer isigma; - extern /* Subroutine */ int xerbla_(char *, integer *), slascl_( - char *, integer *, integer *, real *, real *, integer *, integer * - , real *, integer *, integer *), slamrg_(integer *, - integer *, real *, integer *, integer *, integer *); - static real orgnrm; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SLASD6 computes the SVD of an updated upper bidiagonal matrix B - obtained by merging two smaller ones by appending a row. This - routine is used only for the problem which requires all singular - values and optionally singular vector matrices in factored form. - B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE. - A related subroutine, SLASD1, handles the case in which all singular - values and singular vectors of the bidiagonal matrix are desired. - - SLASD6 computes the SVD as follows: - - ( D1(in) 0 0 0 ) - B = U(in) * ( Z1' a Z2' b ) * VT(in) - ( 0 0 D2(in) 0 ) - - = U(out) * ( D(out) 0) * VT(out) - - where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M - with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros - elsewhere; and the entry b is empty if SQRE = 0. - - The singular values of B can be computed using D1, D2, the first - components of all the right singular vectors of the lower block, and - the last components of all the right singular vectors of the upper - block. These components are stored and updated in VF and VL, - respectively, in SLASD6. Hence U and VT are not explicitly - referenced. - - The singular values are stored in D. The algorithm consists of two - stages: - - The first stage consists of deflating the size of the problem - when there are multiple singular values or if there is a zero - in the Z vector. For each such occurence the dimension of the - secular equation problem is reduced by one. This stage is - performed by the routine SLASD7. - - The second stage consists of calculating the updated - singular values. This is done by finding the roots of the - secular equation via the routine SLASD4 (as called by SLASD8). - This routine also updates VF and VL and computes the distances - between the updated singular values and the old singular - values. - - SLASD6 is called from SLASDA. - - Arguments - ========= - - ICOMPQ (input) INTEGER - Specifies whether singular vectors are to be computed in - factored form: - = 0: Compute singular values only. - = 1: Compute singular vectors in factored form as well. - - NL (input) INTEGER - The row dimension of the upper block. NL >= 1. - - NR (input) INTEGER - The row dimension of the lower block. NR >= 1. - - SQRE (input) INTEGER - = 0: the lower block is an NR-by-NR square matrix. - = 1: the lower block is an NR-by-(NR+1) rectangular matrix. - - The bidiagonal matrix has row dimension N = NL + NR + 1, - and column dimension M = N + SQRE. - - D (input/output) REAL array, dimension ( NL+NR+1 ). - On entry D(1:NL,1:NL) contains the singular values of the - upper block, and D(NL+2:N) contains the singular values - of the lower block. On exit D(1:N) contains the singular - values of the modified matrix. - - VF (input/output) REAL array, dimension ( M ) - On entry, VF(1:NL+1) contains the first components of all - right singular vectors of the upper block; and VF(NL+2:M) - contains the first components of all right singular vectors - of the lower block. On exit, VF contains the first components - of all right singular vectors of the bidiagonal matrix. - - VL (input/output) REAL array, dimension ( M ) - On entry, VL(1:NL+1) contains the last components of all - right singular vectors of the upper block; and VL(NL+2:M) - contains the last components of all right singular vectors of - the lower block. On exit, VL contains the last components of - all right singular vectors of the bidiagonal matrix. - - ALPHA (input) REAL - Contains the diagonal element associated with the added row. - - BETA (input) REAL - Contains the off-diagonal element associated with the added - row. - - IDXQ (output) INTEGER array, dimension ( N ) - This contains the permutation which will reintegrate the - subproblem just solved back into sorted order, i.e. - D( IDXQ( I = 1, N ) ) will be in ascending order. - - PERM (output) INTEGER array, dimension ( N ) - The permutations (from deflation and sorting) to be applied - to each block. Not referenced if ICOMPQ = 0. - - GIVPTR (output) INTEGER - The number of Givens rotations which took place in this - subproblem. Not referenced if ICOMPQ = 0. - - GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) - Each pair of numbers indicates a pair of columns to take place - in a Givens rotation. Not referenced if ICOMPQ = 0. - - LDGCOL (input) INTEGER - leading dimension of GIVCOL, must be at least N. - - GIVNUM (output) REAL array, dimension ( LDGNUM, 2 ) - Each number indicates the C or S value to be used in the - corresponding Givens rotation. Not referenced if ICOMPQ = 0. - - LDGNUM (input) INTEGER - The leading dimension of GIVNUM and POLES, must be at least N. - - POLES (output) REAL array, dimension ( LDGNUM, 2 ) - On exit, POLES(1,*) is an array containing the new singular - values obtained from solving the secular equation, and - POLES(2,*) is an array containing the poles in the secular - equation. Not referenced if ICOMPQ = 0. - - DIFL (output) REAL array, dimension ( N ) - On exit, DIFL(I) is the distance between I-th updated - (undeflated) singular value and the I-th (undeflated) old - singular value. - - DIFR (output) REAL array, - dimension ( LDGNUM, 2 ) if ICOMPQ = 1 and - dimension ( N ) if ICOMPQ = 0. - On exit, DIFR(I, 1) is the distance between I-th updated - (undeflated) singular value and the I+1-th (undeflated) old - singular value. - - If ICOMPQ = 1, DIFR(1:K,2) is an array containing the - normalizing factors for the right singular vector matrix. - - See SLASD8 for details on DIFL and DIFR. - - Z (output) REAL array, dimension ( M ) - The first elements of this array contain the components - of the deflation-adjusted updating row vector. - - K (output) INTEGER - Contains the dimension of the non-deflated matrix, - This is the order of the related secular equation. 1 <= K <=N. - - C (output) REAL - C contains garbage if SQRE =0 and the C-value of a Givens - rotation related to the right null space if SQRE = 1. - - S (output) REAL - S contains garbage if SQRE =0 and the S-value of a Givens - rotation related to the right null space if SQRE = 1. - - WORK (workspace) REAL array, dimension ( 4 * M ) - - IWORK (workspace) INTEGER array, dimension ( 3 * N ) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an singular value did not converge - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --vf; - --vl; - --idxq; - --perm; - givcol_dim1 = *ldgcol; - givcol_offset = 1 + givcol_dim1; - givcol -= givcol_offset; - poles_dim1 = *ldgnum; - poles_offset = 1 + poles_dim1; - poles -= poles_offset; - givnum_dim1 = *ldgnum; - givnum_offset = 1 + givnum_dim1; - givnum -= givnum_offset; - --difl; - --difr; - --z__; - --work; - --iwork; - - /* Function Body */ - *info = 0; - n = *nl + *nr + 1; - m = n + *sqre; - - if (*icompq < 0 || *icompq > 1) { - *info = -1; - } else if (*nl < 1) { - *info = -2; - } else if (*nr < 1) { - *info = -3; - } else if (*sqre < 0 || *sqre > 1) { - *info = -4; - } else if (*ldgcol < n) { - *info = -14; - } else if (*ldgnum < n) { - *info = -16; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLASD6", &i__1); - return 0; - } - -/* - The following values are for bookkeeping purposes only. They are - integer pointers which indicate the portion of the workspace - used by a particular array in SLASD7 and SLASD8. -*/ - - isigma = 1; - iw = isigma + n; - ivfw = iw + m; - ivlw = ivfw + m; - - idx = 1; - idxc = idx + n; - idxp = idxc + n; - -/* - Scale. - - Computing MAX -*/ - r__1 = dabs(*alpha), r__2 = dabs(*beta); - orgnrm = dmax(r__1,r__2); - d__[*nl + 1] = 0.f; - i__1 = n; - for (i__ = 1; i__ <= i__1; ++i__) { - if ((r__1 = d__[i__], dabs(r__1)) > orgnrm) { - orgnrm = (r__1 = d__[i__], dabs(r__1)); - } -/* L10: */ - } - slascl_("G", &c__0, &c__0, &orgnrm, &c_b871, &n, &c__1, &d__[1], &n, info); - *alpha /= orgnrm; - *beta /= orgnrm; - -/* Sort and Deflate singular values. */ - - slasd7_(icompq, nl, nr, sqre, k, &d__[1], &z__[1], &work[iw], &vf[1], & - work[ivfw], &vl[1], &work[ivlw], alpha, beta, &work[isigma], & - iwork[idx], &iwork[idxp], &idxq[1], &perm[1], givptr, &givcol[ - givcol_offset], ldgcol, &givnum[givnum_offset], ldgnum, c__, s, - info); - -/* Solve Secular Equation, compute DIFL, DIFR, and update VF, VL. */ - - slasd8_(icompq, k, &d__[1], &z__[1], &vf[1], &vl[1], &difl[1], &difr[1], - ldgnum, &work[isigma], &work[iw], info); - -/* Save the poles if ICOMPQ = 1. */ - - if (*icompq == 1) { - scopy_(k, &d__[1], &c__1, &poles[poles_dim1 + 1], &c__1); - scopy_(k, &work[isigma], &c__1, &poles[(poles_dim1 << 1) + 1], &c__1); - } - -/* Unscale. */ - - slascl_("G", &c__0, &c__0, &c_b871, &orgnrm, &n, &c__1, &d__[1], &n, info); - -/* Prepare the IDXQ sorting permutation. */ - - n1 = *k; - n2 = n - *k; - slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); - - return 0; - -/* End of SLASD6 */ - -} /* slasd6_ */ - -/* Subroutine */ int slasd7_(integer *icompq, integer *nl, integer *nr, - integer *sqre, integer *k, real *d__, real *z__, real *zw, real *vf, - real *vfw, real *vl, real *vlw, real *alpha, real *beta, real *dsigma, - integer *idx, integer *idxp, integer *idxq, integer *perm, integer * - givptr, integer *givcol, integer *ldgcol, real *givnum, integer * - ldgnum, real *c__, real *s, integer *info) -{ - /* System generated locals */ - integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, i__1; - real r__1, r__2; - - /* Local variables */ - static integer i__, j, m, n, k2; - static real z1; - static integer jp; - static real eps, tau, tol; - static integer nlp1, nlp2, idxi, idxj; - extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, - integer *, real *, real *); - static integer idxjp, jprev; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *); - extern doublereal slapy2_(real *, real *), slamch_(char *); - extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_( - integer *, integer *, real *, integer *, integer *, integer *); - static real hlftol; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 - - - Purpose - ======= - - SLASD7 merges the two sets of singular values together into a single - sorted set. Then it tries to deflate the size of the problem. There - are two ways in which deflation can occur: when two or more singular - values are close together or if there is a tiny entry in the Z - vector. For each such occurrence the order of the related - secular equation problem is reduced by one. - - SLASD7 is called from SLASD6. - - Arguments - ========= - - ICOMPQ (input) INTEGER - Specifies whether singular vectors are to be computed - in compact form, as follows: - = 0: Compute singular values only. - = 1: Compute singular vectors of upper - bidiagonal matrix in compact form. - - NL (input) INTEGER - The row dimension of the upper block. NL >= 1. - - NR (input) INTEGER - The row dimension of the lower block. NR >= 1. - - SQRE (input) INTEGER - = 0: the lower block is an NR-by-NR square matrix. - = 1: the lower block is an NR-by-(NR+1) rectangular matrix. - - The bidiagonal matrix has - N = NL + NR + 1 rows and - M = N + SQRE >= N columns. - - K (output) INTEGER - Contains the dimension of the non-deflated matrix, this is - the order of the related secular equation. 1 <= K <=N. - - D (input/output) REAL array, dimension ( N ) - On entry D contains the singular values of the two submatrices - to be combined. On exit D contains the trailing (N-K) updated - singular values (those which were deflated) sorted into - increasing order. - - Z (output) REAL array, dimension ( M ) - On exit Z contains the updating row vector in the secular - equation. - - ZW (workspace) REAL array, dimension ( M ) - Workspace for Z. - - VF (input/output) REAL array, dimension ( M ) - On entry, VF(1:NL+1) contains the first components of all - right singular vectors of the upper block; and VF(NL+2:M) - contains the first components of all right singular vectors - of the lower block. On exit, VF contains the first components - of all right singular vectors of the bidiagonal matrix. - - VFW (workspace) REAL array, dimension ( M ) - Workspace for VF. - - VL (input/output) REAL array, dimension ( M ) - On entry, VL(1:NL+1) contains the last components of all - right singular vectors of the upper block; and VL(NL+2:M) - contains the last components of all right singular vectors - of the lower block. On exit, VL contains the last components - of all right singular vectors of the bidiagonal matrix. - - VLW (workspace) REAL array, dimension ( M ) - Workspace for VL. - - ALPHA (input) REAL - Contains the diagonal element associated with the added row. - - BETA (input) REAL - Contains the off-diagonal element associated with the added - row. - - DSIGMA (output) REAL array, dimension ( N ) - Contains a copy of the diagonal elements (K-1 singular values - and one zero) in the secular equation. - - IDX (workspace) INTEGER array, dimension ( N ) - This will contain the permutation used to sort the contents of - D into ascending order. - - IDXP (workspace) INTEGER array, dimension ( N ) - This will contain the permutation used to place deflated - values of D at the end of the array. On output IDXP(2:K) - points to the nondeflated D-values and IDXP(K+1:N) - points to the deflated singular values. - - IDXQ (input) INTEGER array, dimension ( N ) - This contains the permutation which separately sorts the two - sub-problems in D into ascending order. Note that entries in - the first half of this permutation must first be moved one - position backward; and entries in the second half - must first have NL+1 added to their values. - - PERM (output) INTEGER array, dimension ( N ) - The permutations (from deflation and sorting) to be applied - to each singular block. Not referenced if ICOMPQ = 0. - - GIVPTR (output) INTEGER - The number of Givens rotations which took place in this - subproblem. Not referenced if ICOMPQ = 0. - - GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) - Each pair of numbers indicates a pair of columns to take place - in a Givens rotation. Not referenced if ICOMPQ = 0. - - LDGCOL (input) INTEGER - The leading dimension of GIVCOL, must be at least N. - - GIVNUM (output) REAL array, dimension ( LDGNUM, 2 ) - Each number indicates the C or S value to be used in the - corresponding Givens rotation. Not referenced if ICOMPQ = 0. - - LDGNUM (input) INTEGER - The leading dimension of GIVNUM, must be at least N. - - C (output) REAL - C contains garbage if SQRE =0 and the C-value of a Givens - rotation related to the right null space if SQRE = 1. - - S (output) REAL - S contains garbage if SQRE =0 and the S-value of a Givens - rotation related to the right null space if SQRE = 1. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --z__; - --zw; - --vf; - --vfw; - --vl; - --vlw; - --dsigma; - --idx; - --idxp; - --idxq; - --perm; - givcol_dim1 = *ldgcol; - givcol_offset = 1 + givcol_dim1; - givcol -= givcol_offset; - givnum_dim1 = *ldgnum; - givnum_offset = 1 + givnum_dim1; - givnum -= givnum_offset; - - /* Function Body */ - *info = 0; - n = *nl + *nr + 1; - m = n + *sqre; - - if (*icompq < 0 || *icompq > 1) { - *info = -1; - } else if (*nl < 1) { - *info = -2; - } else if (*nr < 1) { - *info = -3; - } else if (*sqre < 0 || *sqre > 1) { - *info = -4; - } else if (*ldgcol < n) { - *info = -22; - } else if (*ldgnum < n) { - *info = -24; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLASD7", &i__1); - return 0; - } - - nlp1 = *nl + 1; - nlp2 = *nl + 2; - if (*icompq == 1) { - *givptr = 0; - } - -/* - Generate the first part of the vector Z and move the singular - values in the first part of D one position backward. -*/ - - z1 = *alpha * vl[nlp1]; - vl[nlp1] = 0.f; - tau = vf[nlp1]; - for (i__ = *nl; i__ >= 1; --i__) { - z__[i__ + 1] = *alpha * vl[i__]; - vl[i__] = 0.f; - vf[i__ + 1] = vf[i__]; - d__[i__ + 1] = d__[i__]; - idxq[i__ + 1] = idxq[i__] + 1; -/* L10: */ - } - vf[1] = tau; - -/* Generate the second part of the vector Z. */ - - i__1 = m; - for (i__ = nlp2; i__ <= i__1; ++i__) { - z__[i__] = *beta * vf[i__]; - vf[i__] = 0.f; -/* L20: */ - } - -/* Sort the singular values into increasing order */ - - i__1 = n; - for (i__ = nlp2; i__ <= i__1; ++i__) { - idxq[i__] += nlp1; -/* L30: */ - } - -/* DSIGMA, IDXC, IDXC, and ZW are used as storage space. */ - - i__1 = n; - for (i__ = 2; i__ <= i__1; ++i__) { - dsigma[i__] = d__[idxq[i__]]; - zw[i__] = z__[idxq[i__]]; - vfw[i__] = vf[idxq[i__]]; - vlw[i__] = vl[idxq[i__]]; -/* L40: */ - } - - slamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); - - i__1 = n; - for (i__ = 2; i__ <= i__1; ++i__) { - idxi = idx[i__] + 1; - d__[i__] = dsigma[idxi]; - z__[i__] = zw[idxi]; - vf[i__] = vfw[idxi]; - vl[i__] = vlw[idxi]; -/* L50: */ - } - -/* Calculate the allowable deflation tolerence */ - - eps = slamch_("Epsilon"); -/* Computing MAX */ - r__1 = dabs(*alpha), r__2 = dabs(*beta); - tol = dmax(r__1,r__2); -/* Computing MAX */ - r__2 = (r__1 = d__[n], dabs(r__1)); - tol = eps * 64.f * dmax(r__2,tol); - -/* - There are 2 kinds of deflation -- first a value in the z-vector - is small, second two (or more) singular values are very close - together (their difference is small). - - If the value in the z-vector is small, we simply permute the - array so that the corresponding singular value is moved to the - end. - - If two values in the D-vector are close, we perform a two-sided - rotation designed to make one of the corresponding z-vector - entries zero, and then permute the array so that the deflated - singular value is moved to the end. - - If there are multiple singular values then the problem deflates. - Here the number of equal singular values are found. As each equal - singular value is found, an elementary reflector is computed to - rotate the corresponding singular subspace so that the - corresponding components of Z are zero in this new basis. -*/ - - *k = 1; - k2 = n + 1; - i__1 = n; - for (j = 2; j <= i__1; ++j) { - if ((r__1 = z__[j], dabs(r__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - idxp[k2] = j; - if (j == n) { - goto L100; - } - } else { - jprev = j; - goto L70; - } -/* L60: */ - } -L70: - j = jprev; -L80: - ++j; - if (j > n) { - goto L90; - } - if ((r__1 = z__[j], dabs(r__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - idxp[k2] = j; - } else { - -/* Check if singular values are close enough to allow deflation. */ - - if ((r__1 = d__[j] - d__[jprev], dabs(r__1)) <= tol) { - -/* Deflation is possible. */ - - *s = z__[jprev]; - *c__ = z__[j]; - -/* - Find sqrt(a**2+b**2) without overflow or - destructive underflow. -*/ - - tau = slapy2_(c__, s); - z__[j] = tau; - z__[jprev] = 0.f; - *c__ /= tau; - *s = -(*s) / tau; - -/* Record the appropriate Givens rotation */ - - if (*icompq == 1) { - ++(*givptr); - idxjp = idxq[idx[jprev] + 1]; - idxj = idxq[idx[j] + 1]; - if (idxjp <= nlp1) { - --idxjp; - } - if (idxj <= nlp1) { - --idxj; - } - givcol[*givptr + (givcol_dim1 << 1)] = idxjp; - givcol[*givptr + givcol_dim1] = idxj; - givnum[*givptr + (givnum_dim1 << 1)] = *c__; - givnum[*givptr + givnum_dim1] = *s; - } - srot_(&c__1, &vf[jprev], &c__1, &vf[j], &c__1, c__, s); - srot_(&c__1, &vl[jprev], &c__1, &vl[j], &c__1, c__, s); - --k2; - idxp[k2] = jprev; - jprev = j; - } else { - ++(*k); - zw[*k] = z__[jprev]; - dsigma[*k] = d__[jprev]; - idxp[*k] = jprev; - jprev = j; - } - } - goto L80; -L90: - -/* Record the last singular value. */ - - ++(*k); - zw[*k] = z__[jprev]; - dsigma[*k] = d__[jprev]; - idxp[*k] = jprev; - -L100: - -/* - Sort the singular values into DSIGMA. The singular values which - were not deflated go into the first K slots of DSIGMA, except - that DSIGMA(1) is treated separately. -*/ - - i__1 = n; - for (j = 2; j <= i__1; ++j) { - jp = idxp[j]; - dsigma[j] = d__[jp]; - vfw[j] = vf[jp]; - vlw[j] = vl[jp]; -/* L110: */ - } - if (*icompq == 1) { - i__1 = n; - for (j = 2; j <= i__1; ++j) { - jp = idxp[j]; - perm[j] = idxq[idx[jp] + 1]; - if (perm[j] <= nlp1) { - --perm[j]; - } -/* L120: */ - } - } - -/* - The deflated singular values go back into the last N - K slots of - D. -*/ - - i__1 = n - *k; - scopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); - -/* - Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and - VL(M). -*/ - - dsigma[1] = 0.f; - hlftol = tol / 2.f; - if (dabs(dsigma[2]) <= hlftol) { - dsigma[2] = hlftol; - } - if (m > n) { - z__[1] = slapy2_(&z1, &z__[m]); - if (z__[1] <= tol) { - *c__ = 1.f; - *s = 0.f; - z__[1] = tol; - } else { - *c__ = z1 / z__[1]; - *s = -z__[m] / z__[1]; - } - srot_(&c__1, &vf[m], &c__1, &vf[1], &c__1, c__, s); - srot_(&c__1, &vl[m], &c__1, &vl[1], &c__1, c__, s); - } else { - if (dabs(z1) <= tol) { - z__[1] = tol; - } else { - z__[1] = z1; - } - } - -/* Restore Z, VF, and VL. */ - - i__1 = *k - 1; - scopy_(&i__1, &zw[2], &c__1, &z__[2], &c__1); - i__1 = n - 1; - scopy_(&i__1, &vfw[2], &c__1, &vf[2], &c__1); - i__1 = n - 1; - scopy_(&i__1, &vlw[2], &c__1, &vl[2], &c__1); - - return 0; - -/* End of SLASD7 */ - -} /* slasd7_ */ - -/* Subroutine */ int slasd8_(integer *icompq, integer *k, real *d__, real * - z__, real *vf, real *vl, real *difl, real *difr, integer *lddifr, - real *dsigma, real *work, integer *info) -{ - /* System generated locals */ - integer difr_dim1, difr_offset, i__1, i__2; - real r__1, r__2; - - /* Builtin functions */ - double sqrt(doublereal), r_sign(real *, real *); - - /* Local variables */ - static integer i__, j; - static real dj, rho; - static integer iwk1, iwk2, iwk3; - static real temp; - extern doublereal sdot_(integer *, real *, integer *, real *, integer *); - static integer iwk2i, iwk3i; - extern doublereal snrm2_(integer *, real *, integer *); - static real diflj, difrj, dsigj; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *); - extern doublereal slamc3_(real *, real *); - extern /* Subroutine */ int slasd4_(integer *, integer *, real *, real *, - real *, real *, real *, real *, integer *), xerbla_(char *, - integer *); - static real dsigjp; - extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, - real *, integer *, integer *, real *, integer *, integer *), slaset_(char *, integer *, integer *, real *, real *, - real *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 - - - Purpose - ======= - - SLASD8 finds the square roots of the roots of the secular equation, - as defined by the values in DSIGMA and Z. It makes the appropriate - calls to SLASD4, and stores, for each element in D, the distance - to its two nearest poles (elements in DSIGMA). It also updates - the arrays VF and VL, the first and last components of all the - right singular vectors of the original bidiagonal matrix. - - SLASD8 is called from SLASD6. - - Arguments - ========= - - ICOMPQ (input) INTEGER - Specifies whether singular vectors are to be computed in - factored form in the calling routine: - = 0: Compute singular values only. - = 1: Compute singular vectors in factored form as well. - - K (input) INTEGER - The number of terms in the rational function to be solved - by SLASD4. K >= 1. - - D (output) REAL array, dimension ( K ) - On output, D contains the updated singular values. - - Z (input) REAL array, dimension ( K ) - The first K elements of this array contain the components - of the deflation-adjusted updating row vector. - - VF (input/output) REAL array, dimension ( K ) - On entry, VF contains information passed through DBEDE8. - On exit, VF contains the first K components of the first - components of all right singular vectors of the bidiagonal - matrix. - - VL (input/output) REAL array, dimension ( K ) - On entry, VL contains information passed through DBEDE8. - On exit, VL contains the first K components of the last - components of all right singular vectors of the bidiagonal - matrix. - - DIFL (output) REAL array, dimension ( K ) - On exit, DIFL(I) = D(I) - DSIGMA(I). - - DIFR (output) REAL array, - dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and - dimension ( K ) if ICOMPQ = 0. - On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not - defined and will not be referenced. - - If ICOMPQ = 1, DIFR(1:K,2) is an array containing the - normalizing factors for the right singular vector matrix. - - LDDIFR (input) INTEGER - The leading dimension of DIFR, must be at least K. - - DSIGMA (input) REAL array, dimension ( K ) - The first K elements of this array contain the old roots - of the deflated updating problem. These are the poles - of the secular equation. - - WORK (workspace) REAL array, dimension at least 3 * K - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an singular value did not converge - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --z__; - --vf; - --vl; - --difl; - difr_dim1 = *lddifr; - difr_offset = 1 + difr_dim1; - difr -= difr_offset; - --dsigma; - --work; - - /* Function Body */ - *info = 0; - - if (*icompq < 0 || *icompq > 1) { - *info = -1; - } else if (*k < 1) { - *info = -2; - } else if (*lddifr < *k) { - *info = -9; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLASD8", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*k == 1) { - d__[1] = dabs(z__[1]); - difl[1] = d__[1]; - if (*icompq == 1) { - difl[2] = 1.f; - difr[(difr_dim1 << 1) + 1] = 1.f; - } - return 0; - } - -/* - Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can - be computed with high relative accuracy (barring over/underflow). - This is a problem on machines without a guard digit in - add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). - The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), - which on any of these machines zeros out the bottommost - bit of DSIGMA(I) if it is 1; this makes the subsequent - subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation - occurs. On binary machines with a guard digit (almost all - machines) it does not change DSIGMA(I) at all. On hexadecimal - and decimal machines with a guard digit, it slightly - changes the bottommost bits of DSIGMA(I). It does not account - for hexadecimal or decimal machines without guard digits - (we know of none). We use a subroutine call to compute - 2*DLAMBDA(I) to prevent optimizing compilers from eliminating - this code. -*/ - - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - dsigma[i__] = slamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; -/* L10: */ - } - -/* Book keeping. */ - - iwk1 = 1; - iwk2 = iwk1 + *k; - iwk3 = iwk2 + *k; - iwk2i = iwk2 - 1; - iwk3i = iwk3 - 1; - -/* Normalize Z. */ - - rho = snrm2_(k, &z__[1], &c__1); - slascl_("G", &c__0, &c__0, &rho, &c_b871, k, &c__1, &z__[1], k, info); - rho *= rho; - -/* Initialize WORK(IWK3). */ - - slaset_("A", k, &c__1, &c_b871, &c_b871, &work[iwk3], k); - -/* - Compute the updated singular values, the arrays DIFL, DIFR, - and the updated Z. -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - slasd4_(k, &j, &dsigma[1], &z__[1], &work[iwk1], &rho, &d__[j], &work[ - iwk2], info); - -/* If the root finder fails, the computation is terminated. */ - - if (*info != 0) { - return 0; - } - work[iwk3i + j] = work[iwk3i + j] * work[j] * work[iwk2i + j]; - difl[j] = -work[j]; - difr[j + difr_dim1] = -work[j + 1]; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + - i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ - j]); -/* L20: */ - } - i__2 = *k; - for (i__ = j + 1; i__ <= i__2; ++i__) { - work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + - i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ - j]); -/* L30: */ - } -/* L40: */ - } - -/* Compute updated Z. */ - - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - r__2 = sqrt((r__1 = work[iwk3i + i__], dabs(r__1))); - z__[i__] = r_sign(&r__2, &z__[i__]); -/* L50: */ - } - -/* Update VF and VL. */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - diflj = difl[j]; - dj = d__[j]; - dsigj = -dsigma[j]; - if (j < *k) { - difrj = -difr[j + difr_dim1]; - dsigjp = -dsigma[j + 1]; - } - work[j] = -z__[j] / diflj / (dsigma[j] + dj); - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - work[i__] = z__[i__] / (slamc3_(&dsigma[i__], &dsigj) - diflj) / ( - dsigma[i__] + dj); -/* L60: */ - } - i__2 = *k; - for (i__ = j + 1; i__ <= i__2; ++i__) { - work[i__] = z__[i__] / (slamc3_(&dsigma[i__], &dsigjp) + difrj) / - (dsigma[i__] + dj); -/* L70: */ - } - temp = snrm2_(k, &work[1], &c__1); - work[iwk2i + j] = sdot_(k, &work[1], &c__1, &vf[1], &c__1) / temp; - work[iwk3i + j] = sdot_(k, &work[1], &c__1, &vl[1], &c__1) / temp; - if (*icompq == 1) { - difr[j + (difr_dim1 << 1)] = temp; - } -/* L80: */ - } - - scopy_(k, &work[iwk2], &c__1, &vf[1], &c__1); - scopy_(k, &work[iwk3], &c__1, &vl[1], &c__1); - - return 0; - -/* End of SLASD8 */ - -} /* slasd8_ */ - -/* Subroutine */ int slasda_(integer *icompq, integer *smlsiz, integer *n, - integer *sqre, real *d__, real *e, real *u, integer *ldu, real *vt, - integer *k, real *difl, real *difr, real *z__, real *poles, integer * - givptr, integer *givcol, integer *ldgcol, integer *perm, real *givnum, - real *c__, real *s, real *work, integer *iwork, integer *info) -{ - /* System generated locals */ - integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1, - difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, - poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, - z_dim1, z_offset, i__1, i__2; - - /* Builtin functions */ - integer pow_ii(integer *, integer *); - - /* Local variables */ - static integer i__, j, m, i1, ic, lf, nd, ll, nl, vf, nr, vl, im1, ncc, - nlf, nrf, vfi, iwk, vli, lvl, nru, ndb1, nlp1, lvl2, nrp1; - static real beta; - static integer idxq, nlvl; - static real alpha; - static integer inode, ndiml, ndimr, idxqi, itemp, sqrei; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *), slasd6_(integer *, integer *, integer *, integer *, - real *, real *, real *, real *, real *, integer *, integer *, - integer *, integer *, integer *, real *, integer *, real *, real * - , real *, real *, integer *, real *, real *, real *, integer *, - integer *); - static integer nwork1, nwork2; - extern /* Subroutine */ int xerbla_(char *, integer *), slasdq_( - char *, integer *, integer *, integer *, integer *, integer *, - real *, real *, real *, integer *, real *, integer *, real *, - integer *, real *, integer *), slasdt_(integer *, integer - *, integer *, integer *, integer *, integer *, integer *), - slaset_(char *, integer *, integer *, real *, real *, real *, - integer *); - static integer smlszp; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - Using a divide and conquer approach, SLASDA computes the singular - value decomposition (SVD) of a real upper bidiagonal N-by-M matrix - B with diagonal D and offdiagonal E, where M = N + SQRE. The - algorithm computes the singular values in the SVD B = U * S * VT. - The orthogonal matrices U and VT are optionally computed in - compact form. - - A related subroutine, SLASD0, computes the singular values and - the singular vectors in explicit form. - - Arguments - ========= - - ICOMPQ (input) INTEGER - Specifies whether singular vectors are to be computed - in compact form, as follows - = 0: Compute singular values only. - = 1: Compute singular vectors of upper bidiagonal - matrix in compact form. - - SMLSIZ (input) INTEGER - The maximum size of the subproblems at the bottom of the - computation tree. - - N (input) INTEGER - The row dimension of the upper bidiagonal matrix. This is - also the dimension of the main diagonal array D. - - SQRE (input) INTEGER - Specifies the column dimension of the bidiagonal matrix. - = 0: The bidiagonal matrix has column dimension M = N; - = 1: The bidiagonal matrix has column dimension M = N + 1. - - D (input/output) REAL array, dimension ( N ) - On entry D contains the main diagonal of the bidiagonal - matrix. On exit D, if INFO = 0, contains its singular values. - - E (input) REAL array, dimension ( M-1 ) - Contains the subdiagonal entries of the bidiagonal matrix. - On exit, E has been destroyed. - - U (output) REAL array, - dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced - if ICOMPQ = 0. If ICOMPQ = 1, on exit, U contains the left - singular vector matrices of all subproblems at the bottom - level. - - LDU (input) INTEGER, LDU = > N. - The leading dimension of arrays U, VT, DIFL, DIFR, POLES, - GIVNUM, and Z. - - VT (output) REAL array, - dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced - if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT' contains the right - singular vector matrices of all subproblems at the bottom - level. - - K (output) INTEGER array, - dimension ( N ) if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0. - If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th - secular equation on the computation tree. - - DIFL (output) REAL array, dimension ( LDU, NLVL ), - where NLVL = floor(log_2 (N/SMLSIZ))). - - DIFR (output) REAL array, - dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and - dimension ( N ) if ICOMPQ = 0. - If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1) - record distances between singular values on the I-th - level and singular values on the (I -1)-th level, and - DIFR(1:N, 2 * I ) contains the normalizing factors for - the right singular vector matrix. See SLASD8 for details. - - Z (output) REAL array, - dimension ( LDU, NLVL ) if ICOMPQ = 1 and - dimension ( N ) if ICOMPQ = 0. - The first K elements of Z(1, I) contain the components of - the deflation-adjusted updating row vector for subproblems - on the I-th level. - - POLES (output) REAL array, - dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced - if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and - POLES(1, 2*I) contain the new and old singular values - involved in the secular equations on the I-th level. - - GIVPTR (output) INTEGER array, - dimension ( N ) if ICOMPQ = 1, and not referenced if - ICOMPQ = 0. If ICOMPQ = 1, on exit, GIVPTR( I ) records - the number of Givens rotations performed on the I-th - problem on the computation tree. - - GIVCOL (output) INTEGER array, - dimension ( LDGCOL, 2 * NLVL ) if ICOMPQ = 1, and not - referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, - GIVCOL(1, 2 *I - 1) and GIVCOL(1, 2 *I) record the locations - of Givens rotations performed on the I-th level on the - computation tree. - - LDGCOL (input) INTEGER, LDGCOL = > N. - The leading dimension of arrays GIVCOL and PERM. - - PERM (output) INTEGER array, - dimension ( LDGCOL, NLVL ) if ICOMPQ = 1, and not referenced - if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records - permutations done on the I-th level of the computation tree. - - GIVNUM (output) REAL array, - dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not - referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, - GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S- - values of Givens rotations performed on the I-th level on - the computation tree. - - C (output) REAL array, - dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. - If ICOMPQ = 1 and the I-th subproblem is not square, on exit, - C( I ) contains the C-value of a Givens rotation related to - the right null space of the I-th subproblem. - - S (output) REAL array, dimension ( N ) if - ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1 - and the I-th subproblem is not square, on exit, S( I ) - contains the S-value of a Givens rotation related to - the right null space of the I-th subproblem. - - WORK (workspace) REAL array, dimension - (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)). - - IWORK (workspace) INTEGER array. - Dimension must be at least (7 * N). - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an singular value did not converge - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - givnum_dim1 = *ldu; - givnum_offset = 1 + givnum_dim1; - givnum -= givnum_offset; - poles_dim1 = *ldu; - poles_offset = 1 + poles_dim1; - poles -= poles_offset; - z_dim1 = *ldu; - z_offset = 1 + z_dim1; - z__ -= z_offset; - difr_dim1 = *ldu; - difr_offset = 1 + difr_dim1; - difr -= difr_offset; - difl_dim1 = *ldu; - difl_offset = 1 + difl_dim1; - difl -= difl_offset; - vt_dim1 = *ldu; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - --k; - --givptr; - perm_dim1 = *ldgcol; - perm_offset = 1 + perm_dim1; - perm -= perm_offset; - givcol_dim1 = *ldgcol; - givcol_offset = 1 + givcol_dim1; - givcol -= givcol_offset; - --c__; - --s; - --work; - --iwork; - - /* Function Body */ - *info = 0; - - if (*icompq < 0 || *icompq > 1) { - *info = -1; - } else if (*smlsiz < 3) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*sqre < 0 || *sqre > 1) { - *info = -4; - } else if (*ldu < *n + *sqre) { - *info = -8; - } else if (*ldgcol < *n) { - *info = -17; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLASDA", &i__1); - return 0; - } - - m = *n + *sqre; - -/* If the input matrix is too small, call SLASDQ to find the SVD. */ - - if (*n <= *smlsiz) { - if (*icompq == 0) { - slasdq_("U", sqre, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ - vt_offset], ldu, &u[u_offset], ldu, &u[u_offset], ldu, & - work[1], info); - } else { - slasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset] - , ldu, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], - info); - } - return 0; - } - -/* Book-keeping and set up the computation tree. */ - - inode = 1; - ndiml = inode + *n; - ndimr = ndiml + *n; - idxq = ndimr + *n; - iwk = idxq + *n; - - ncc = 0; - nru = 0; - - smlszp = *smlsiz + 1; - vf = 1; - vl = vf + m; - nwork1 = vl + m; - nwork2 = nwork1 + smlszp * smlszp; - - slasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], - smlsiz); - -/* - for the nodes on bottom level of the tree, solve - their subproblems by SLASDQ. -*/ - - ndb1 = (nd + 1) / 2; - i__1 = nd; - for (i__ = ndb1; i__ <= i__1; ++i__) { - -/* - IC : center row of each node - NL : number of rows of left subproblem - NR : number of rows of right subproblem - NLF: starting row of the left subproblem - NRF: starting row of the right subproblem -*/ - - i1 = i__ - 1; - ic = iwork[inode + i1]; - nl = iwork[ndiml + i1]; - nlp1 = nl + 1; - nr = iwork[ndimr + i1]; - nlf = ic - nl; - nrf = ic + 1; - idxqi = idxq + nlf - 2; - vfi = vf + nlf - 1; - vli = vl + nlf - 1; - sqrei = 1; - if (*icompq == 0) { - slaset_("A", &nlp1, &nlp1, &c_b1101, &c_b871, &work[nwork1], & - smlszp); - slasdq_("U", &sqrei, &nl, &nlp1, &nru, &ncc, &d__[nlf], &e[nlf], & - work[nwork1], &smlszp, &work[nwork2], &nl, &work[nwork2], - &nl, &work[nwork2], info); - itemp = nwork1 + nl * smlszp; - scopy_(&nlp1, &work[nwork1], &c__1, &work[vfi], &c__1); - scopy_(&nlp1, &work[itemp], &c__1, &work[vli], &c__1); - } else { - slaset_("A", &nl, &nl, &c_b1101, &c_b871, &u[nlf + u_dim1], ldu); - slaset_("A", &nlp1, &nlp1, &c_b1101, &c_b871, &vt[nlf + vt_dim1], - ldu); - slasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], & - vt[nlf + vt_dim1], ldu, &u[nlf + u_dim1], ldu, &u[nlf + - u_dim1], ldu, &work[nwork1], info); - scopy_(&nlp1, &vt[nlf + vt_dim1], &c__1, &work[vfi], &c__1); - scopy_(&nlp1, &vt[nlf + nlp1 * vt_dim1], &c__1, &work[vli], &c__1) - ; - } - if (*info != 0) { - return 0; - } - i__2 = nl; - for (j = 1; j <= i__2; ++j) { - iwork[idxqi + j] = j; -/* L10: */ - } - if (i__ == nd && *sqre == 0) { - sqrei = 0; - } else { - sqrei = 1; - } - idxqi += nlp1; - vfi += nlp1; - vli += nlp1; - nrp1 = nr + sqrei; - if (*icompq == 0) { - slaset_("A", &nrp1, &nrp1, &c_b1101, &c_b871, &work[nwork1], & - smlszp); - slasdq_("U", &sqrei, &nr, &nrp1, &nru, &ncc, &d__[nrf], &e[nrf], & - work[nwork1], &smlszp, &work[nwork2], &nr, &work[nwork2], - &nr, &work[nwork2], info); - itemp = nwork1 + (nrp1 - 1) * smlszp; - scopy_(&nrp1, &work[nwork1], &c__1, &work[vfi], &c__1); - scopy_(&nrp1, &work[itemp], &c__1, &work[vli], &c__1); - } else { - slaset_("A", &nr, &nr, &c_b1101, &c_b871, &u[nrf + u_dim1], ldu); - slaset_("A", &nrp1, &nrp1, &c_b1101, &c_b871, &vt[nrf + vt_dim1], - ldu); - slasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], & - vt[nrf + vt_dim1], ldu, &u[nrf + u_dim1], ldu, &u[nrf + - u_dim1], ldu, &work[nwork1], info); - scopy_(&nrp1, &vt[nrf + vt_dim1], &c__1, &work[vfi], &c__1); - scopy_(&nrp1, &vt[nrf + nrp1 * vt_dim1], &c__1, &work[vli], &c__1) - ; - } - if (*info != 0) { - return 0; - } - i__2 = nr; - for (j = 1; j <= i__2; ++j) { - iwork[idxqi + j] = j; -/* L20: */ - } -/* L30: */ - } - -/* Now conquer each subproblem bottom-up. */ - - j = pow_ii(&c__2, &nlvl); - for (lvl = nlvl; lvl >= 1; --lvl) { - lvl2 = (lvl << 1) - 1; - -/* - Find the first node LF and last node LL on - the current level LVL. -*/ - - if (lvl == 1) { - lf = 1; - ll = 1; - } else { - i__1 = lvl - 1; - lf = pow_ii(&c__2, &i__1); - ll = (lf << 1) - 1; - } - i__1 = ll; - for (i__ = lf; i__ <= i__1; ++i__) { - im1 = i__ - 1; - ic = iwork[inode + im1]; - nl = iwork[ndiml + im1]; - nr = iwork[ndimr + im1]; - nlf = ic - nl; - nrf = ic + 1; - if (i__ == ll) { - sqrei = *sqre; - } else { - sqrei = 1; - } - vfi = vf + nlf - 1; - vli = vl + nlf - 1; - idxqi = idxq + nlf - 1; - alpha = d__[ic]; - beta = e[ic]; - if (*icompq == 0) { - slasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & - work[vli], &alpha, &beta, &iwork[idxqi], &perm[ - perm_offset], &givptr[1], &givcol[givcol_offset], - ldgcol, &givnum[givnum_offset], ldu, &poles[ - poles_offset], &difl[difl_offset], &difr[difr_offset], - &z__[z_offset], &k[1], &c__[1], &s[1], &work[nwork1], - &iwork[iwk], info); - } else { - --j; - slasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & - work[vli], &alpha, &beta, &iwork[idxqi], &perm[nlf + - lvl * perm_dim1], &givptr[j], &givcol[nlf + lvl2 * - givcol_dim1], ldgcol, &givnum[nlf + lvl2 * - givnum_dim1], ldu, &poles[nlf + lvl2 * poles_dim1], & - difl[nlf + lvl * difl_dim1], &difr[nlf + lvl2 * - difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[j], - &s[j], &work[nwork1], &iwork[iwk], info); - } - if (*info != 0) { - return 0; - } -/* L40: */ - } -/* L50: */ - } - - return 0; - -/* End of SLASDA */ - -} /* slasda_ */ - -/* Subroutine */ int slasdq_(char *uplo, integer *sqre, integer *n, integer * - ncvt, integer *nru, integer *ncc, real *d__, real *e, real *vt, - integer *ldvt, real *u, integer *ldu, real *c__, integer *ldc, real * - work, integer *info) -{ - /* System generated locals */ - integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, - i__2; - - /* Local variables */ - static integer i__, j; - static real r__, cs, sn; - static integer np1, isub; - static real smin; - static integer sqre1; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int slasr_(char *, char *, char *, integer *, - integer *, real *, real *, real *, integer *); - static integer iuplo; - extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, - integer *), xerbla_(char *, integer *), slartg_(real *, - real *, real *, real *, real *); - static logical rotate; - extern /* Subroutine */ int sbdsqr_(char *, integer *, integer *, integer - *, integer *, real *, real *, real *, integer *, real *, integer * - , real *, integer *, real *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - SLASDQ computes the singular value decomposition (SVD) of a real - (upper or lower) bidiagonal matrix with diagonal D and offdiagonal - E, accumulating the transformations if desired. Letting B denote - the input bidiagonal matrix, the algorithm computes orthogonal - matrices Q and P such that B = Q * S * P' (P' denotes the transpose - of P). The singular values S are overwritten on D. - - The input matrix U is changed to U * Q if desired. - The input matrix VT is changed to P' * VT if desired. - The input matrix C is changed to Q' * C if desired. - - See "Computing Small Singular Values of Bidiagonal Matrices With - Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, - LAPACK Working Note #3, for a detailed description of the algorithm. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - On entry, UPLO specifies whether the input bidiagonal matrix - is upper or lower bidiagonal, and wether it is square are - not. - UPLO = 'U' or 'u' B is upper bidiagonal. - UPLO = 'L' or 'l' B is lower bidiagonal. - - SQRE (input) INTEGER - = 0: then the input matrix is N-by-N. - = 1: then the input matrix is N-by-(N+1) if UPLU = 'U' and - (N+1)-by-N if UPLU = 'L'. - - The bidiagonal matrix has - N = NL + NR + 1 rows and - M = N + SQRE >= N columns. - - N (input) INTEGER - On entry, N specifies the number of rows and columns - in the matrix. N must be at least 0. - - NCVT (input) INTEGER - On entry, NCVT specifies the number of columns of - the matrix VT. NCVT must be at least 0. - - NRU (input) INTEGER - On entry, NRU specifies the number of rows of - the matrix U. NRU must be at least 0. - - NCC (input) INTEGER - On entry, NCC specifies the number of columns of - the matrix C. NCC must be at least 0. - - D (input/output) REAL array, dimension (N) - On entry, D contains the diagonal entries of the - bidiagonal matrix whose SVD is desired. On normal exit, - D contains the singular values in ascending order. - - E (input/output) REAL array. - dimension is (N-1) if SQRE = 0 and N if SQRE = 1. - On entry, the entries of E contain the offdiagonal entries - of the bidiagonal matrix whose SVD is desired. On normal - exit, E will contain 0. If the algorithm does not converge, - D and E will contain the diagonal and superdiagonal entries - of a bidiagonal matrix orthogonally equivalent to the one - given as input. - - VT (input/output) REAL array, dimension (LDVT, NCVT) - On entry, contains a matrix which on exit has been - premultiplied by P', dimension N-by-NCVT if SQRE = 0 - and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0). - - LDVT (input) INTEGER - On entry, LDVT specifies the leading dimension of VT as - declared in the calling (sub) program. LDVT must be at - least 1. If NCVT is nonzero LDVT must also be at least N. - - U (input/output) REAL array, dimension (LDU, N) - On entry, contains a matrix which on exit has been - postmultiplied by Q, dimension NRU-by-N if SQRE = 0 - and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0). - - LDU (input) INTEGER - On entry, LDU specifies the leading dimension of U as - declared in the calling (sub) program. LDU must be at - least max( 1, NRU ) . - - C (input/output) REAL array, dimension (LDC, NCC) - On entry, contains an N-by-NCC matrix which on exit - has been premultiplied by Q' dimension N-by-NCC if SQRE = 0 - and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0). - - LDC (input) INTEGER - On entry, LDC specifies the leading dimension of C as - declared in the calling (sub) program. LDC must be at - least 1. If NCC is nonzero, LDC must also be at least N. - - WORK (workspace) REAL array, dimension (4*N) - Workspace. Only referenced if one of NCVT, NRU, or NCC is - nonzero, and if N is at least 2. - - INFO (output) INTEGER - On exit, a value of 0 indicates a successful exit. - If INFO < 0, argument number -INFO is illegal. - If INFO > 0, the algorithm did not converge, and INFO - specifies how many superdiagonals did not converge. - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - iuplo = 0; - if (lsame_(uplo, "U")) { - iuplo = 1; - } - if (lsame_(uplo, "L")) { - iuplo = 2; - } - if (iuplo == 0) { - *info = -1; - } else if (*sqre < 0 || *sqre > 1) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*ncvt < 0) { - *info = -4; - } else if (*nru < 0) { - *info = -5; - } else if (*ncc < 0) { - *info = -6; - } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { - *info = -10; - } else if (*ldu < max(1,*nru)) { - *info = -12; - } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { - *info = -14; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLASDQ", &i__1); - return 0; - } - if (*n == 0) { - return 0; - } - -/* ROTATE is true if any singular vectors desired, false otherwise */ - - rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; - np1 = *n + 1; - sqre1 = *sqre; - -/* - If matrix non-square upper bidiagonal, rotate to be lower - bidiagonal. The rotations are on the right. -*/ - - if (iuplo == 1 && sqre1 == 1) { - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); - d__[i__] = r__; - e[i__] = sn * d__[i__ + 1]; - d__[i__ + 1] = cs * d__[i__ + 1]; - if (rotate) { - work[i__] = cs; - work[*n + i__] = sn; - } -/* L10: */ - } - slartg_(&d__[*n], &e[*n], &cs, &sn, &r__); - d__[*n] = r__; - e[*n] = 0.f; - if (rotate) { - work[*n] = cs; - work[*n + *n] = sn; - } - iuplo = 2; - sqre1 = 0; - -/* Update singular vectors if desired. */ - - if (*ncvt > 0) { - slasr_("L", "V", "F", &np1, ncvt, &work[1], &work[np1], &vt[ - vt_offset], ldvt); - } - } - -/* - If matrix lower bidiagonal, rotate to be upper bidiagonal - by applying Givens rotations on the left. -*/ - - if (iuplo == 2) { - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); - d__[i__] = r__; - e[i__] = sn * d__[i__ + 1]; - d__[i__ + 1] = cs * d__[i__ + 1]; - if (rotate) { - work[i__] = cs; - work[*n + i__] = sn; - } -/* L20: */ - } - -/* - If matrix (N+1)-by-N lower bidiagonal, one additional - rotation is needed. -*/ - - if (sqre1 == 1) { - slartg_(&d__[*n], &e[*n], &cs, &sn, &r__); - d__[*n] = r__; - if (rotate) { - work[*n] = cs; - work[*n + *n] = sn; - } - } - -/* Update singular vectors if desired. */ - - if (*nru > 0) { - if (sqre1 == 0) { - slasr_("R", "V", "F", nru, n, &work[1], &work[np1], &u[ - u_offset], ldu); - } else { - slasr_("R", "V", "F", nru, &np1, &work[1], &work[np1], &u[ - u_offset], ldu); - } - } - if (*ncc > 0) { - if (sqre1 == 0) { - slasr_("L", "V", "F", n, ncc, &work[1], &work[np1], &c__[ - c_offset], ldc); - } else { - slasr_("L", "V", "F", &np1, ncc, &work[1], &work[np1], &c__[ - c_offset], ldc); - } - } - } - -/* - Call SBDSQR to compute the SVD of the reduced real - N-by-N upper bidiagonal matrix. -*/ - - sbdsqr_("U", n, ncvt, nru, ncc, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[ - u_offset], ldu, &c__[c_offset], ldc, &work[1], info); - -/* - Sort the singular values into ascending order (insertion sort on - singular values, but only one transposition per singular vector) -*/ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Scan for smallest D(I). */ - - isub = i__; - smin = d__[i__]; - i__2 = *n; - for (j = i__ + 1; j <= i__2; ++j) { - if (d__[j] < smin) { - isub = j; - smin = d__[j]; - } -/* L30: */ - } - if (isub != i__) { - -/* Swap singular values and vectors. */ - - d__[isub] = d__[i__]; - d__[i__] = smin; - if (*ncvt > 0) { - sswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[i__ + vt_dim1], - ldvt); - } - if (*nru > 0) { - sswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[i__ * u_dim1 + 1] - , &c__1); - } - if (*ncc > 0) { - sswap_(ncc, &c__[isub + c_dim1], ldc, &c__[i__ + c_dim1], ldc) - ; - } - } -/* L40: */ - } - - return 0; - -/* End of SLASDQ */ - -} /* slasdq_ */ - -/* Subroutine */ int slasdt_(integer *n, integer *lvl, integer *nd, integer * - inode, integer *ndiml, integer *ndimr, integer *msub) -{ - /* System generated locals */ - integer i__1, i__2; - - /* Builtin functions */ - double log(doublereal); - - /* Local variables */ - static integer i__, il, ir, maxn; - static real temp; - static integer nlvl, llst, ncrnt; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - SLASDT creates a tree of subproblems for bidiagonal divide and - conquer. - - Arguments - ========= - - N (input) INTEGER - On entry, the number of diagonal elements of the - bidiagonal matrix. - - LVL (output) INTEGER - On exit, the number of levels on the computation tree. - - ND (output) INTEGER - On exit, the number of nodes on the tree. - - INODE (output) INTEGER array, dimension ( N ) - On exit, centers of subproblems. - - NDIML (output) INTEGER array, dimension ( N ) - On exit, row dimensions of left children. - - NDIMR (output) INTEGER array, dimension ( N ) - On exit, row dimensions of right children. - - MSUB (input) INTEGER. - On entry, the maximum row dimension each subproblem at the - bottom of the tree can be of. - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Find the number of levels on the tree. -*/ - - /* Parameter adjustments */ - --ndimr; - --ndiml; - --inode; - - /* Function Body */ - maxn = max(1,*n); - temp = log((real) maxn / (real) (*msub + 1)) / log(2.f); - *lvl = (integer) temp + 1; - - i__ = *n / 2; - inode[1] = i__ + 1; - ndiml[1] = i__; - ndimr[1] = *n - i__ - 1; - il = 0; - ir = 1; - llst = 1; - i__1 = *lvl - 1; - for (nlvl = 1; nlvl <= i__1; ++nlvl) { - -/* - Constructing the tree at (NLVL+1)-st level. The number of - nodes created on this level is LLST * 2. -*/ - - i__2 = llst - 1; - for (i__ = 0; i__ <= i__2; ++i__) { - il += 2; - ir += 2; - ncrnt = llst + i__; - ndiml[il] = ndiml[ncrnt] / 2; - ndimr[il] = ndiml[ncrnt] - ndiml[il] - 1; - inode[il] = inode[ncrnt] - ndimr[il] - 1; - ndiml[ir] = ndimr[ncrnt] / 2; - ndimr[ir] = ndimr[ncrnt] - ndiml[ir] - 1; - inode[ir] = inode[ncrnt] + ndiml[ir] + 1; -/* L10: */ - } - llst <<= 1; -/* L20: */ - } - *nd = (llst << 1) - 1; - - return 0; - -/* End of SLASDT */ - -} /* slasdt_ */ - -/* Subroutine */ int slaset_(char *uplo, integer *m, integer *n, real *alpha, - real *beta, real *a, integer *lda) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j; - extern logical lsame_(char *, char *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - SLASET initializes an m-by-n matrix A to BETA on the diagonal and - ALPHA on the offdiagonals. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies the part of the matrix A to be set. - = 'U': Upper triangular part is set; the strictly lower - triangular part of A is not changed. - = 'L': Lower triangular part is set; the strictly upper - triangular part of A is not changed. - Otherwise: All of the matrix A is set. - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - ALPHA (input) REAL - The constant to which the offdiagonal elements are to be set. - - BETA (input) REAL - The constant to which the diagonal elements are to be set. - - A (input/output) REAL array, dimension (LDA,N) - On exit, the leading m-by-n submatrix of A is set as follows: - - if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n, - if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n, - otherwise, A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j, - - and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - if (lsame_(uplo, "U")) { - -/* - Set the strictly upper triangular or trapezoidal part of the - array to ALPHA. -*/ - - i__1 = *n; - for (j = 2; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = j - 1; - i__2 = min(i__3,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = *alpha; -/* L10: */ - } -/* L20: */ - } - - } else if (lsame_(uplo, "L")) { - -/* - Set the strictly lower triangular or trapezoidal part of the - array to ALPHA. -*/ - - i__1 = min(*m,*n); - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = j + 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = *alpha; -/* L30: */ - } -/* L40: */ - } - - } else { - -/* Set the leading m-by-n submatrix to ALPHA. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = *alpha; -/* L50: */ - } -/* L60: */ - } - } - -/* Set the first min(M,N) diagonal elements to BETA. */ - - i__1 = min(*m,*n); - for (i__ = 1; i__ <= i__1; ++i__) { - a[i__ + i__ * a_dim1] = *beta; -/* L70: */ - } - - return 0; - -/* End of SLASET */ - -} /* slaset_ */ - -/* Subroutine */ int slasq1_(integer *n, real *d__, real *e, real *work, - integer *info) -{ - /* System generated locals */ - integer i__1, i__2; - real r__1, r__2, r__3; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__; - static real eps; - extern /* Subroutine */ int slas2_(real *, real *, real *, real *, real *) - ; - static real scale; - static integer iinfo; - static real sigmn, sigmx; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *), slasq2_(integer *, real *, integer *); - extern doublereal slamch_(char *); - static real safmin; - extern /* Subroutine */ int xerbla_(char *, integer *), slascl_( - char *, integer *, integer *, real *, real *, integer *, integer * - , real *, integer *, integer *), slasrt_(char *, integer * - , real *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - SLASQ1 computes the singular values of a real N-by-N bidiagonal - matrix with diagonal D and off-diagonal E. The singular values - are computed to high relative accuracy, in the absence of - denormalization, underflow and overflow. The algorithm was first - presented in - - "Accurate singular values and differential qd algorithms" by K. V. - Fernando and B. N. Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230, - 1994, - - and the present implementation is described in "An implementation of - the dqds Algorithm (Positive Case)", LAPACK Working Note. - - Arguments - ========= - - N (input) INTEGER - The number of rows and columns in the matrix. N >= 0. - - D (input/output) REAL array, dimension (N) - On entry, D contains the diagonal elements of the - bidiagonal matrix whose SVD is desired. On normal exit, - D contains the singular values in decreasing order. - - E (input/output) REAL array, dimension (N) - On entry, elements E(1:N-1) contain the off-diagonal elements - of the bidiagonal matrix whose SVD is desired. - On exit, E is overwritten. - - WORK (workspace) REAL array, dimension (4*N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: the algorithm failed - = 1, a split was marked by a positive value in E - = 2, current block of Z not diagonalized after 30*N - iterations (in inner while loop) - = 3, termination criterion of outer while loop not met - (program created more than N unreduced blocks) - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --work; - --e; - --d__; - - /* Function Body */ - *info = 0; - if (*n < 0) { - *info = -2; - i__1 = -(*info); - xerbla_("SLASQ1", &i__1); - return 0; - } else if (*n == 0) { - return 0; - } else if (*n == 1) { - d__[1] = dabs(d__[1]); - return 0; - } else if (*n == 2) { - slas2_(&d__[1], &e[1], &d__[2], &sigmn, &sigmx); - d__[1] = sigmx; - d__[2] = sigmn; - return 0; - } - -/* Estimate the largest singular value. */ - - sigmx = 0.f; - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - d__[i__] = (r__1 = d__[i__], dabs(r__1)); -/* Computing MAX */ - r__2 = sigmx, r__3 = (r__1 = e[i__], dabs(r__1)); - sigmx = dmax(r__2,r__3); -/* L10: */ - } - d__[*n] = (r__1 = d__[*n], dabs(r__1)); - -/* Early return if SIGMX is zero (matrix is already diagonal). */ - - if (sigmx == 0.f) { - slasrt_("D", n, &d__[1], &iinfo); - return 0; - } - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - r__1 = sigmx, r__2 = d__[i__]; - sigmx = dmax(r__1,r__2); -/* L20: */ - } - -/* - Copy D and E into WORK (in the Z format) and scale (squaring the - input data makes scaling by a power of the radix pointless). -*/ - - eps = slamch_("Precision"); - safmin = slamch_("Safe minimum"); - scale = sqrt(eps / safmin); - scopy_(n, &d__[1], &c__1, &work[1], &c__2); - i__1 = *n - 1; - scopy_(&i__1, &e[1], &c__1, &work[2], &c__2); - i__1 = (*n << 1) - 1; - i__2 = (*n << 1) - 1; - slascl_("G", &c__0, &c__0, &sigmx, &scale, &i__1, &c__1, &work[1], &i__2, - &iinfo); - -/* Compute the q's and e's. */ - - i__1 = (*n << 1) - 1; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing 2nd power */ - r__1 = work[i__]; - work[i__] = r__1 * r__1; -/* L30: */ - } - work[*n * 2] = 0.f; - - slasq2_(n, &work[1], info); - - if (*info == 0) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - d__[i__] = sqrt(work[i__]); -/* L40: */ - } - slascl_("G", &c__0, &c__0, &scale, &sigmx, n, &c__1, &d__[1], n, & - iinfo); - } - - return 0; - -/* End of SLASQ1 */ - -} /* slasq1_ */ - -/* Subroutine */ int slasq2_(integer *n, real *z__, integer *info) -{ - /* System generated locals */ - integer i__1, i__2, i__3; - real r__1, r__2; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static real d__, e; - static integer k; - static real s, t; - static integer i0, i4, n0, pp; - static real eps, tol; - static integer ipn4; - static real tol2; - static logical ieee; - static integer nbig; - static real dmin__, emin, emax; - static integer ndiv, iter; - static real qmin, temp, qmax, zmax; - static integer splt, nfail; - static real desig, trace, sigma; - static integer iinfo; - extern /* Subroutine */ int slasq3_(integer *, integer *, real *, integer - *, real *, real *, real *, real *, integer *, integer *, integer * - , logical *); - extern doublereal slamch_(char *); - static integer iwhila, iwhilb; - static real oldemn, safmin; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - SLASQ2 computes all the eigenvalues of the symmetric positive - definite tridiagonal matrix associated with the qd array Z to high - relative accuracy are computed to high relative accuracy, in the - absence of denormalization, underflow and overflow. - - To see the relation of Z to the tridiagonal matrix, let L be a - unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and - let U be an upper bidiagonal matrix with 1's above and diagonal - Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the - symmetric tridiagonal to which it is similar. - - Note : SLASQ2 defines a logical variable, IEEE, which is true - on machines which follow ieee-754 floating-point standard in their - handling of infinities and NaNs, and false otherwise. This variable - is passed to SLASQ3. - - Arguments - ========= - - N (input) INTEGER - The number of rows and columns in the matrix. N >= 0. - - Z (workspace) REAL array, dimension ( 4*N ) - On entry Z holds the qd array. On exit, entries 1 to N hold - the eigenvalues in decreasing order, Z( 2*N+1 ) holds the - trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If - N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) - holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of - shifts that failed. - - INFO (output) INTEGER - = 0: successful exit - < 0: if the i-th argument is a scalar and had an illegal - value, then INFO = -i, if the i-th argument is an - array and the j-entry had an illegal value, then - INFO = -(i*100+j) - > 0: the algorithm failed - = 1, a split was marked by a positive value in E - = 2, current block of Z not diagonalized after 30*N - iterations (in inner while loop) - = 3, termination criterion of outer while loop not met - (program created more than N unreduced blocks) - - Further Details - =============== - Local Variables: I0:N0 defines a current unreduced segment of Z. - The shifts are accumulated in SIGMA. Iteration count is in ITER. - Ping-pong is controlled by PP (alternates between 0 and 1). - - ===================================================================== - - - Test the input arguments. - (in case SLASQ2 is not called by SLASQ1) -*/ - - /* Parameter adjustments */ - --z__; - - /* Function Body */ - *info = 0; - eps = slamch_("Precision"); - safmin = slamch_("Safe minimum"); - tol = eps * 100.f; -/* Computing 2nd power */ - r__1 = tol; - tol2 = r__1 * r__1; - - if (*n < 0) { - *info = -1; - xerbla_("SLASQ2", &c__1); - return 0; - } else if (*n == 0) { - return 0; - } else if (*n == 1) { - -/* 1-by-1 case. */ - - if (z__[1] < 0.f) { - *info = -201; - xerbla_("SLASQ2", &c__2); - } - return 0; - } else if (*n == 2) { - -/* 2-by-2 case. */ - - if (z__[2] < 0.f || z__[3] < 0.f) { - *info = -2; - xerbla_("SLASQ2", &c__2); - return 0; - } else if (z__[3] > z__[1]) { - d__ = z__[3]; - z__[3] = z__[1]; - z__[1] = d__; - } - z__[5] = z__[1] + z__[2] + z__[3]; - if (z__[2] > z__[3] * tol2) { - t = (z__[1] - z__[3] + z__[2]) * .5f; - s = z__[3] * (z__[2] / t); - if (s <= t) { - s = z__[3] * (z__[2] / (t * (sqrt(s / t + 1.f) + 1.f))); - } else { - s = z__[3] * (z__[2] / (t + sqrt(t) * sqrt(t + s))); - } - t = z__[1] + (s + z__[2]); - z__[3] *= z__[1] / t; - z__[1] = t; - } - z__[2] = z__[3]; - z__[6] = z__[2] + z__[1]; - return 0; - } - -/* Check for negative data and compute sums of q's and e's. */ - - z__[*n * 2] = 0.f; - emin = z__[2]; - qmax = 0.f; - zmax = 0.f; - d__ = 0.f; - e = 0.f; - - i__1 = *n - 1 << 1; - for (k = 1; k <= i__1; k += 2) { - if (z__[k] < 0.f) { - *info = -(k + 200); - xerbla_("SLASQ2", &c__2); - return 0; - } else if (z__[k + 1] < 0.f) { - *info = -(k + 201); - xerbla_("SLASQ2", &c__2); - return 0; - } - d__ += z__[k]; - e += z__[k + 1]; -/* Computing MAX */ - r__1 = qmax, r__2 = z__[k]; - qmax = dmax(r__1,r__2); -/* Computing MIN */ - r__1 = emin, r__2 = z__[k + 1]; - emin = dmin(r__1,r__2); -/* Computing MAX */ - r__1 = max(qmax,zmax), r__2 = z__[k + 1]; - zmax = dmax(r__1,r__2); -/* L10: */ - } - if (z__[(*n << 1) - 1] < 0.f) { - *info = -((*n << 1) + 199); - xerbla_("SLASQ2", &c__2); - return 0; - } - d__ += z__[(*n << 1) - 1]; -/* Computing MAX */ - r__1 = qmax, r__2 = z__[(*n << 1) - 1]; - qmax = dmax(r__1,r__2); - zmax = dmax(qmax,zmax); - -/* Check for diagonality. */ - - if (e == 0.f) { - i__1 = *n; - for (k = 2; k <= i__1; ++k) { - z__[k] = z__[(k << 1) - 1]; -/* L20: */ - } - slasrt_("D", n, &z__[1], &iinfo); - z__[(*n << 1) - 1] = d__; - return 0; - } - - trace = d__ + e; - -/* Check for zero data. */ - - if (trace == 0.f) { - z__[(*n << 1) - 1] = 0.f; - return 0; - } - -/* Check whether the machine is IEEE conformable. */ - - ieee = ilaenv_(&c__10, "SLASQ2", "N", &c__1, &c__2, &c__3, &c__4, (ftnlen) - 6, (ftnlen)1) == 1 && ilaenv_(&c__11, "SLASQ2", "N", &c__1, &c__2, - &c__3, &c__4, (ftnlen)6, (ftnlen)1) == 1; - -/* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). */ - - for (k = *n << 1; k >= 2; k += -2) { - z__[k * 2] = 0.f; - z__[(k << 1) - 1] = z__[k]; - z__[(k << 1) - 2] = 0.f; - z__[(k << 1) - 3] = z__[k - 1]; -/* L30: */ - } - - i0 = 1; - n0 = *n; - -/* Reverse the qd-array, if warranted. */ - - if (z__[(i0 << 2) - 3] * 1.5f < z__[(n0 << 2) - 3]) { - ipn4 = i0 + n0 << 2; - i__1 = i0 + n0 - 1 << 1; - for (i4 = i0 << 2; i4 <= i__1; i4 += 4) { - temp = z__[i4 - 3]; - z__[i4 - 3] = z__[ipn4 - i4 - 3]; - z__[ipn4 - i4 - 3] = temp; - temp = z__[i4 - 1]; - z__[i4 - 1] = z__[ipn4 - i4 - 5]; - z__[ipn4 - i4 - 5] = temp; -/* L40: */ - } - } - -/* Initial split checking via dqd and Li's test. */ - - pp = 0; - - for (k = 1; k <= 2; ++k) { - - d__ = z__[(n0 << 2) + pp - 3]; - i__1 = (i0 << 2) + pp; - for (i4 = (n0 - 1 << 2) + pp; i4 >= i__1; i4 += -4) { - if (z__[i4 - 1] <= tol2 * d__) { - z__[i4 - 1] = -0.f; - d__ = z__[i4 - 3]; - } else { - d__ = z__[i4 - 3] * (d__ / (d__ + z__[i4 - 1])); - } -/* L50: */ - } - -/* dqd maps Z to ZZ plus Li's test. */ - - emin = z__[(i0 << 2) + pp + 1]; - d__ = z__[(i0 << 2) + pp - 3]; - i__1 = (n0 - 1 << 2) + pp; - for (i4 = (i0 << 2) + pp; i4 <= i__1; i4 += 4) { - z__[i4 - (pp << 1) - 2] = d__ + z__[i4 - 1]; - if (z__[i4 - 1] <= tol2 * d__) { - z__[i4 - 1] = -0.f; - z__[i4 - (pp << 1) - 2] = d__; - z__[i4 - (pp << 1)] = 0.f; - d__ = z__[i4 + 1]; - } else if (safmin * z__[i4 + 1] < z__[i4 - (pp << 1) - 2] && - safmin * z__[i4 - (pp << 1) - 2] < z__[i4 + 1]) { - temp = z__[i4 + 1] / z__[i4 - (pp << 1) - 2]; - z__[i4 - (pp << 1)] = z__[i4 - 1] * temp; - d__ *= temp; - } else { - z__[i4 - (pp << 1)] = z__[i4 + 1] * (z__[i4 - 1] / z__[i4 - ( - pp << 1) - 2]); - d__ = z__[i4 + 1] * (d__ / z__[i4 - (pp << 1) - 2]); - } -/* Computing MIN */ - r__1 = emin, r__2 = z__[i4 - (pp << 1)]; - emin = dmin(r__1,r__2); -/* L60: */ - } - z__[(n0 << 2) - pp - 2] = d__; - -/* Now find qmax. */ - - qmax = z__[(i0 << 2) - pp - 2]; - i__1 = (n0 << 2) - pp - 2; - for (i4 = (i0 << 2) - pp + 2; i4 <= i__1; i4 += 4) { -/* Computing MAX */ - r__1 = qmax, r__2 = z__[i4]; - qmax = dmax(r__1,r__2); -/* L70: */ - } - -/* Prepare for the next iteration on K. */ - - pp = 1 - pp; -/* L80: */ - } - - iter = 2; - nfail = 0; - ndiv = n0 - i0 << 1; - - i__1 = *n + 1; - for (iwhila = 1; iwhila <= i__1; ++iwhila) { - if (n0 < 1) { - goto L150; - } - -/* - While array unfinished do - - E(N0) holds the value of SIGMA when submatrix in I0:N0 - splits from the rest of the array, but is negated. -*/ - - desig = 0.f; - if (n0 == *n) { - sigma = 0.f; - } else { - sigma = -z__[(n0 << 2) - 1]; - } - if (sigma < 0.f) { - *info = 1; - return 0; - } - -/* - Find last unreduced submatrix's top index I0, find QMAX and - EMIN. Find Gershgorin-type bound if Q's much greater than E's. -*/ - - emax = 0.f; - if (n0 > i0) { - emin = (r__1 = z__[(n0 << 2) - 5], dabs(r__1)); - } else { - emin = 0.f; - } - qmin = z__[(n0 << 2) - 3]; - qmax = qmin; - for (i4 = n0 << 2; i4 >= 8; i4 += -4) { - if (z__[i4 - 5] <= 0.f) { - goto L100; - } - if (qmin >= emax * 4.f) { -/* Computing MIN */ - r__1 = qmin, r__2 = z__[i4 - 3]; - qmin = dmin(r__1,r__2); -/* Computing MAX */ - r__1 = emax, r__2 = z__[i4 - 5]; - emax = dmax(r__1,r__2); - } -/* Computing MAX */ - r__1 = qmax, r__2 = z__[i4 - 7] + z__[i4 - 5]; - qmax = dmax(r__1,r__2); -/* Computing MIN */ - r__1 = emin, r__2 = z__[i4 - 5]; - emin = dmin(r__1,r__2); -/* L90: */ - } - i4 = 4; - -L100: - i0 = i4 / 4; - -/* Store EMIN for passing to SLASQ3. */ - - z__[(n0 << 2) - 1] = emin; - -/* - Put -(initial shift) into DMIN. - - Computing MAX -*/ - r__1 = 0.f, r__2 = qmin - sqrt(qmin) * 2.f * sqrt(emax); - dmin__ = -dmax(r__1,r__2); - -/* Now I0:N0 is unreduced. PP = 0 for ping, PP = 1 for pong. */ - - pp = 0; - - nbig = (n0 - i0 + 1) * 30; - i__2 = nbig; - for (iwhilb = 1; iwhilb <= i__2; ++iwhilb) { - if (i0 > n0) { - goto L130; - } - -/* While submatrix unfinished take a good dqds step. */ - - slasq3_(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, & - nfail, &iter, &ndiv, &ieee); - - pp = 1 - pp; - -/* When EMIN is very small check for splits. */ - - if (pp == 0 && n0 - i0 >= 3) { - if (z__[n0 * 4] <= tol2 * qmax || z__[(n0 << 2) - 1] <= tol2 * - sigma) { - splt = i0 - 1; - qmax = z__[(i0 << 2) - 3]; - emin = z__[(i0 << 2) - 1]; - oldemn = z__[i0 * 4]; - i__3 = n0 - 3 << 2; - for (i4 = i0 << 2; i4 <= i__3; i4 += 4) { - if (z__[i4] <= tol2 * z__[i4 - 3] || z__[i4 - 1] <= - tol2 * sigma) { - z__[i4 - 1] = -sigma; - splt = i4 / 4; - qmax = 0.f; - emin = z__[i4 + 3]; - oldemn = z__[i4 + 4]; - } else { -/* Computing MAX */ - r__1 = qmax, r__2 = z__[i4 + 1]; - qmax = dmax(r__1,r__2); -/* Computing MIN */ - r__1 = emin, r__2 = z__[i4 - 1]; - emin = dmin(r__1,r__2); -/* Computing MIN */ - r__1 = oldemn, r__2 = z__[i4]; - oldemn = dmin(r__1,r__2); - } -/* L110: */ - } - z__[(n0 << 2) - 1] = emin; - z__[n0 * 4] = oldemn; - i0 = splt + 1; - } - } - -/* L120: */ - } - - *info = 2; - return 0; - -/* end IWHILB */ - -L130: - -/* L140: */ - ; - } - - *info = 3; - return 0; - -/* end IWHILA */ - -L150: - -/* Move q's to the front. */ - - i__1 = *n; - for (k = 2; k <= i__1; ++k) { - z__[k] = z__[(k << 2) - 3]; -/* L160: */ - } - -/* Sort and compute sum of eigenvalues. */ - - slasrt_("D", n, &z__[1], &iinfo); - - e = 0.f; - for (k = *n; k >= 1; --k) { - e += z__[k]; -/* L170: */ - } - -/* Store trace, sum(eigenvalues) and information on performance. */ - - z__[(*n << 1) + 1] = trace; - z__[(*n << 1) + 2] = e; - z__[(*n << 1) + 3] = (real) iter; -/* Computing 2nd power */ - i__1 = *n; - z__[(*n << 1) + 4] = (real) ndiv / (real) (i__1 * i__1); - z__[(*n << 1) + 5] = nfail * 100.f / (real) iter; - return 0; - -/* End of SLASQ2 */ - -} /* slasq2_ */ - -/* Subroutine */ int slasq3_(integer *i0, integer *n0, real *z__, integer *pp, - real *dmin__, real *sigma, real *desig, real *qmax, integer *nfail, - integer *iter, integer *ndiv, logical *ieee) -{ - /* Initialized data */ - - static integer ttype = 0; - static real dmin1 = 0.f; - static real dmin2 = 0.f; - static real dn = 0.f; - static real dn1 = 0.f; - static real dn2 = 0.f; - static real tau = 0.f; - - /* System generated locals */ - integer i__1; - real r__1, r__2; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static real s, t; - static integer j4, nn; - static real eps, tol; - static integer n0in, ipn4; - static real tol2, temp; - extern /* Subroutine */ int slasq4_(integer *, integer *, real *, integer - *, integer *, real *, real *, real *, real *, real *, real *, - real *, integer *), slasq5_(integer *, integer *, real *, integer - *, real *, real *, real *, real *, real *, real *, real *, - logical *), slasq6_(integer *, integer *, real *, integer *, real - *, real *, real *, real *, real *, real *); - extern doublereal slamch_(char *); - static real safmin; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - May 17, 2000 - - - Purpose - ======= - - SLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. - In case of failure it changes shifts, and tries again until output - is positive. - - Arguments - ========= - - I0 (input) INTEGER - First index. - - N0 (input) INTEGER - Last index. - - Z (input) REAL array, dimension ( 4*N ) - Z holds the qd array. - - PP (input) INTEGER - PP=0 for ping, PP=1 for pong. - - DMIN (output) REAL - Minimum value of d. - - SIGMA (output) REAL - Sum of shifts used in current segment. - - DESIG (input/output) REAL - Lower order part of SIGMA - - QMAX (input) REAL - Maximum value of q. - - NFAIL (output) INTEGER - Number of times shift was too big. - - ITER (output) INTEGER - Number of iterations. - - NDIV (output) INTEGER - Number of divisions. - - TTYPE (output) INTEGER - Shift type. - - IEEE (input) LOGICAL - Flag for IEEE or non IEEE arithmetic (passed to SLASQ5). - - ===================================================================== -*/ - - /* Parameter adjustments */ - --z__; - - /* Function Body */ - - n0in = *n0; - eps = slamch_("Precision"); - safmin = slamch_("Safe minimum"); - tol = eps * 100.f; -/* Computing 2nd power */ - r__1 = tol; - tol2 = r__1 * r__1; - -/* Check for deflation. */ - -L10: - - if (*n0 < *i0) { - return 0; - } - if (*n0 == *i0) { - goto L20; - } - nn = (*n0 << 2) + *pp; - if (*n0 == *i0 + 1) { - goto L40; - } - -/* Check whether E(N0-1) is negligible, 1 eigenvalue. */ - - if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - (*pp << 1) - - 4] > tol2 * z__[nn - 7]) { - goto L30; - } - -L20: - - z__[(*n0 << 2) - 3] = z__[(*n0 << 2) + *pp - 3] + *sigma; - --(*n0); - goto L10; - -/* Check whether E(N0-2) is negligible, 2 eigenvalues. */ - -L30: - - if (z__[nn - 9] > tol2 * *sigma && z__[nn - (*pp << 1) - 8] > tol2 * z__[ - nn - 11]) { - goto L50; - } - -L40: - - if (z__[nn - 3] > z__[nn - 7]) { - s = z__[nn - 3]; - z__[nn - 3] = z__[nn - 7]; - z__[nn - 7] = s; - } - if (z__[nn - 5] > z__[nn - 3] * tol2) { - t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5f; - s = z__[nn - 3] * (z__[nn - 5] / t); - if (s <= t) { - s = z__[nn - 3] * (z__[nn - 5] / (t * (sqrt(s / t + 1.f) + 1.f))); - } else { - s = z__[nn - 3] * (z__[nn - 5] / (t + sqrt(t) * sqrt(t + s))); - } - t = z__[nn - 7] + (s + z__[nn - 5]); - z__[nn - 3] *= z__[nn - 7] / t; - z__[nn - 7] = t; - } - z__[(*n0 << 2) - 7] = z__[nn - 7] + *sigma; - z__[(*n0 << 2) - 3] = z__[nn - 3] + *sigma; - *n0 += -2; - goto L10; - -L50: - -/* Reverse the qd-array, if warranted. */ - - if (*dmin__ <= 0.f || *n0 < n0in) { - if (z__[(*i0 << 2) + *pp - 3] * 1.5f < z__[(*n0 << 2) + *pp - 3]) { - ipn4 = *i0 + *n0 << 2; - i__1 = *i0 + *n0 - 1 << 1; - for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { - temp = z__[j4 - 3]; - z__[j4 - 3] = z__[ipn4 - j4 - 3]; - z__[ipn4 - j4 - 3] = temp; - temp = z__[j4 - 2]; - z__[j4 - 2] = z__[ipn4 - j4 - 2]; - z__[ipn4 - j4 - 2] = temp; - temp = z__[j4 - 1]; - z__[j4 - 1] = z__[ipn4 - j4 - 5]; - z__[ipn4 - j4 - 5] = temp; - temp = z__[j4]; - z__[j4] = z__[ipn4 - j4 - 4]; - z__[ipn4 - j4 - 4] = temp; -/* L60: */ - } - if (*n0 - *i0 <= 4) { - z__[(*n0 << 2) + *pp - 1] = z__[(*i0 << 2) + *pp - 1]; - z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp]; - } -/* Computing MIN */ - r__1 = dmin2, r__2 = z__[(*n0 << 2) + *pp - 1]; - dmin2 = dmin(r__1,r__2); -/* Computing MIN */ - r__1 = z__[(*n0 << 2) + *pp - 1], r__2 = z__[(*i0 << 2) + *pp - 1] - , r__1 = min(r__1,r__2), r__2 = z__[(*i0 << 2) + *pp + 3]; - z__[(*n0 << 2) + *pp - 1] = dmin(r__1,r__2); -/* Computing MIN */ - r__1 = z__[(*n0 << 2) - *pp], r__2 = z__[(*i0 << 2) - *pp], r__1 = - min(r__1,r__2), r__2 = z__[(*i0 << 2) - *pp + 4]; - z__[(*n0 << 2) - *pp] = dmin(r__1,r__2); -/* Computing MAX */ - r__1 = *qmax, r__2 = z__[(*i0 << 2) + *pp - 3], r__1 = max(r__1, - r__2), r__2 = z__[(*i0 << 2) + *pp + 1]; - *qmax = dmax(r__1,r__2); - *dmin__ = -0.f; - } - } - -/* - L70: - - Computing MIN -*/ - r__1 = z__[(*n0 << 2) + *pp - 1], r__2 = z__[(*n0 << 2) + *pp - 9], r__1 = - min(r__1,r__2), r__2 = dmin2 + z__[(*n0 << 2) - *pp]; - if (*dmin__ < 0.f || safmin * *qmax < dmin(r__1,r__2)) { - -/* Choose a shift. */ - - slasq4_(i0, n0, &z__[1], pp, &n0in, dmin__, &dmin1, &dmin2, &dn, &dn1, - &dn2, &tau, &ttype); - -/* Call dqds until DMIN > 0. */ - -L80: - - slasq5_(i0, n0, &z__[1], pp, &tau, dmin__, &dmin1, &dmin2, &dn, &dn1, - &dn2, ieee); - - *ndiv += *n0 - *i0 + 2; - ++(*iter); - -/* Check status. */ - - if (*dmin__ >= 0.f && dmin1 > 0.f) { - -/* Success. */ - - goto L100; - - } else if (*dmin__ < 0.f && dmin1 > 0.f && z__[(*n0 - 1 << 2) - *pp] < - tol * (*sigma + dn1) && dabs(dn) < tol * *sigma) { - -/* Convergence hidden by negative DN. */ - - z__[(*n0 - 1 << 2) - *pp + 2] = 0.f; - *dmin__ = 0.f; - goto L100; - } else if (*dmin__ < 0.f) { - -/* TAU too big. Select new TAU and try again. */ - - ++(*nfail); - if (ttype < -22) { - -/* Failed twice. Play it safe. */ - - tau = 0.f; - } else if (dmin1 > 0.f) { - -/* Late failure. Gives excellent shift. */ - - tau = (tau + *dmin__) * (1.f - eps * 2.f); - ttype += -11; - } else { - -/* Early failure. Divide by 4. */ - - tau *= .25f; - ttype += -12; - } - goto L80; - } else if (*dmin__ != *dmin__) { - -/* NaN. */ - - tau = 0.f; - goto L80; - } else { - -/* Possible underflow. Play it safe. */ - - goto L90; - } - } - -/* Risk of underflow. */ - -L90: - slasq6_(i0, n0, &z__[1], pp, dmin__, &dmin1, &dmin2, &dn, &dn1, &dn2); - *ndiv += *n0 - *i0 + 2; - ++(*iter); - tau = 0.f; - -L100: - if (tau < *sigma) { - *desig += tau; - t = *sigma + *desig; - *desig -= t - *sigma; - } else { - t = *sigma + tau; - *desig = *sigma - (t - tau) + *desig; - } - *sigma = t; - - return 0; - -/* End of SLASQ3 */ - -} /* slasq3_ */ - -/* Subroutine */ int slasq4_(integer *i0, integer *n0, real *z__, integer *pp, - integer *n0in, real *dmin__, real *dmin1, real *dmin2, real *dn, - real *dn1, real *dn2, real *tau, integer *ttype) -{ - /* Initialized data */ - - static real g = 0.f; - - /* System generated locals */ - integer i__1; - real r__1, r__2; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static real s, a2, b1, b2; - static integer i4, nn, np; - static real gam, gap1, gap2; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - SLASQ4 computes an approximation TAU to the smallest eigenvalue - using values of d from the previous transform. - - I0 (input) INTEGER - First index. - - N0 (input) INTEGER - Last index. - - Z (input) REAL array, dimension ( 4*N ) - Z holds the qd array. - - PP (input) INTEGER - PP=0 for ping, PP=1 for pong. - - NOIN (input) INTEGER - The value of N0 at start of EIGTEST. - - DMIN (input) REAL - Minimum value of d. - - DMIN1 (input) REAL - Minimum value of d, excluding D( N0 ). - - DMIN2 (input) REAL - Minimum value of d, excluding D( N0 ) and D( N0-1 ). - - DN (input) REAL - d(N) - - DN1 (input) REAL - d(N-1) - - DN2 (input) REAL - d(N-2) - - TAU (output) REAL - This is the shift. - - TTYPE (output) INTEGER - Shift type. - - Further Details - =============== - CNST1 = 9/16 - - ===================================================================== -*/ - - /* Parameter adjustments */ - --z__; - - /* Function Body */ - -/* - A negative DMIN forces the shift to take that absolute value - TTYPE records the type of shift. -*/ - - if (*dmin__ <= 0.f) { - *tau = -(*dmin__); - *ttype = -1; - return 0; - } - - nn = (*n0 << 2) + *pp; - if (*n0in == *n0) { - -/* No eigenvalues deflated. */ - - if (*dmin__ == *dn || *dmin__ == *dn1) { - - b1 = sqrt(z__[nn - 3]) * sqrt(z__[nn - 5]); - b2 = sqrt(z__[nn - 7]) * sqrt(z__[nn - 9]); - a2 = z__[nn - 7] + z__[nn - 5]; - -/* Cases 2 and 3. */ - - if (*dmin__ == *dn && *dmin1 == *dn1) { - gap2 = *dmin2 - a2 - *dmin2 * .25f; - if (gap2 > 0.f && gap2 > b2) { - gap1 = a2 - *dn - b2 / gap2 * b2; - } else { - gap1 = a2 - *dn - (b1 + b2); - } - if (gap1 > 0.f && gap1 > b1) { -/* Computing MAX */ - r__1 = *dn - b1 / gap1 * b1, r__2 = *dmin__ * .5f; - s = dmax(r__1,r__2); - *ttype = -2; - } else { - s = 0.f; - if (*dn > b1) { - s = *dn - b1; - } - if (a2 > b1 + b2) { -/* Computing MIN */ - r__1 = s, r__2 = a2 - (b1 + b2); - s = dmin(r__1,r__2); - } -/* Computing MAX */ - r__1 = s, r__2 = *dmin__ * .333f; - s = dmax(r__1,r__2); - *ttype = -3; - } - } else { - -/* Case 4. */ - - *ttype = -4; - s = *dmin__ * .25f; - if (*dmin__ == *dn) { - gam = *dn; - a2 = 0.f; - if (z__[nn - 5] > z__[nn - 7]) { - return 0; - } - b2 = z__[nn - 5] / z__[nn - 7]; - np = nn - 9; - } else { - np = nn - (*pp << 1); - b2 = z__[np - 2]; - gam = *dn1; - if (z__[np - 4] > z__[np - 2]) { - return 0; - } - a2 = z__[np - 4] / z__[np - 2]; - if (z__[nn - 9] > z__[nn - 11]) { - return 0; - } - b2 = z__[nn - 9] / z__[nn - 11]; - np = nn - 13; - } - -/* Approximate contribution to norm squared from I < NN-1. */ - - a2 += b2; - i__1 = (*i0 << 2) - 1 + *pp; - for (i4 = np; i4 >= i__1; i4 += -4) { - if (b2 == 0.f) { - goto L20; - } - b1 = b2; - if (z__[i4] > z__[i4 - 2]) { - return 0; - } - b2 *= z__[i4] / z__[i4 - 2]; - a2 += b2; - if (dmax(b2,b1) * 100.f < a2 || .563f < a2) { - goto L20; - } -/* L10: */ - } -L20: - a2 *= 1.05f; - -/* Rayleigh quotient residual bound. */ - - if (a2 < .563f) { - s = gam * (1.f - sqrt(a2)) / (a2 + 1.f); - } - } - } else if (*dmin__ == *dn2) { - -/* Case 5. */ - - *ttype = -5; - s = *dmin__ * .25f; - -/* Compute contribution to norm squared from I > NN-2. */ - - np = nn - (*pp << 1); - b1 = z__[np - 2]; - b2 = z__[np - 6]; - gam = *dn2; - if (z__[np - 8] > b2 || z__[np - 4] > b1) { - return 0; - } - a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.f); - -/* Approximate contribution to norm squared from I < NN-2. */ - - if (*n0 - *i0 > 2) { - b2 = z__[nn - 13] / z__[nn - 15]; - a2 += b2; - i__1 = (*i0 << 2) - 1 + *pp; - for (i4 = nn - 17; i4 >= i__1; i4 += -4) { - if (b2 == 0.f) { - goto L40; - } - b1 = b2; - if (z__[i4] > z__[i4 - 2]) { - return 0; - } - b2 *= z__[i4] / z__[i4 - 2]; - a2 += b2; - if (dmax(b2,b1) * 100.f < a2 || .563f < a2) { - goto L40; - } -/* L30: */ - } -L40: - a2 *= 1.05f; - } - - if (a2 < .563f) { - s = gam * (1.f - sqrt(a2)) / (a2 + 1.f); - } - } else { - -/* Case 6, no information to guide us. */ - - if (*ttype == -6) { - g += (1.f - g) * .333f; - } else if (*ttype == -18) { - g = .083250000000000005f; - } else { - g = .25f; - } - s = g * *dmin__; - *ttype = -6; - } - - } else if (*n0in == *n0 + 1) { - -/* One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. */ - - if (*dmin1 == *dn1 && *dmin2 == *dn2) { - -/* Cases 7 and 8. */ - - *ttype = -7; - s = *dmin1 * .333f; - if (z__[nn - 5] > z__[nn - 7]) { - return 0; - } - b1 = z__[nn - 5] / z__[nn - 7]; - b2 = b1; - if (b2 == 0.f) { - goto L60; - } - i__1 = (*i0 << 2) - 1 + *pp; - for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { - a2 = b1; - if (z__[i4] > z__[i4 - 2]) { - return 0; - } - b1 *= z__[i4] / z__[i4 - 2]; - b2 += b1; - if (dmax(b1,a2) * 100.f < b2) { - goto L60; - } -/* L50: */ - } -L60: - b2 = sqrt(b2 * 1.05f); -/* Computing 2nd power */ - r__1 = b2; - a2 = *dmin1 / (r__1 * r__1 + 1.f); - gap2 = *dmin2 * .5f - a2; - if (gap2 > 0.f && gap2 > b2 * a2) { -/* Computing MAX */ - r__1 = s, r__2 = a2 * (1.f - a2 * 1.01f * (b2 / gap2) * b2); - s = dmax(r__1,r__2); - } else { -/* Computing MAX */ - r__1 = s, r__2 = a2 * (1.f - b2 * 1.01f); - s = dmax(r__1,r__2); - *ttype = -8; - } - } else { - -/* Case 9. */ - - s = *dmin1 * .25f; - if (*dmin1 == *dn1) { - s = *dmin1 * .5f; - } - *ttype = -9; - } - - } else if (*n0in == *n0 + 2) { - -/* - Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. - - Cases 10 and 11. -*/ - - if (*dmin2 == *dn2 && z__[nn - 5] * 2.f < z__[nn - 7]) { - *ttype = -10; - s = *dmin2 * .333f; - if (z__[nn - 5] > z__[nn - 7]) { - return 0; - } - b1 = z__[nn - 5] / z__[nn - 7]; - b2 = b1; - if (b2 == 0.f) { - goto L80; - } - i__1 = (*i0 << 2) - 1 + *pp; - for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { - if (z__[i4] > z__[i4 - 2]) { - return 0; - } - b1 *= z__[i4] / z__[i4 - 2]; - b2 += b1; - if (b1 * 100.f < b2) { - goto L80; - } -/* L70: */ - } -L80: - b2 = sqrt(b2 * 1.05f); -/* Computing 2nd power */ - r__1 = b2; - a2 = *dmin2 / (r__1 * r__1 + 1.f); - gap2 = z__[nn - 7] + z__[nn - 9] - sqrt(z__[nn - 11]) * sqrt(z__[ - nn - 9]) - a2; - if (gap2 > 0.f && gap2 > b2 * a2) { -/* Computing MAX */ - r__1 = s, r__2 = a2 * (1.f - a2 * 1.01f * (b2 / gap2) * b2); - s = dmax(r__1,r__2); - } else { -/* Computing MAX */ - r__1 = s, r__2 = a2 * (1.f - b2 * 1.01f); - s = dmax(r__1,r__2); - } - } else { - s = *dmin2 * .25f; - *ttype = -11; - } - } else if (*n0in > *n0 + 2) { - -/* Case 12, more than two eigenvalues deflated. No information. */ - - s = 0.f; - *ttype = -12; - } - - *tau = s; - return 0; - -/* End of SLASQ4 */ - -} /* slasq4_ */ - -/* Subroutine */ int slasq5_(integer *i0, integer *n0, real *z__, integer *pp, - real *tau, real *dmin__, real *dmin1, real *dmin2, real *dn, real * - dnm1, real *dnm2, logical *ieee) -{ - /* System generated locals */ - integer i__1; - real r__1, r__2; - - /* Local variables */ - static real d__; - static integer j4, j4p2; - static real emin, temp; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - May 17, 2000 - - - Purpose - ======= - - SLASQ5 computes one dqds transform in ping-pong form, one - version for IEEE machines another for non IEEE machines. - - Arguments - ========= - - I0 (input) INTEGER - First index. - - N0 (input) INTEGER - Last index. - - Z (input) REAL array, dimension ( 4*N ) - Z holds the qd array. EMIN is stored in Z(4*N0) to avoid - an extra argument. - - PP (input) INTEGER - PP=0 for ping, PP=1 for pong. - - TAU (input) REAL - This is the shift. - - DMIN (output) REAL - Minimum value of d. - - DMIN1 (output) REAL - Minimum value of d, excluding D( N0 ). - - DMIN2 (output) REAL - Minimum value of d, excluding D( N0 ) and D( N0-1 ). - - DN (output) REAL - d(N0), the last value of d. - - DNM1 (output) REAL - d(N0-1). - - DNM2 (output) REAL - d(N0-2). - - IEEE (input) LOGICAL - Flag for IEEE or non IEEE arithmetic. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --z__; - - /* Function Body */ - if (*n0 - *i0 - 1 <= 0) { - return 0; - } - - j4 = (*i0 << 2) + *pp - 3; - emin = z__[j4 + 4]; - d__ = z__[j4] - *tau; - *dmin__ = d__; - *dmin1 = -z__[j4]; - - if (*ieee) { - -/* Code for IEEE arithmetic. */ - - if (*pp == 0) { - i__1 = *n0 - 3 << 2; - for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { - z__[j4 - 2] = d__ + z__[j4 - 1]; - temp = z__[j4 + 1] / z__[j4 - 2]; - d__ = d__ * temp - *tau; - *dmin__ = dmin(*dmin__,d__); - z__[j4] = z__[j4 - 1] * temp; -/* Computing MIN */ - r__1 = z__[j4]; - emin = dmin(r__1,emin); -/* L10: */ - } - } else { - i__1 = *n0 - 3 << 2; - for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { - z__[j4 - 3] = d__ + z__[j4]; - temp = z__[j4 + 2] / z__[j4 - 3]; - d__ = d__ * temp - *tau; - *dmin__ = dmin(*dmin__,d__); - z__[j4 - 1] = z__[j4] * temp; -/* Computing MIN */ - r__1 = z__[j4 - 1]; - emin = dmin(r__1,emin); -/* L20: */ - } - } - -/* Unroll last two steps. */ - - *dnm2 = d__; - *dmin2 = *dmin__; - j4 = (*n0 - 2 << 2) - *pp; - j4p2 = j4 + (*pp << 1) - 1; - z__[j4 - 2] = *dnm2 + z__[j4p2]; - z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); - *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; - *dmin__ = dmin(*dmin__,*dnm1); - - *dmin1 = *dmin__; - j4 += 4; - j4p2 = j4 + (*pp << 1) - 1; - z__[j4 - 2] = *dnm1 + z__[j4p2]; - z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); - *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; - *dmin__ = dmin(*dmin__,*dn); - - } else { - -/* Code for non IEEE arithmetic. */ - - if (*pp == 0) { - i__1 = *n0 - 3 << 2; - for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { - z__[j4 - 2] = d__ + z__[j4 - 1]; - if (d__ < 0.f) { - return 0; - } else { - z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); - d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]) - *tau; - } - *dmin__ = dmin(*dmin__,d__); -/* Computing MIN */ - r__1 = emin, r__2 = z__[j4]; - emin = dmin(r__1,r__2); -/* L30: */ - } - } else { - i__1 = *n0 - 3 << 2; - for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { - z__[j4 - 3] = d__ + z__[j4]; - if (d__ < 0.f) { - return 0; - } else { - z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); - d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]) - *tau; - } - *dmin__ = dmin(*dmin__,d__); -/* Computing MIN */ - r__1 = emin, r__2 = z__[j4 - 1]; - emin = dmin(r__1,r__2); -/* L40: */ - } - } - -/* Unroll last two steps. */ - - *dnm2 = d__; - *dmin2 = *dmin__; - j4 = (*n0 - 2 << 2) - *pp; - j4p2 = j4 + (*pp << 1) - 1; - z__[j4 - 2] = *dnm2 + z__[j4p2]; - if (*dnm2 < 0.f) { - return 0; - } else { - z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); - *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; - } - *dmin__ = dmin(*dmin__,*dnm1); - - *dmin1 = *dmin__; - j4 += 4; - j4p2 = j4 + (*pp << 1) - 1; - z__[j4 - 2] = *dnm1 + z__[j4p2]; - if (*dnm1 < 0.f) { - return 0; - } else { - z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); - *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; - } - *dmin__ = dmin(*dmin__,*dn); - - } - - z__[j4 + 2] = *dn; - z__[(*n0 << 2) - *pp] = emin; - return 0; - -/* End of SLASQ5 */ - -} /* slasq5_ */ - -/* Subroutine */ int slasq6_(integer *i0, integer *n0, real *z__, integer *pp, - real *dmin__, real *dmin1, real *dmin2, real *dn, real *dnm1, real * - dnm2) -{ - /* System generated locals */ - integer i__1; - real r__1, r__2; - - /* Local variables */ - static real d__; - static integer j4, j4p2; - static real emin, temp; - extern doublereal slamch_(char *); - static real safmin; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - SLASQ6 computes one dqd (shift equal to zero) transform in - ping-pong form, with protection against underflow and overflow. - - Arguments - ========= - - I0 (input) INTEGER - First index. - - N0 (input) INTEGER - Last index. - - Z (input) REAL array, dimension ( 4*N ) - Z holds the qd array. EMIN is stored in Z(4*N0) to avoid - an extra argument. - - PP (input) INTEGER - PP=0 for ping, PP=1 for pong. - - DMIN (output) REAL - Minimum value of d. - - DMIN1 (output) REAL - Minimum value of d, excluding D( N0 ). - - DMIN2 (output) REAL - Minimum value of d, excluding D( N0 ) and D( N0-1 ). - - DN (output) REAL - d(N0), the last value of d. - - DNM1 (output) REAL - d(N0-1). - - DNM2 (output) REAL - d(N0-2). - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --z__; - - /* Function Body */ - if (*n0 - *i0 - 1 <= 0) { - return 0; - } - - safmin = slamch_("Safe minimum"); - j4 = (*i0 << 2) + *pp - 3; - emin = z__[j4 + 4]; - d__ = z__[j4]; - *dmin__ = d__; - - if (*pp == 0) { - i__1 = *n0 - 3 << 2; - for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { - z__[j4 - 2] = d__ + z__[j4 - 1]; - if (z__[j4 - 2] == 0.f) { - z__[j4] = 0.f; - d__ = z__[j4 + 1]; - *dmin__ = d__; - emin = 0.f; - } else if (safmin * z__[j4 + 1] < z__[j4 - 2] && safmin * z__[j4 - - 2] < z__[j4 + 1]) { - temp = z__[j4 + 1] / z__[j4 - 2]; - z__[j4] = z__[j4 - 1] * temp; - d__ *= temp; - } else { - z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); - d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]); - } - *dmin__ = dmin(*dmin__,d__); -/* Computing MIN */ - r__1 = emin, r__2 = z__[j4]; - emin = dmin(r__1,r__2); -/* L10: */ - } - } else { - i__1 = *n0 - 3 << 2; - for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { - z__[j4 - 3] = d__ + z__[j4]; - if (z__[j4 - 3] == 0.f) { - z__[j4 - 1] = 0.f; - d__ = z__[j4 + 2]; - *dmin__ = d__; - emin = 0.f; - } else if (safmin * z__[j4 + 2] < z__[j4 - 3] && safmin * z__[j4 - - 3] < z__[j4 + 2]) { - temp = z__[j4 + 2] / z__[j4 - 3]; - z__[j4 - 1] = z__[j4] * temp; - d__ *= temp; - } else { - z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); - d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]); - } - *dmin__ = dmin(*dmin__,d__); -/* Computing MIN */ - r__1 = emin, r__2 = z__[j4 - 1]; - emin = dmin(r__1,r__2); -/* L20: */ - } - } - -/* Unroll last two steps. */ - - *dnm2 = d__; - *dmin2 = *dmin__; - j4 = (*n0 - 2 << 2) - *pp; - j4p2 = j4 + (*pp << 1) - 1; - z__[j4 - 2] = *dnm2 + z__[j4p2]; - if (z__[j4 - 2] == 0.f) { - z__[j4] = 0.f; - *dnm1 = z__[j4p2 + 2]; - *dmin__ = *dnm1; - emin = 0.f; - } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < - z__[j4p2 + 2]) { - temp = z__[j4p2 + 2] / z__[j4 - 2]; - z__[j4] = z__[j4p2] * temp; - *dnm1 = *dnm2 * temp; - } else { - z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); - *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]); - } - *dmin__ = dmin(*dmin__,*dnm1); - - *dmin1 = *dmin__; - j4 += 4; - j4p2 = j4 + (*pp << 1) - 1; - z__[j4 - 2] = *dnm1 + z__[j4p2]; - if (z__[j4 - 2] == 0.f) { - z__[j4] = 0.f; - *dn = z__[j4p2 + 2]; - *dmin__ = *dn; - emin = 0.f; - } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < - z__[j4p2 + 2]) { - temp = z__[j4p2 + 2] / z__[j4 - 2]; - z__[j4] = z__[j4p2] * temp; - *dn = *dnm1 * temp; - } else { - z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); - *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]); - } - *dmin__ = dmin(*dmin__,*dn); - - z__[j4 + 2] = *dn; - z__[(*n0 << 2) - *pp] = emin; - return 0; - -/* End of SLASQ6 */ - -} /* slasq6_ */ - -/* Subroutine */ int slasr_(char *side, char *pivot, char *direct, integer *m, - integer *n, real *c__, real *s, real *a, integer *lda) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j, info; - static real temp; - extern logical lsame_(char *, char *); - static real ctemp, stemp; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - SLASR performs the transformation - - A := P*A, when SIDE = 'L' or 'l' ( Left-hand side ) - - A := A*P', when SIDE = 'R' or 'r' ( Right-hand side ) - - where A is an m by n real matrix and P is an orthogonal matrix, - consisting of a sequence of plane rotations determined by the - parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l' - and z = n when SIDE = 'R' or 'r' ): - - When DIRECT = 'F' or 'f' ( Forward sequence ) then - - P = P( z - 1 )*...*P( 2 )*P( 1 ), - - and when DIRECT = 'B' or 'b' ( Backward sequence ) then - - P = P( 1 )*P( 2 )*...*P( z - 1 ), - - where P( k ) is a plane rotation matrix for the following planes: - - when PIVOT = 'V' or 'v' ( Variable pivot ), - the plane ( k, k + 1 ) - - when PIVOT = 'T' or 't' ( Top pivot ), - the plane ( 1, k + 1 ) - - when PIVOT = 'B' or 'b' ( Bottom pivot ), - the plane ( k, z ) - - c( k ) and s( k ) must contain the cosine and sine that define the - matrix P( k ). The two by two plane rotation part of the matrix - P( k ), R( k ), is assumed to be of the form - - R( k ) = ( c( k ) s( k ) ). - ( -s( k ) c( k ) ) - - This version vectorises across rows of the array A when SIDE = 'L'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - Specifies whether the plane rotation matrix P is applied to - A on the left or the right. - = 'L': Left, compute A := P*A - = 'R': Right, compute A:= A*P' - - DIRECT (input) CHARACTER*1 - Specifies whether P is a forward or backward sequence of - plane rotations. - = 'F': Forward, P = P( z - 1 )*...*P( 2 )*P( 1 ) - = 'B': Backward, P = P( 1 )*P( 2 )*...*P( z - 1 ) - - PIVOT (input) CHARACTER*1 - Specifies the plane for which P(k) is a plane rotation - matrix. - = 'V': Variable pivot, the plane (k,k+1) - = 'T': Top pivot, the plane (1,k+1) - = 'B': Bottom pivot, the plane (k,z) - - M (input) INTEGER - The number of rows of the matrix A. If m <= 1, an immediate - return is effected. - - N (input) INTEGER - The number of columns of the matrix A. If n <= 1, an - immediate return is effected. - - C, S (input) REAL arrays, dimension - (M-1) if SIDE = 'L' - (N-1) if SIDE = 'R' - c(k) and s(k) contain the cosine and sine that define the - matrix P(k). The two by two plane rotation part of the - matrix P(k), R(k), is assumed to be of the form - R( k ) = ( c( k ) s( k ) ). - ( -s( k ) c( k ) ) - - A (input/output) REAL array, dimension (LDA,N) - The m by n matrix A. On exit, A is overwritten by P*A if - SIDE = 'R' or by A*P' if SIDE = 'L'. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - --c__; - --s; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - info = 0; - if (! (lsame_(side, "L") || lsame_(side, "R"))) { - info = 1; - } else if (! (lsame_(pivot, "V") || lsame_(pivot, - "T") || lsame_(pivot, "B"))) { - info = 2; - } else if (! (lsame_(direct, "F") || lsame_(direct, - "B"))) { - info = 3; - } else if (*m < 0) { - info = 4; - } else if (*n < 0) { - info = 5; - } else if (*lda < max(1,*m)) { - info = 9; - } - if (info != 0) { - xerbla_("SLASR ", &info); - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0) { - return 0; - } - if (lsame_(side, "L")) { - -/* Form P * A */ - - if (lsame_(pivot, "V")) { - if (lsame_(direct, "F")) { - i__1 = *m - 1; - for (j = 1; j <= i__1; ++j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1.f || stemp != 0.f) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = a[j + 1 + i__ * a_dim1]; - a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp * - a[j + i__ * a_dim1]; - a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j - + i__ * a_dim1]; -/* L10: */ - } - } -/* L20: */ - } - } else if (lsame_(direct, "B")) { - for (j = *m - 1; j >= 1; --j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1.f || stemp != 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - temp = a[j + 1 + i__ * a_dim1]; - a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp * - a[j + i__ * a_dim1]; - a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j - + i__ * a_dim1]; -/* L30: */ - } - } -/* L40: */ - } - } - } else if (lsame_(pivot, "T")) { - if (lsame_(direct, "F")) { - i__1 = *m; - for (j = 2; j <= i__1; ++j) { - ctemp = c__[j - 1]; - stemp = s[j - 1]; - if (ctemp != 1.f || stemp != 0.f) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = a[j + i__ * a_dim1]; - a[j + i__ * a_dim1] = ctemp * temp - stemp * a[ - i__ * a_dim1 + 1]; - a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[ - i__ * a_dim1 + 1]; -/* L50: */ - } - } -/* L60: */ - } - } else if (lsame_(direct, "B")) { - for (j = *m; j >= 2; --j) { - ctemp = c__[j - 1]; - stemp = s[j - 1]; - if (ctemp != 1.f || stemp != 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - temp = a[j + i__ * a_dim1]; - a[j + i__ * a_dim1] = ctemp * temp - stemp * a[ - i__ * a_dim1 + 1]; - a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[ - i__ * a_dim1 + 1]; -/* L70: */ - } - } -/* L80: */ - } - } - } else if (lsame_(pivot, "B")) { - if (lsame_(direct, "F")) { - i__1 = *m - 1; - for (j = 1; j <= i__1; ++j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1.f || stemp != 0.f) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = a[j + i__ * a_dim1]; - a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1] - + ctemp * temp; - a[*m + i__ * a_dim1] = ctemp * a[*m + i__ * - a_dim1] - stemp * temp; -/* L90: */ - } - } -/* L100: */ - } - } else if (lsame_(direct, "B")) { - for (j = *m - 1; j >= 1; --j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1.f || stemp != 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - temp = a[j + i__ * a_dim1]; - a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1] - + ctemp * temp; - a[*m + i__ * a_dim1] = ctemp * a[*m + i__ * - a_dim1] - stemp * temp; -/* L110: */ - } - } -/* L120: */ - } - } - } - } else if (lsame_(side, "R")) { - -/* Form A * P' */ - - if (lsame_(pivot, "V")) { - if (lsame_(direct, "F")) { - i__1 = *n - 1; - for (j = 1; j <= i__1; ++j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1.f || stemp != 0.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = a[i__ + (j + 1) * a_dim1]; - a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp * - a[i__ + j * a_dim1]; - a[i__ + j * a_dim1] = stemp * temp + ctemp * a[ - i__ + j * a_dim1]; -/* L130: */ - } - } -/* L140: */ - } - } else if (lsame_(direct, "B")) { - for (j = *n - 1; j >= 1; --j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1.f || stemp != 0.f) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - temp = a[i__ + (j + 1) * a_dim1]; - a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp * - a[i__ + j * a_dim1]; - a[i__ + j * a_dim1] = stemp * temp + ctemp * a[ - i__ + j * a_dim1]; -/* L150: */ - } - } -/* L160: */ - } - } - } else if (lsame_(pivot, "T")) { - if (lsame_(direct, "F")) { - i__1 = *n; - for (j = 2; j <= i__1; ++j) { - ctemp = c__[j - 1]; - stemp = s[j - 1]; - if (ctemp != 1.f || stemp != 0.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = a[i__ + j * a_dim1]; - a[i__ + j * a_dim1] = ctemp * temp - stemp * a[ - i__ + a_dim1]; - a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ + - a_dim1]; -/* L170: */ - } - } -/* L180: */ - } - } else if (lsame_(direct, "B")) { - for (j = *n; j >= 2; --j) { - ctemp = c__[j - 1]; - stemp = s[j - 1]; - if (ctemp != 1.f || stemp != 0.f) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - temp = a[i__ + j * a_dim1]; - a[i__ + j * a_dim1] = ctemp * temp - stemp * a[ - i__ + a_dim1]; - a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ + - a_dim1]; -/* L190: */ - } - } -/* L200: */ - } - } - } else if (lsame_(pivot, "B")) { - if (lsame_(direct, "F")) { - i__1 = *n - 1; - for (j = 1; j <= i__1; ++j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1.f || stemp != 0.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = a[i__ + j * a_dim1]; - a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1] - + ctemp * temp; - a[i__ + *n * a_dim1] = ctemp * a[i__ + *n * - a_dim1] - stemp * temp; -/* L210: */ - } - } -/* L220: */ - } - } else if (lsame_(direct, "B")) { - for (j = *n - 1; j >= 1; --j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1.f || stemp != 0.f) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - temp = a[i__ + j * a_dim1]; - a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1] - + ctemp * temp; - a[i__ + *n * a_dim1] = ctemp * a[i__ + *n * - a_dim1] - stemp * temp; -/* L230: */ - } - } -/* L240: */ - } - } - } - } - - return 0; - -/* End of SLASR */ - -} /* slasr_ */ - -/* Subroutine */ int slasrt_(char *id, integer *n, real *d__, integer *info) -{ - /* System generated locals */ - integer i__1, i__2; - - /* Local variables */ - static integer i__, j; - static real d1, d2, d3; - static integer dir; - static real tmp; - static integer endd; - extern logical lsame_(char *, char *); - static integer stack[64] /* was [2][32] */; - static real dmnmx; - static integer start; - extern /* Subroutine */ int xerbla_(char *, integer *); - static integer stkpnt; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - Sort the numbers in D in increasing order (if ID = 'I') or - in decreasing order (if ID = 'D' ). - - Use Quick Sort, reverting to Insertion sort on arrays of - size <= 20. Dimension of STACK limits N to about 2**32. - - Arguments - ========= - - ID (input) CHARACTER*1 - = 'I': sort D in increasing order; - = 'D': sort D in decreasing order. - - N (input) INTEGER - The length of the array D. - - D (input/output) REAL array, dimension (N) - On entry, the array to be sorted. - On exit, D has been sorted into increasing order - (D(1) <= ... <= D(N) ) or into decreasing order - (D(1) >= ... >= D(N) ), depending on ID. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input paramters. -*/ - - /* Parameter adjustments */ - --d__; - - /* Function Body */ - *info = 0; - dir = -1; - if (lsame_(id, "D")) { - dir = 0; - } else if (lsame_(id, "I")) { - dir = 1; - } - if (dir == -1) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLASRT", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n <= 1) { - return 0; - } - - stkpnt = 1; - stack[0] = 1; - stack[1] = *n; -L10: - start = stack[(stkpnt << 1) - 2]; - endd = stack[(stkpnt << 1) - 1]; - --stkpnt; - if (endd - start <= 20 && endd - start > 0) { - -/* Do Insertion sort on D( START:ENDD ) */ - - if (dir == 0) { - -/* Sort into decreasing order */ - - i__1 = endd; - for (i__ = start + 1; i__ <= i__1; ++i__) { - i__2 = start + 1; - for (j = i__; j >= i__2; --j) { - if (d__[j] > d__[j - 1]) { - dmnmx = d__[j]; - d__[j] = d__[j - 1]; - d__[j - 1] = dmnmx; - } else { - goto L30; - } -/* L20: */ - } -L30: - ; - } - - } else { - -/* Sort into increasing order */ - - i__1 = endd; - for (i__ = start + 1; i__ <= i__1; ++i__) { - i__2 = start + 1; - for (j = i__; j >= i__2; --j) { - if (d__[j] < d__[j - 1]) { - dmnmx = d__[j]; - d__[j] = d__[j - 1]; - d__[j - 1] = dmnmx; - } else { - goto L50; - } -/* L40: */ - } -L50: - ; - } - - } - - } else if (endd - start > 20) { - -/* - Partition D( START:ENDD ) and stack parts, largest one first - - Choose partition entry as median of 3 -*/ - - d1 = d__[start]; - d2 = d__[endd]; - i__ = (start + endd) / 2; - d3 = d__[i__]; - if (d1 < d2) { - if (d3 < d1) { - dmnmx = d1; - } else if (d3 < d2) { - dmnmx = d3; - } else { - dmnmx = d2; - } - } else { - if (d3 < d2) { - dmnmx = d2; - } else if (d3 < d1) { - dmnmx = d3; - } else { - dmnmx = d1; - } - } - - if (dir == 0) { - -/* Sort into decreasing order */ - - i__ = start - 1; - j = endd + 1; -L60: -L70: - --j; - if (d__[j] < dmnmx) { - goto L70; - } -L80: - ++i__; - if (d__[i__] > dmnmx) { - goto L80; - } - if (i__ < j) { - tmp = d__[i__]; - d__[i__] = d__[j]; - d__[j] = tmp; - goto L60; - } - if (j - start > endd - j - 1) { - ++stkpnt; - stack[(stkpnt << 1) - 2] = start; - stack[(stkpnt << 1) - 1] = j; - ++stkpnt; - stack[(stkpnt << 1) - 2] = j + 1; - stack[(stkpnt << 1) - 1] = endd; - } else { - ++stkpnt; - stack[(stkpnt << 1) - 2] = j + 1; - stack[(stkpnt << 1) - 1] = endd; - ++stkpnt; - stack[(stkpnt << 1) - 2] = start; - stack[(stkpnt << 1) - 1] = j; - } - } else { - -/* Sort into increasing order */ - - i__ = start - 1; - j = endd + 1; -L90: -L100: - --j; - if (d__[j] > dmnmx) { - goto L100; - } -L110: - ++i__; - if (d__[i__] < dmnmx) { - goto L110; - } - if (i__ < j) { - tmp = d__[i__]; - d__[i__] = d__[j]; - d__[j] = tmp; - goto L90; - } - if (j - start > endd - j - 1) { - ++stkpnt; - stack[(stkpnt << 1) - 2] = start; - stack[(stkpnt << 1) - 1] = j; - ++stkpnt; - stack[(stkpnt << 1) - 2] = j + 1; - stack[(stkpnt << 1) - 1] = endd; - } else { - ++stkpnt; - stack[(stkpnt << 1) - 2] = j + 1; - stack[(stkpnt << 1) - 1] = endd; - ++stkpnt; - stack[(stkpnt << 1) - 2] = start; - stack[(stkpnt << 1) - 1] = j; - } - } - } - if (stkpnt > 0) { - goto L10; - } - return 0; - -/* End of SLASRT */ - -} /* slasrt_ */ - -/* Subroutine */ int slassq_(integer *n, real *x, integer *incx, real *scale, - real *sumsq) -{ - /* System generated locals */ - integer i__1, i__2; - real r__1; - - /* Local variables */ - static integer ix; - static real absxi; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SLASSQ returns the values scl and smsq such that - - ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, - - where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is - assumed to be non-negative and scl returns the value - - scl = max( scale, abs( x( i ) ) ). - - scale and sumsq must be supplied in SCALE and SUMSQ and - scl and smsq are overwritten on SCALE and SUMSQ respectively. - - The routine makes only one pass through the vector x. - - Arguments - ========= - - N (input) INTEGER - The number of elements to be used from the vector X. - - X (input) REAL array, dimension (N) - The vector for which a scaled sum of squares is computed. - x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. - - INCX (input) INTEGER - The increment between successive values of the vector X. - INCX > 0. - - SCALE (input/output) REAL - On entry, the value scale in the equation above. - On exit, SCALE is overwritten with scl , the scaling factor - for the sum of squares. - - SUMSQ (input/output) REAL - On entry, the value sumsq in the equation above. - On exit, SUMSQ is overwritten with smsq , the basic sum of - squares from which scl has been factored out. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --x; - - /* Function Body */ - if (*n > 0) { - i__1 = (*n - 1) * *incx + 1; - i__2 = *incx; - for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { - if (x[ix] != 0.f) { - absxi = (r__1 = x[ix], dabs(r__1)); - if (*scale < absxi) { -/* Computing 2nd power */ - r__1 = *scale / absxi; - *sumsq = *sumsq * (r__1 * r__1) + 1; - *scale = absxi; - } else { -/* Computing 2nd power */ - r__1 = absxi / *scale; - *sumsq += r__1 * r__1; - } - } -/* L10: */ - } - } - return 0; - -/* End of SLASSQ */ - -} /* slassq_ */ - -/* Subroutine */ int slasv2_(real *f, real *g, real *h__, real *ssmin, real * - ssmax, real *snr, real *csr, real *snl, real *csl) -{ - /* System generated locals */ - real r__1; - - /* Builtin functions */ - double sqrt(doublereal), r_sign(real *, real *); - - /* Local variables */ - static real a, d__, l, m, r__, s, t, fa, ga, ha, ft, gt, ht, mm, tt, clt, - crt, slt, srt; - static integer pmax; - static real temp; - static logical swap; - static real tsign; - static logical gasmal; - extern doublereal slamch_(char *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - SLASV2 computes the singular value decomposition of a 2-by-2 - triangular matrix - [ F G ] - [ 0 H ]. - On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the - smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and - right singular vectors for abs(SSMAX), giving the decomposition - - [ CSL SNL ] [ F G ] [ CSR -SNR ] = [ SSMAX 0 ] - [-SNL CSL ] [ 0 H ] [ SNR CSR ] [ 0 SSMIN ]. - - Arguments - ========= - - F (input) REAL - The (1,1) element of the 2-by-2 matrix. - - G (input) REAL - The (1,2) element of the 2-by-2 matrix. - - H (input) REAL - The (2,2) element of the 2-by-2 matrix. - - SSMIN (output) REAL - abs(SSMIN) is the smaller singular value. - - SSMAX (output) REAL - abs(SSMAX) is the larger singular value. - - SNL (output) REAL - CSL (output) REAL - The vector (CSL, SNL) is a unit left singular vector for the - singular value abs(SSMAX). - - SNR (output) REAL - CSR (output) REAL - The vector (CSR, SNR) is a unit right singular vector for the - singular value abs(SSMAX). - - Further Details - =============== - - Any input parameter may be aliased with any output parameter. - - Barring over/underflow and assuming a guard digit in subtraction, all - output quantities are correct to within a few units in the last - place (ulps). - - In IEEE arithmetic, the code works correctly if one matrix element is - infinite. - - Overflow will not occur unless the largest singular value itself - overflows or is within a few ulps of overflow. (On machines with - partial overflow, like the Cray, overflow may occur if the largest - singular value is within a factor of 2 of overflow.) - - Underflow is harmless if underflow is gradual. Otherwise, results - may correspond to a matrix modified by perturbations of size near - the underflow threshold. - - ===================================================================== -*/ - - - ft = *f; - fa = dabs(ft); - ht = *h__; - ha = dabs(*h__); - -/* - PMAX points to the maximum absolute element of matrix - PMAX = 1 if F largest in absolute values - PMAX = 2 if G largest in absolute values - PMAX = 3 if H largest in absolute values -*/ - - pmax = 1; - swap = ha > fa; - if (swap) { - pmax = 3; - temp = ft; - ft = ht; - ht = temp; - temp = fa; - fa = ha; - ha = temp; - -/* Now FA .ge. HA */ - - } - gt = *g; - ga = dabs(gt); - if (ga == 0.f) { - -/* Diagonal matrix */ - - *ssmin = ha; - *ssmax = fa; - clt = 1.f; - crt = 1.f; - slt = 0.f; - srt = 0.f; - } else { - gasmal = TRUE_; - if (ga > fa) { - pmax = 2; - if (fa / ga < slamch_("EPS")) { - -/* Case of very large GA */ - - gasmal = FALSE_; - *ssmax = ga; - if (ha > 1.f) { - *ssmin = fa / (ga / ha); - } else { - *ssmin = fa / ga * ha; - } - clt = 1.f; - slt = ht / gt; - srt = 1.f; - crt = ft / gt; - } - } - if (gasmal) { - -/* Normal case */ - - d__ = fa - ha; - if (d__ == fa) { - -/* Copes with infinite F or H */ - - l = 1.f; - } else { - l = d__ / fa; - } - -/* Note that 0 .le. L .le. 1 */ - - m = gt / ft; - -/* Note that abs(M) .le. 1/macheps */ - - t = 2.f - l; - -/* Note that T .ge. 1 */ - - mm = m * m; - tt = t * t; - s = sqrt(tt + mm); - -/* Note that 1 .le. S .le. 1 + 1/macheps */ - - if (l == 0.f) { - r__ = dabs(m); - } else { - r__ = sqrt(l * l + mm); - } - -/* Note that 0 .le. R .le. 1 + 1/macheps */ - - a = (s + r__) * .5f; - -/* Note that 1 .le. A .le. 1 + abs(M) */ - - *ssmin = ha / a; - *ssmax = fa * a; - if (mm == 0.f) { - -/* Note that M is very tiny */ - - if (l == 0.f) { - t = r_sign(&c_b8920, &ft) * r_sign(&c_b871, >); - } else { - t = gt / r_sign(&d__, &ft) + m / t; - } - } else { - t = (m / (s + t) + m / (r__ + l)) * (a + 1.f); - } - l = sqrt(t * t + 4.f); - crt = 2.f / l; - srt = t / l; - clt = (crt + srt * m) / a; - slt = ht / ft * srt / a; - } - } - if (swap) { - *csl = srt; - *snl = crt; - *csr = slt; - *snr = clt; - } else { - *csl = clt; - *snl = slt; - *csr = crt; - *snr = srt; - } - -/* Correct signs of SSMAX and SSMIN */ - - if (pmax == 1) { - tsign = r_sign(&c_b871, csr) * r_sign(&c_b871, csl) * r_sign(&c_b871, - f); - } - if (pmax == 2) { - tsign = r_sign(&c_b871, snr) * r_sign(&c_b871, csl) * r_sign(&c_b871, - g); - } - if (pmax == 3) { - tsign = r_sign(&c_b871, snr) * r_sign(&c_b871, snl) * r_sign(&c_b871, - h__); - } - *ssmax = r_sign(ssmax, &tsign); - r__1 = tsign * r_sign(&c_b871, f) * r_sign(&c_b871, h__); - *ssmin = r_sign(ssmin, &r__1); - return 0; - -/* End of SLASV2 */ - -} /* slasv2_ */ - -/* Subroutine */ int slaswp_(integer *n, real *a, integer *lda, integer *k1, - integer *k2, integer *ipiv, integer *incx) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc; - static real temp; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SLASWP performs a series of row interchanges on the matrix A. - One row interchange is initiated for each of rows K1 through K2 of A. - - Arguments - ========= - - N (input) INTEGER - The number of columns of the matrix A. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the matrix of column dimension N to which the row - interchanges will be applied. - On exit, the permuted matrix. - - LDA (input) INTEGER - The leading dimension of the array A. - - K1 (input) INTEGER - The first element of IPIV for which a row interchange will - be done. - - K2 (input) INTEGER - The last element of IPIV for which a row interchange will - be done. - - IPIV (input) INTEGER array, dimension (M*abs(INCX)) - The vector of pivot indices. Only the elements in positions - K1 through K2 of IPIV are accessed. - IPIV(K) = L implies rows K and L are to be interchanged. - - INCX (input) INTEGER - The increment between successive values of IPIV. If IPIV - is negative, the pivots are applied in reverse order. - - Further Details - =============== - - Modified by - R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA - - ===================================================================== - - - Interchange row I with row IPIV(I) for each of rows K1 through K2. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - - /* Function Body */ - if (*incx > 0) { - ix0 = *k1; - i1 = *k1; - i2 = *k2; - inc = 1; - } else if (*incx < 0) { - ix0 = (1 - *k2) * *incx + 1; - i1 = *k2; - i2 = *k1; - inc = -1; - } else { - return 0; - } - - n32 = *n / 32 << 5; - if (n32 != 0) { - i__1 = n32; - for (j = 1; j <= i__1; j += 32) { - ix = ix0; - i__2 = i2; - i__3 = inc; - for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) - { - ip = ipiv[ix]; - if (ip != i__) { - i__4 = j + 31; - for (k = j; k <= i__4; ++k) { - temp = a[i__ + k * a_dim1]; - a[i__ + k * a_dim1] = a[ip + k * a_dim1]; - a[ip + k * a_dim1] = temp; -/* L10: */ - } - } - ix += *incx; -/* L20: */ - } -/* L30: */ - } - } - if (n32 != *n) { - ++n32; - ix = ix0; - i__1 = i2; - i__3 = inc; - for (i__ = i1; i__3 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__3) { - ip = ipiv[ix]; - if (ip != i__) { - i__2 = *n; - for (k = n32; k <= i__2; ++k) { - temp = a[i__ + k * a_dim1]; - a[i__ + k * a_dim1] = a[ip + k * a_dim1]; - a[ip + k * a_dim1] = temp; -/* L40: */ - } - } - ix += *incx; -/* L50: */ - } - } - - return 0; - -/* End of SLASWP */ - -} /* slaswp_ */ - -/* Subroutine */ int slatrd_(char *uplo, integer *n, integer *nb, real *a, - integer *lda, real *e, real *tau, real *w, integer *ldw) -{ - /* System generated locals */ - integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, iw; - extern doublereal sdot_(integer *, real *, integer *, real *, integer *); - static real alpha; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), - sgemv_(char *, integer *, integer *, real *, real *, integer *, - real *, integer *, real *, real *, integer *), saxpy_( - integer *, real *, real *, integer *, real *, integer *), ssymv_( - char *, integer *, real *, real *, integer *, real *, integer *, - real *, real *, integer *), slarfg_(integer *, real *, - real *, integer *, real *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - SLATRD reduces NB rows and columns of a real symmetric matrix A to - symmetric tridiagonal form by an orthogonal similarity - transformation Q' * A * Q, and returns the matrices V and W which are - needed to apply the transformation to the unreduced part of A. - - If UPLO = 'U', SLATRD reduces the last NB rows and columns of a - matrix, of which the upper triangle is supplied; - if UPLO = 'L', SLATRD reduces the first NB rows and columns of a - matrix, of which the lower triangle is supplied. - - This is an auxiliary routine called by SSYTRD. - - Arguments - ========= - - UPLO (input) CHARACTER - Specifies whether the upper or lower triangular part of the - symmetric matrix A is stored: - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the matrix A. - - NB (input) INTEGER - The number of rows and columns to be reduced. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the symmetric matrix A. If UPLO = 'U', the leading - n-by-n upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading n-by-n lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - On exit: - if UPLO = 'U', the last NB columns have been reduced to - tridiagonal form, with the diagonal elements overwriting - the diagonal elements of A; the elements above the diagonal - with the array TAU, represent the orthogonal matrix Q as a - product of elementary reflectors; - if UPLO = 'L', the first NB columns have been reduced to - tridiagonal form, with the diagonal elements overwriting - the diagonal elements of A; the elements below the diagonal - with the array TAU, represent the orthogonal matrix Q as a - product of elementary reflectors. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= (1,N). - - E (output) REAL array, dimension (N-1) - If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal - elements of the last NB columns of the reduced matrix; - if UPLO = 'L', E(1:nb) contains the subdiagonal elements of - the first NB columns of the reduced matrix. - - TAU (output) REAL array, dimension (N-1) - The scalar factors of the elementary reflectors, stored in - TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. - See Further Details. - - W (output) REAL array, dimension (LDW,NB) - The n-by-nb matrix W required to update the unreduced part - of A. - - LDW (input) INTEGER - The leading dimension of the array W. LDW >= max(1,N). - - Further Details - =============== - - If UPLO = 'U', the matrix Q is represented as a product of elementary - reflectors - - Q = H(n) H(n-1) . . . H(n-nb+1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), - and tau in TAU(i-1). - - If UPLO = 'L', the matrix Q is represented as a product of elementary - reflectors - - Q = H(1) H(2) . . . H(nb). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), - and tau in TAU(i). - - The elements of the vectors v together form the n-by-nb matrix V - which is needed, with W, to apply the transformation to the unreduced - part of the matrix, using a symmetric rank-2k update of the form: - A := A - V*W' - W*V'. - - The contents of A on exit are illustrated by the following examples - with n = 5 and nb = 2: - - if UPLO = 'U': if UPLO = 'L': - - ( a a a v4 v5 ) ( d ) - ( a a v4 v5 ) ( 1 d ) - ( a 1 v5 ) ( v1 1 a ) - ( d 1 ) ( v1 v2 a a ) - ( d ) ( v1 v2 a a a ) - - where d denotes a diagonal element of the reduced matrix, a denotes - an element of the original matrix that is unchanged, and vi denotes - an element of the vector defining H(i). - - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --e; - --tau; - w_dim1 = *ldw; - w_offset = 1 + w_dim1; - w -= w_offset; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - - if (lsame_(uplo, "U")) { - -/* Reduce last NB columns of upper triangle */ - - i__1 = *n - *nb + 1; - for (i__ = *n; i__ >= i__1; --i__) { - iw = i__ - *n + *nb; - if (i__ < *n) { - -/* Update A(1:i,i) */ - - i__2 = *n - i__; - sgemv_("No transpose", &i__, &i__2, &c_b1150, &a[(i__ + 1) * - a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, & - c_b871, &a[i__ * a_dim1 + 1], &c__1); - i__2 = *n - i__; - sgemv_("No transpose", &i__, &i__2, &c_b1150, &w[(iw + 1) * - w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b871, &a[i__ * a_dim1 + 1], &c__1); - } - if (i__ > 1) { - -/* - Generate elementary reflector H(i) to annihilate - A(1:i-2,i) -*/ - - i__2 = i__ - 1; - slarfg_(&i__2, &a[i__ - 1 + i__ * a_dim1], &a[i__ * a_dim1 + - 1], &c__1, &tau[i__ - 1]); - e[i__ - 1] = a[i__ - 1 + i__ * a_dim1]; - a[i__ - 1 + i__ * a_dim1] = 1.f; - -/* Compute W(1:i-1,i) */ - - i__2 = i__ - 1; - ssymv_("Upper", &i__2, &c_b871, &a[a_offset], lda, &a[i__ * - a_dim1 + 1], &c__1, &c_b1101, &w[iw * w_dim1 + 1], & - c__1); - if (i__ < *n) { - i__2 = i__ - 1; - i__3 = *n - i__; - sgemv_("Transpose", &i__2, &i__3, &c_b871, &w[(iw + 1) * - w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], &c__1, & - c_b1101, &w[i__ + 1 + iw * w_dim1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - sgemv_("No transpose", &i__2, &i__3, &c_b1150, &a[(i__ + - 1) * a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], - &c__1, &c_b871, &w[iw * w_dim1 + 1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - sgemv_("Transpose", &i__2, &i__3, &c_b871, &a[(i__ + 1) * - a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], &c__1, & - c_b1101, &w[i__ + 1 + iw * w_dim1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - sgemv_("No transpose", &i__2, &i__3, &c_b1150, &w[(iw + 1) - * w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], & - c__1, &c_b871, &w[iw * w_dim1 + 1], &c__1); - } - i__2 = i__ - 1; - sscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1); - i__2 = i__ - 1; - alpha = tau[i__ - 1] * -.5f * sdot_(&i__2, &w[iw * w_dim1 + 1] - , &c__1, &a[i__ * a_dim1 + 1], &c__1); - i__2 = i__ - 1; - saxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw * - w_dim1 + 1], &c__1); - } - -/* L10: */ - } - } else { - -/* Reduce first NB columns of lower triangle */ - - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Update A(i:n,i) */ - - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1150, &a[i__ + a_dim1], - lda, &w[i__ + w_dim1], ldw, &c_b871, &a[i__ + i__ * - a_dim1], &c__1); - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1150, &w[i__ + w_dim1], - ldw, &a[i__ + a_dim1], lda, &c_b871, &a[i__ + i__ * - a_dim1], &c__1); - if (i__ < *n) { - -/* - Generate elementary reflector H(i) to annihilate - A(i+2:n,i) -*/ - - i__2 = *n - i__; -/* Computing MIN */ - i__3 = i__ + 2; - slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + - i__ * a_dim1], &c__1, &tau[i__]); - e[i__] = a[i__ + 1 + i__ * a_dim1]; - a[i__ + 1 + i__ * a_dim1] = 1.f; - -/* Compute W(i+1:n,i) */ - - i__2 = *n - i__; - ssymv_("Lower", &i__2, &c_b871, &a[i__ + 1 + (i__ + 1) * - a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b1101, &w[i__ + 1 + i__ * w_dim1], &c__1) - ; - i__2 = *n - i__; - i__3 = i__ - 1; - sgemv_("Transpose", &i__2, &i__3, &c_b871, &w[i__ + 1 + - w_dim1], ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b1101, &w[i__ * w_dim1 + 1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1150, &a[i__ + 1 + - a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b871, & - w[i__ + 1 + i__ * w_dim1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - sgemv_("Transpose", &i__2, &i__3, &c_b871, &a[i__ + 1 + - a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b1101, &w[i__ * w_dim1 + 1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1150, &w[i__ + 1 + - w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b871, & - w[i__ + 1 + i__ * w_dim1], &c__1); - i__2 = *n - i__; - sscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1); - i__2 = *n - i__; - alpha = tau[i__] * -.5f * sdot_(&i__2, &w[i__ + 1 + i__ * - w_dim1], &c__1, &a[i__ + 1 + i__ * a_dim1], &c__1); - i__2 = *n - i__; - saxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[ - i__ + 1 + i__ * w_dim1], &c__1); - } - -/* L20: */ - } - } - - return 0; - -/* End of SLATRD */ - -} /* slatrd_ */ - -/* Subroutine */ int slauu2_(char *uplo, integer *n, real *a, integer *lda, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__; - static real aii; - extern doublereal sdot_(integer *, real *, integer *, real *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), - sgemv_(char *, integer *, integer *, real *, real *, integer *, - real *, integer *, real *, real *, integer *); - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - SLAUU2 computes the product U * U' or L' * L, where the triangular - factor U or L is stored in the upper or lower triangular part of - the array A. - - If UPLO = 'U' or 'u' then the upper triangle of the result is stored, - overwriting the factor U in A. - If UPLO = 'L' or 'l' then the lower triangle of the result is stored, - overwriting the factor L in A. - - This is the unblocked form of the algorithm, calling Level 2 BLAS. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the triangular factor stored in the array A - is upper or lower triangular: - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the triangular factor U or L. N >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the triangular factor U or L. - On exit, if UPLO = 'U', the upper triangle of A is - overwritten with the upper triangle of the product U * U'; - if UPLO = 'L', the lower triangle of A is overwritten with - the lower triangle of the product L' * L. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLAUU2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - if (upper) { - -/* Compute the product U * U'. */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - aii = a[i__ + i__ * a_dim1]; - if (i__ < *n) { - i__2 = *n - i__ + 1; - a[i__ + i__ * a_dim1] = sdot_(&i__2, &a[i__ + i__ * a_dim1], - lda, &a[i__ + i__ * a_dim1], lda); - i__2 = i__ - 1; - i__3 = *n - i__; - sgemv_("No transpose", &i__2, &i__3, &c_b871, &a[(i__ + 1) * - a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & - aii, &a[i__ * a_dim1 + 1], &c__1); - } else { - sscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1); - } -/* L10: */ - } - - } else { - -/* Compute the product L' * L. */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - aii = a[i__ + i__ * a_dim1]; - if (i__ < *n) { - i__2 = *n - i__ + 1; - a[i__ + i__ * a_dim1] = sdot_(&i__2, &a[i__ + i__ * a_dim1], & - c__1, &a[i__ + i__ * a_dim1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - sgemv_("Transpose", &i__2, &i__3, &c_b871, &a[i__ + 1 + - a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &aii, - &a[i__ + a_dim1], lda); - } else { - sscal_(&i__, &aii, &a[i__ + a_dim1], lda); - } -/* L20: */ - } - } - - return 0; - -/* End of SLAUU2 */ - -} /* slauu2_ */ - -/* Subroutine */ int slauum_(char *uplo, integer *n, real *a, integer *lda, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, ib, nb; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *); - static logical upper; - extern /* Subroutine */ int strmm_(char *, char *, char *, char *, - integer *, integer *, real *, real *, integer *, real *, integer * - ), ssyrk_(char *, char *, integer - *, integer *, real *, real *, integer *, real *, real *, integer * - ), slauu2_(char *, integer *, real *, integer *, - integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - SLAUUM computes the product U * U' or L' * L, where the triangular - factor U or L is stored in the upper or lower triangular part of - the array A. - - If UPLO = 'U' or 'u' then the upper triangle of the result is stored, - overwriting the factor U in A. - If UPLO = 'L' or 'l' then the lower triangle of the result is stored, - overwriting the factor L in A. - - This is the blocked form of the algorithm, calling Level 3 BLAS. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the triangular factor stored in the array A - is upper or lower triangular: - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the triangular factor U or L. N >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the triangular factor U or L. - On exit, if UPLO = 'U', the upper triangle of A is - overwritten with the upper triangle of the product U * U'; - if UPLO = 'L', the lower triangle of A is overwritten with - the lower triangle of the product L' * L. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLAUUM", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Determine the block size for this environment. */ - - nb = ilaenv_(&c__1, "SLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - - if (nb <= 1 || nb >= *n) { - -/* Use unblocked code */ - - slauu2_(uplo, n, &a[a_offset], lda, info); - } else { - -/* Use blocked code */ - - if (upper) { - -/* Compute the product U * U'. */ - - i__1 = *n; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = nb, i__4 = *n - i__ + 1; - ib = min(i__3,i__4); - i__3 = i__ - 1; - strmm_("Right", "Upper", "Transpose", "Non-unit", &i__3, &ib, - &c_b871, &a[i__ + i__ * a_dim1], lda, &a[i__ * a_dim1 - + 1], lda) - ; - slauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info); - if (i__ + ib <= *n) { - i__3 = i__ - 1; - i__4 = *n - i__ - ib + 1; - sgemm_("No transpose", "Transpose", &i__3, &ib, &i__4, & - c_b871, &a[(i__ + ib) * a_dim1 + 1], lda, &a[i__ - + (i__ + ib) * a_dim1], lda, &c_b871, &a[i__ * - a_dim1 + 1], lda); - i__3 = *n - i__ - ib + 1; - ssyrk_("Upper", "No transpose", &ib, &i__3, &c_b871, &a[ - i__ + (i__ + ib) * a_dim1], lda, &c_b871, &a[i__ - + i__ * a_dim1], lda); - } -/* L10: */ - } - } else { - -/* Compute the product L' * L. */ - - i__2 = *n; - i__1 = nb; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { -/* Computing MIN */ - i__3 = nb, i__4 = *n - i__ + 1; - ib = min(i__3,i__4); - i__3 = i__ - 1; - strmm_("Left", "Lower", "Transpose", "Non-unit", &ib, &i__3, & - c_b871, &a[i__ + i__ * a_dim1], lda, &a[i__ + a_dim1], - lda); - slauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info); - if (i__ + ib <= *n) { - i__3 = i__ - 1; - i__4 = *n - i__ - ib + 1; - sgemm_("Transpose", "No transpose", &ib, &i__3, &i__4, & - c_b871, &a[i__ + ib + i__ * a_dim1], lda, &a[i__ - + ib + a_dim1], lda, &c_b871, &a[i__ + a_dim1], - lda); - i__3 = *n - i__ - ib + 1; - ssyrk_("Lower", "Transpose", &ib, &i__3, &c_b871, &a[i__ - + ib + i__ * a_dim1], lda, &c_b871, &a[i__ + i__ * - a_dim1], lda); - } -/* L20: */ - } - } - } - - return 0; - -/* End of SLAUUM */ - -} /* slauum_ */ - -/* Subroutine */ int sorg2r_(integer *m, integer *n, integer *k, real *a, - integer *lda, real *tau, real *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - real r__1; - - /* Local variables */ - static integer i__, j, l; - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), - slarf_(char *, integer *, integer *, real *, integer *, real *, - real *, integer *, real *), xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - SORG2R generates an m by n real matrix Q with orthonormal columns, - which is defined as the first n columns of a product of k elementary - reflectors of order m - - Q = H(1) H(2) . . . H(k) - - as returned by SGEQRF. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix Q. M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q. M >= N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines the - matrix Q. N >= K >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the i-th column must contain the vector which - defines the elementary reflector H(i), for i = 1,2,...,k, as - returned by SGEQRF in the first k columns of its array - argument A. - On exit, the m-by-n matrix Q. - - LDA (input) INTEGER - The first dimension of the array A. LDA >= max(1,M). - - TAU (input) REAL array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by SGEQRF. - - WORK (workspace) REAL array, dimension (N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument has an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0 || *n > *m) { - *info = -2; - } else if (*k < 0 || *k > *n) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SORG2R", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n <= 0) { - return 0; - } - -/* Initialise columns k+1:n to columns of the unit matrix */ - - i__1 = *n; - for (j = *k + 1; j <= i__1; ++j) { - i__2 = *m; - for (l = 1; l <= i__2; ++l) { - a[l + j * a_dim1] = 0.f; -/* L10: */ - } - a[j + j * a_dim1] = 1.f; -/* L20: */ - } - - for (i__ = *k; i__ >= 1; --i__) { - -/* Apply H(i) to A(i:m,i:n) from the left */ - - if (i__ < *n) { - a[i__ + i__ * a_dim1] = 1.f; - i__1 = *m - i__ + 1; - i__2 = *n - i__; - slarf_("Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[ - i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); - } - if (i__ < *m) { - i__1 = *m - i__; - r__1 = -tau[i__]; - sscal_(&i__1, &r__1, &a[i__ + 1 + i__ * a_dim1], &c__1); - } - a[i__ + i__ * a_dim1] = 1.f - tau[i__]; - -/* Set A(1:i-1,i) to zero */ - - i__1 = i__ - 1; - for (l = 1; l <= i__1; ++l) { - a[l + i__ * a_dim1] = 0.f; -/* L30: */ - } -/* L40: */ - } - return 0; - -/* End of SORG2R */ - -} /* sorg2r_ */ - -/* Subroutine */ int sorgbr_(char *vect, integer *m, integer *n, integer *k, - real *a, integer *lda, real *tau, real *work, integer *lwork, integer - *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j, nb, mn; - extern logical lsame_(char *, char *); - static integer iinfo; - static logical wantq; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int sorglq_(integer *, integer *, integer *, real - *, integer *, real *, real *, integer *, integer *), sorgqr_( - integer *, integer *, integer *, real *, integer *, real *, real * - , integer *, integer *); - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SORGBR generates one of the real orthogonal matrices Q or P**T - determined by SGEBRD when reducing a real matrix A to bidiagonal - form: A = Q * B * P**T. Q and P**T are defined as products of - elementary reflectors H(i) or G(i) respectively. - - If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q - is of order M: - if m >= k, Q = H(1) H(2) . . . H(k) and SORGBR returns the first n - columns of Q, where m >= n >= k; - if m < k, Q = H(1) H(2) . . . H(m-1) and SORGBR returns Q as an - M-by-M matrix. - - If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T - is of order N: - if k < n, P**T = G(k) . . . G(2) G(1) and SORGBR returns the first m - rows of P**T, where n >= m >= k; - if k >= n, P**T = G(n-1) . . . G(2) G(1) and SORGBR returns P**T as - an N-by-N matrix. - - Arguments - ========= - - VECT (input) CHARACTER*1 - Specifies whether the matrix Q or the matrix P**T is - required, as defined in the transformation applied by SGEBRD: - = 'Q': generate Q; - = 'P': generate P**T. - - M (input) INTEGER - The number of rows of the matrix Q or P**T to be returned. - M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q or P**T to be returned. - N >= 0. - If VECT = 'Q', M >= N >= min(M,K); - if VECT = 'P', N >= M >= min(N,K). - - K (input) INTEGER - If VECT = 'Q', the number of columns in the original M-by-K - matrix reduced by SGEBRD. - If VECT = 'P', the number of rows in the original K-by-N - matrix reduced by SGEBRD. - K >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the vectors which define the elementary reflectors, - as returned by SGEBRD. - On exit, the M-by-N matrix Q or P**T. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (input) REAL array, dimension - (min(M,K)) if VECT = 'Q' - (min(N,K)) if VECT = 'P' - TAU(i) must contain the scalar factor of the elementary - reflector H(i) or G(i), which determines Q or P**T, as - returned by SGEBRD in its array argument TAUQ or TAUP. - - WORK (workspace/output) REAL array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,min(M,N)). - For optimum performance LWORK >= min(M,N)*NB, where NB - is the optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - wantq = lsame_(vect, "Q"); - mn = min(*m,*n); - lquery = *lwork == -1; - if (! wantq && ! lsame_(vect, "P")) { - *info = -1; - } else if (*m < 0) { - *info = -2; - } else if (*n < 0 || wantq && (*n > *m || *n < min(*m,*k)) || ! wantq && ( - *m > *n || *m < min(*n,*k))) { - *info = -3; - } else if (*k < 0) { - *info = -4; - } else if (*lda < max(1,*m)) { - *info = -6; - } else if (*lwork < max(1,mn) && ! lquery) { - *info = -9; - } - - if (*info == 0) { - if (wantq) { - nb = ilaenv_(&c__1, "SORGQR", " ", m, n, k, &c_n1, (ftnlen)6, ( - ftnlen)1); - } else { - nb = ilaenv_(&c__1, "SORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, ( - ftnlen)1); - } - lwkopt = max(1,mn) * nb; - work[1] = (real) lwkopt; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("SORGBR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0) { - work[1] = 1.f; - return 0; - } - - if (wantq) { - -/* - Form Q, determined by a call to SGEBRD to reduce an m-by-k - matrix -*/ - - if (*m >= *k) { - -/* If m >= k, assume m >= n >= k */ - - sorgqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & - iinfo); - - } else { - -/* - If m < k, assume m = n - - Shift the vectors which define the elementary reflectors one - column to the right, and set the first row and column of Q - to those of the unit matrix -*/ - - for (j = *m; j >= 2; --j) { - a[j * a_dim1 + 1] = 0.f; - i__1 = *m; - for (i__ = j + 1; i__ <= i__1; ++i__) { - a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; -/* L10: */ - } -/* L20: */ - } - a[a_dim1 + 1] = 1.f; - i__1 = *m; - for (i__ = 2; i__ <= i__1; ++i__) { - a[i__ + a_dim1] = 0.f; -/* L30: */ - } - if (*m > 1) { - -/* Form Q(2:m,2:m) */ - - i__1 = *m - 1; - i__2 = *m - 1; - i__3 = *m - 1; - sorgqr_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ - 1], &work[1], lwork, &iinfo); - } - } - } else { - -/* - Form P', determined by a call to SGEBRD to reduce a k-by-n - matrix -*/ - - if (*k < *n) { - -/* If k < n, assume k <= m <= n */ - - sorglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & - iinfo); - - } else { - -/* - If k >= n, assume m = n - - Shift the vectors which define the elementary reflectors one - row downward, and set the first row and column of P' to - those of the unit matrix -*/ - - a[a_dim1 + 1] = 1.f; - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - a[i__ + a_dim1] = 0.f; -/* L40: */ - } - i__1 = *n; - for (j = 2; j <= i__1; ++j) { - for (i__ = j - 1; i__ >= 2; --i__) { - a[i__ + j * a_dim1] = a[i__ - 1 + j * a_dim1]; -/* L50: */ - } - a[j * a_dim1 + 1] = 0.f; -/* L60: */ - } - if (*n > 1) { - -/* Form P'(2:n,2:n) */ - - i__1 = *n - 1; - i__2 = *n - 1; - i__3 = *n - 1; - sorglq_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ - 1], &work[1], lwork, &iinfo); - } - } - } - work[1] = (real) lwkopt; - return 0; - -/* End of SORGBR */ - -} /* sorgbr_ */ - -/* Subroutine */ int sorghr_(integer *n, integer *ilo, integer *ihi, real *a, - integer *lda, real *tau, real *work, integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j, nb, nh, iinfo; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real - *, integer *, real *, real *, integer *, integer *); - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SORGHR generates a real orthogonal matrix Q which is defined as the - product of IHI-ILO elementary reflectors of order N, as returned by - SGEHRD: - - Q = H(ilo) H(ilo+1) . . . H(ihi-1). - - Arguments - ========= - - N (input) INTEGER - The order of the matrix Q. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - ILO and IHI must have the same values as in the previous call - of SGEHRD. Q is equal to the unit matrix except in the - submatrix Q(ilo+1:ihi,ilo+1:ihi). - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the vectors which define the elementary reflectors, - as returned by SGEHRD. - On exit, the N-by-N orthogonal matrix Q. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - TAU (input) REAL array, dimension (N-1) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by SGEHRD. - - WORK (workspace/output) REAL array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= IHI-ILO. - For optimum performance LWORK >= (IHI-ILO)*NB, where NB is - the optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nh = *ihi - *ilo; - lquery = *lwork == -1; - if (*n < 0) { - *info = -1; - } else if (*ilo < 1 || *ilo > max(1,*n)) { - *info = -2; - } else if (*ihi < min(*ilo,*n) || *ihi > *n) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*lwork < max(1,nh) && ! lquery) { - *info = -8; - } - - if (*info == 0) { - nb = ilaenv_(&c__1, "SORGQR", " ", &nh, &nh, &nh, &c_n1, (ftnlen)6, ( - ftnlen)1); - lwkopt = max(1,nh) * nb; - work[1] = (real) lwkopt; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("SORGHR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - work[1] = 1.f; - return 0; - } - -/* - Shift the vectors which define the elementary reflectors one - column to the right, and set the first ilo and the last n-ihi - rows and columns to those of the unit matrix -*/ - - i__1 = *ilo + 1; - for (j = *ihi; j >= i__1; --j) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.f; -/* L10: */ - } - i__2 = *ihi; - for (i__ = j + 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; -/* L20: */ - } - i__2 = *n; - for (i__ = *ihi + 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.f; -/* L30: */ - } -/* L40: */ - } - i__1 = *ilo; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.f; -/* L50: */ - } - a[j + j * a_dim1] = 1.f; -/* L60: */ - } - i__1 = *n; - for (j = *ihi + 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.f; -/* L70: */ - } - a[j + j * a_dim1] = 1.f; -/* L80: */ - } - - if (nh > 0) { - -/* Generate Q(ilo+1:ihi,ilo+1:ihi) */ - - sorgqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[* - ilo], &work[1], lwork, &iinfo); - } - work[1] = (real) lwkopt; - return 0; - -/* End of SORGHR */ - -} /* sorghr_ */ - -/* Subroutine */ int sorgl2_(integer *m, integer *n, integer *k, real *a, - integer *lda, real *tau, real *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - real r__1; - - /* Local variables */ - static integer i__, j, l; - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), - slarf_(char *, integer *, integer *, real *, integer *, real *, - real *, integer *, real *), xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SORGL2 generates an m by n real matrix Q with orthonormal rows, - which is defined as the first m rows of a product of k elementary - reflectors of order n - - Q = H(k) . . . H(2) H(1) - - as returned by SGELQF. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix Q. M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q. N >= M. - - K (input) INTEGER - The number of elementary reflectors whose product defines the - matrix Q. M >= K >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the i-th row must contain the vector which defines - the elementary reflector H(i), for i = 1,2,...,k, as returned - by SGELQF in the first k rows of its array argument A. - On exit, the m-by-n matrix Q. - - LDA (input) INTEGER - The first dimension of the array A. LDA >= max(1,M). - - TAU (input) REAL array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by SGELQF. - - WORK (workspace) REAL array, dimension (M) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument has an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < *m) { - *info = -2; - } else if (*k < 0 || *k > *m) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SORGL2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m <= 0) { - return 0; - } - - if (*k < *m) { - -/* Initialise rows k+1:m to rows of the unit matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (l = *k + 1; l <= i__2; ++l) { - a[l + j * a_dim1] = 0.f; -/* L10: */ - } - if (j > *k && j <= *m) { - a[j + j * a_dim1] = 1.f; - } -/* L20: */ - } - } - - for (i__ = *k; i__ >= 1; --i__) { - -/* Apply H(i) to A(i:m,i:n) from the right */ - - if (i__ < *n) { - if (i__ < *m) { - a[i__ + i__ * a_dim1] = 1.f; - i__1 = *m - i__; - i__2 = *n - i__ + 1; - slarf_("Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, & - tau[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); - } - i__1 = *n - i__; - r__1 = -tau[i__]; - sscal_(&i__1, &r__1, &a[i__ + (i__ + 1) * a_dim1], lda); - } - a[i__ + i__ * a_dim1] = 1.f - tau[i__]; - -/* Set A(i,1:i-1) to zero */ - - i__1 = i__ - 1; - for (l = 1; l <= i__1; ++l) { - a[i__ + l * a_dim1] = 0.f; -/* L30: */ - } -/* L40: */ - } - return 0; - -/* End of SORGL2 */ - -} /* sorgl2_ */ - -/* Subroutine */ int sorglq_(integer *m, integer *n, integer *k, real *a, - integer *lda, real *tau, real *work, integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int sorgl2_(integer *, integer *, integer *, real - *, integer *, real *, real *, integer *), slarfb_(char *, char *, - char *, char *, integer *, integer *, integer *, real *, integer * - , real *, integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, - real *, integer *, real *, real *, integer *); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SORGLQ generates an M-by-N real matrix Q with orthonormal rows, - which is defined as the first M rows of a product of K elementary - reflectors of order N - - Q = H(k) . . . H(2) H(1) - - as returned by SGELQF. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix Q. M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q. N >= M. - - K (input) INTEGER - The number of elementary reflectors whose product defines the - matrix Q. M >= K >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the i-th row must contain the vector which defines - the elementary reflector H(i), for i = 1,2,...,k, as returned - by SGELQF in the first k rows of its array argument A. - On exit, the M-by-N matrix Q. - - LDA (input) INTEGER - The first dimension of the array A. LDA >= max(1,M). - - TAU (input) REAL array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by SGELQF. - - WORK (workspace/output) REAL array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,M). - For optimum performance LWORK >= M*NB, where NB is - the optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument has an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nb = ilaenv_(&c__1, "SORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); - lwkopt = max(1,*m) * nb; - work[1] = (real) lwkopt; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < *m) { - *info = -2; - } else if (*k < 0 || *k > *m) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } else if (*lwork < max(1,*m) && ! lquery) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SORGLQ", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m <= 0) { - work[1] = 1.f; - return 0; - } - - nbmin = 2; - nx = 0; - iws = *m; - if (nb > 1 && nb < *k) { - -/* - Determine when to cross over from blocked to unblocked code. - - Computing MAX -*/ - i__1 = 0, i__2 = ilaenv_(&c__3, "SORGLQ", " ", m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < *k) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *m; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: reduce NB and - determine the minimum value of NB. -*/ - - nb = *lwork / ldwork; -/* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "SORGLQ", " ", m, n, k, &c_n1, - (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - } - } - } - - if (nb >= nbmin && nb < *k && nx < *k) { - -/* - Use blocked code after the last block. - The first kk rows are handled by the block method. -*/ - - ki = (*k - nx - 1) / nb * nb; -/* Computing MIN */ - i__1 = *k, i__2 = ki + nb; - kk = min(i__1,i__2); - -/* Set A(kk+1:m,1:kk) to zero. */ - - i__1 = kk; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = kk + 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.f; -/* L10: */ - } -/* L20: */ - } - } else { - kk = 0; - } - -/* Use unblocked code for the last or only block. */ - - if (kk < *m) { - i__1 = *m - kk; - i__2 = *n - kk; - i__3 = *k - kk; - sorgl2_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & - tau[kk + 1], &work[1], &iinfo); - } - - if (kk > 0) { - -/* Use blocked code */ - - i__1 = -nb; - for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { -/* Computing MIN */ - i__2 = nb, i__3 = *k - i__ + 1; - ib = min(i__2,i__3); - if (i__ + ib <= *m) { - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__2 = *n - i__ + 1; - slarft_("Forward", "Rowwise", &i__2, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], &work[1], &ldwork); - -/* Apply H' to A(i+ib:m,i:n) from the right */ - - i__2 = *m - i__ - ib + 1; - i__3 = *n - i__ + 1; - slarfb_("Right", "Transpose", "Forward", "Rowwise", &i__2, & - i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & - ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + - 1], &ldwork); - } - -/* Apply H' to columns i:n of current block */ - - i__2 = *n - i__ + 1; - sorgl2_(&ib, &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & - work[1], &iinfo); - -/* Set columns 1:i-1 of current block to zero */ - - i__2 = i__ - 1; - for (j = 1; j <= i__2; ++j) { - i__3 = i__ + ib - 1; - for (l = i__; l <= i__3; ++l) { - a[l + j * a_dim1] = 0.f; -/* L30: */ - } -/* L40: */ - } -/* L50: */ - } - } - - work[1] = (real) iws; - return 0; - -/* End of SORGLQ */ - -} /* sorglq_ */ - -/* Subroutine */ int sorgqr_(integer *m, integer *n, integer *k, real *a, - integer *lda, real *tau, real *work, integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int sorg2r_(integer *, integer *, integer *, real - *, integer *, real *, real *, integer *), slarfb_(char *, char *, - char *, char *, integer *, integer *, integer *, real *, integer * - , real *, integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, - real *, integer *, real *, real *, integer *); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SORGQR generates an M-by-N real matrix Q with orthonormal columns, - which is defined as the first N columns of a product of K elementary - reflectors of order M - - Q = H(1) H(2) . . . H(k) - - as returned by SGEQRF. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix Q. M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q. M >= N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines the - matrix Q. N >= K >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the i-th column must contain the vector which - defines the elementary reflector H(i), for i = 1,2,...,k, as - returned by SGEQRF in the first k columns of its array - argument A. - On exit, the M-by-N matrix Q. - - LDA (input) INTEGER - The first dimension of the array A. LDA >= max(1,M). - - TAU (input) REAL array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by SGEQRF. - - WORK (workspace/output) REAL array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,N). - For optimum performance LWORK >= N*NB, where NB is the - optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument has an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nb = ilaenv_(&c__1, "SORGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); - lwkopt = max(1,*n) * nb; - work[1] = (real) lwkopt; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0 || *n > *m) { - *info = -2; - } else if (*k < 0 || *k > *n) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } else if (*lwork < max(1,*n) && ! lquery) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SORGQR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n <= 0) { - work[1] = 1.f; - return 0; - } - - nbmin = 2; - nx = 0; - iws = *n; - if (nb > 1 && nb < *k) { - -/* - Determine when to cross over from blocked to unblocked code. - - Computing MAX -*/ - i__1 = 0, i__2 = ilaenv_(&c__3, "SORGQR", " ", m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < *k) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *n; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: reduce NB and - determine the minimum value of NB. -*/ - - nb = *lwork / ldwork; -/* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "SORGQR", " ", m, n, k, &c_n1, - (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - } - } - } - - if (nb >= nbmin && nb < *k && nx < *k) { - -/* - Use blocked code after the last block. - The first kk columns are handled by the block method. -*/ - - ki = (*k - nx - 1) / nb * nb; -/* Computing MIN */ - i__1 = *k, i__2 = ki + nb; - kk = min(i__1,i__2); - -/* Set A(1:kk,kk+1:n) to zero. */ - - i__1 = *n; - for (j = kk + 1; j <= i__1; ++j) { - i__2 = kk; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.f; -/* L10: */ - } -/* L20: */ - } - } else { - kk = 0; - } - -/* Use unblocked code for the last or only block. */ - - if (kk < *n) { - i__1 = *m - kk; - i__2 = *n - kk; - i__3 = *k - kk; - sorg2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & - tau[kk + 1], &work[1], &iinfo); - } - - if (kk > 0) { - -/* Use blocked code */ - - i__1 = -nb; - for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { -/* Computing MIN */ - i__2 = nb, i__3 = *k - i__ + 1; - ib = min(i__2,i__3); - if (i__ + ib <= *n) { - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__2 = *m - i__ + 1; - slarft_("Forward", "Columnwise", &i__2, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], &work[1], &ldwork); - -/* Apply H to A(i:m,i+ib:n) from the left */ - - i__2 = *m - i__ + 1; - i__3 = *n - i__ - ib + 1; - slarfb_("Left", "No transpose", "Forward", "Columnwise", & - i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ - 1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, & - work[ib + 1], &ldwork); - } - -/* Apply H to rows i:m of current block */ - - i__2 = *m - i__ + 1; - sorg2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & - work[1], &iinfo); - -/* Set rows 1:i-1 of current block to zero */ - - i__2 = i__ + ib - 1; - for (j = i__; j <= i__2; ++j) { - i__3 = i__ - 1; - for (l = 1; l <= i__3; ++l) { - a[l + j * a_dim1] = 0.f; -/* L30: */ - } -/* L40: */ - } -/* L50: */ - } - } - - work[1] = (real) iws; - return 0; - -/* End of SORGQR */ - -} /* sorgqr_ */ - -/* Subroutine */ int sorm2l_(char *side, char *trans, integer *m, integer *n, - integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, - real *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; - - /* Local variables */ - static integer i__, i1, i2, i3, mi, ni, nq; - static real aii; - static logical left; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, - integer *, real *, real *, integer *, real *), xerbla_( - char *, integer *); - static logical notran; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - SORM2L overwrites the general real m by n matrix C with - - Q * C if SIDE = 'L' and TRANS = 'N', or - - Q'* C if SIDE = 'L' and TRANS = 'T', or - - C * Q if SIDE = 'R' and TRANS = 'N', or - - C * Q' if SIDE = 'R' and TRANS = 'T', - - where Q is a real orthogonal matrix defined as the product of k - elementary reflectors - - Q = H(k) . . . H(2) H(1) - - as returned by SGEQLF. Q is of order m if SIDE = 'L' and of order n - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q' from the Left - = 'R': apply Q or Q' from the Right - - TRANS (input) CHARACTER*1 - = 'N': apply Q (No transpose) - = 'T': apply Q' (Transpose) - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) REAL array, dimension (LDA,K) - The i-th column must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - SGEQLF in the last k columns of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. - If SIDE = 'L', LDA >= max(1,M); - if SIDE = 'R', LDA >= max(1,N). - - TAU (input) REAL array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by SGEQLF. - - C (input/output) REAL array, dimension (LDC,N) - On entry, the m by n matrix C. - On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace) REAL array, dimension - (N) if SIDE = 'L', - (M) if SIDE = 'R' - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - -/* NQ is the order of Q */ - - if (left) { - nq = *m; - } else { - nq = *n; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "T")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if (*k < 0 || *k > nq) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SORM2L", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || *k == 0) { - return 0; - } - - if (left && notran || ! left && ! notran) { - i1 = 1; - i2 = *k; - i3 = 1; - } else { - i1 = *k; - i2 = 1; - i3 = -1; - } - - if (left) { - ni = *n; - } else { - mi = *m; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - if (left) { - -/* H(i) is applied to C(1:m-k+i,1:n) */ - - mi = *m - *k + i__; - } else { - -/* H(i) is applied to C(1:m,1:n-k+i) */ - - ni = *n - *k + i__; - } - -/* Apply H(i) */ - - aii = a[nq - *k + i__ + i__ * a_dim1]; - a[nq - *k + i__ + i__ * a_dim1] = 1.f; - slarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &tau[i__], &c__[ - c_offset], ldc, &work[1]); - a[nq - *k + i__ + i__ * a_dim1] = aii; -/* L10: */ - } - return 0; - -/* End of SORM2L */ - -} /* sorm2l_ */ - -/* Subroutine */ int sorm2r_(char *side, char *trans, integer *m, integer *n, - integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, - real *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; - - /* Local variables */ - static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; - static real aii; - static logical left; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, - integer *, real *, real *, integer *, real *), xerbla_( - char *, integer *); - static logical notran; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - SORM2R overwrites the general real m by n matrix C with - - Q * C if SIDE = 'L' and TRANS = 'N', or - - Q'* C if SIDE = 'L' and TRANS = 'T', or - - C * Q if SIDE = 'R' and TRANS = 'N', or - - C * Q' if SIDE = 'R' and TRANS = 'T', - - where Q is a real orthogonal matrix defined as the product of k - elementary reflectors - - Q = H(1) H(2) . . . H(k) - - as returned by SGEQRF. Q is of order m if SIDE = 'L' and of order n - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q' from the Left - = 'R': apply Q or Q' from the Right - - TRANS (input) CHARACTER*1 - = 'N': apply Q (No transpose) - = 'T': apply Q' (Transpose) - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) REAL array, dimension (LDA,K) - The i-th column must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - SGEQRF in the first k columns of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. - If SIDE = 'L', LDA >= max(1,M); - if SIDE = 'R', LDA >= max(1,N). - - TAU (input) REAL array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by SGEQRF. - - C (input/output) REAL array, dimension (LDC,N) - On entry, the m by n matrix C. - On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace) REAL array, dimension - (N) if SIDE = 'L', - (M) if SIDE = 'R' - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - -/* NQ is the order of Q */ - - if (left) { - nq = *m; - } else { - nq = *n; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "T")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if (*k < 0 || *k > nq) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SORM2R", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || *k == 0) { - return 0; - } - - if (left && ! notran || ! left && notran) { - i1 = 1; - i2 = *k; - i3 = 1; - } else { - i1 = *k; - i2 = 1; - i3 = -1; - } - - if (left) { - ni = *n; - jc = 1; - } else { - mi = *m; - ic = 1; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - if (left) { - -/* H(i) is applied to C(i:m,1:n) */ - - mi = *m - i__ + 1; - ic = i__; - } else { - -/* H(i) is applied to C(1:m,i:n) */ - - ni = *n - i__ + 1; - jc = i__; - } - -/* Apply H(i) */ - - aii = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.f; - slarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &tau[i__], &c__[ - ic + jc * c_dim1], ldc, &work[1]); - a[i__ + i__ * a_dim1] = aii; -/* L10: */ - } - return 0; - -/* End of SORM2R */ - -} /* sorm2r_ */ - -/* Subroutine */ int sormbr_(char *vect, char *side, char *trans, integer *m, - integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, - integer *ldc, real *work, integer *lwork, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2]; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i1, i2, nb, mi, ni, nq, nw; - static logical left; - extern logical lsame_(char *, char *); - static integer iinfo; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static logical notran, applyq; - static char transt[1]; - extern /* Subroutine */ int sormlq_(char *, char *, integer *, integer *, - integer *, real *, integer *, real *, real *, integer *, real *, - integer *, integer *); - static integer lwkopt; - static logical lquery; - extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, - integer *, real *, integer *, real *, real *, integer *, real *, - integer *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - If VECT = 'Q', SORMBR overwrites the general real M-by-N matrix C - with - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'T': Q**T * C C * Q**T - - If VECT = 'P', SORMBR overwrites the general real M-by-N matrix C - with - SIDE = 'L' SIDE = 'R' - TRANS = 'N': P * C C * P - TRANS = 'T': P**T * C C * P**T - - Here Q and P**T are the orthogonal matrices determined by SGEBRD when - reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and - P**T are defined as products of elementary reflectors H(i) and G(i) - respectively. - - Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the - order of the orthogonal matrix Q or P**T that is applied. - - If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: - if nq >= k, Q = H(1) H(2) . . . H(k); - if nq < k, Q = H(1) H(2) . . . H(nq-1). - - If VECT = 'P', A is assumed to have been a K-by-NQ matrix: - if k < nq, P = G(1) G(2) . . . G(k); - if k >= nq, P = G(1) G(2) . . . G(nq-1). - - Arguments - ========= - - VECT (input) CHARACTER*1 - = 'Q': apply Q or Q**T; - = 'P': apply P or P**T. - - SIDE (input) CHARACTER*1 - = 'L': apply Q, Q**T, P or P**T from the Left; - = 'R': apply Q, Q**T, P or P**T from the Right. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q or P; - = 'T': Transpose, apply Q**T or P**T. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - If VECT = 'Q', the number of columns in the original - matrix reduced by SGEBRD. - If VECT = 'P', the number of rows in the original - matrix reduced by SGEBRD. - K >= 0. - - A (input) REAL array, dimension - (LDA,min(nq,K)) if VECT = 'Q' - (LDA,nq) if VECT = 'P' - The vectors which define the elementary reflectors H(i) and - G(i), whose products determine the matrices Q and P, as - returned by SGEBRD. - - LDA (input) INTEGER - The leading dimension of the array A. - If VECT = 'Q', LDA >= max(1,nq); - if VECT = 'P', LDA >= max(1,min(nq,K)). - - TAU (input) REAL array, dimension (min(nq,K)) - TAU(i) must contain the scalar factor of the elementary - reflector H(i) or G(i) which determines Q or P, as returned - by SGEBRD in the array argument TAUQ or TAUP. - - C (input/output) REAL array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q - or P*C or P**T*C or C*P or C*P**T. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace/output) REAL array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - applyq = lsame_(vect, "Q"); - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - lquery = *lwork == -1; - -/* NQ is the order of Q or P and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! applyq && ! lsame_(vect, "P")) { - *info = -1; - } else if (! left && ! lsame_(side, "R")) { - *info = -2; - } else if (! notran && ! lsame_(trans, "T")) { - *info = -3; - } else if (*m < 0) { - *info = -4; - } else if (*n < 0) { - *info = -5; - } else if (*k < 0) { - *info = -6; - } else /* if(complicated condition) */ { -/* Computing MAX */ - i__1 = 1, i__2 = min(nq,*k); - if (applyq && *lda < max(1,nq) || ! applyq && *lda < max(i__1,i__2)) { - *info = -8; - } else if (*ldc < max(1,*m)) { - *info = -11; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -13; - } - } - - if (*info == 0) { - if (applyq) { - if (left) { -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *m - 1; - i__2 = *m - 1; - nb = ilaenv_(&c__1, "SORMQR", ch__1, &i__1, n, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } else { -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *n - 1; - i__2 = *n - 1; - nb = ilaenv_(&c__1, "SORMQR", ch__1, m, &i__1, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } - } else { - if (left) { -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *m - 1; - i__2 = *m - 1; - nb = ilaenv_(&c__1, "SORMLQ", ch__1, &i__1, n, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } else { -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *n - 1; - i__2 = *n - 1; - nb = ilaenv_(&c__1, "SORMLQ", ch__1, m, &i__1, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } - } - lwkopt = max(1,nw) * nb; - work[1] = (real) lwkopt; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("SORMBR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - work[1] = 1.f; - if (*m == 0 || *n == 0) { - return 0; - } - - if (applyq) { - -/* Apply Q */ - - if (nq >= *k) { - -/* Q was determined by a call to SGEBRD with nq >= k */ - - sormqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], lwork, &iinfo); - } else if (nq > 1) { - -/* Q was determined by a call to SGEBRD with nq < k */ - - if (left) { - mi = *m - 1; - ni = *n; - i1 = 2; - i2 = 1; - } else { - mi = *m; - ni = *n - 1; - i1 = 1; - i2 = 2; - } - i__1 = nq - 1; - sormqr_(side, trans, &mi, &ni, &i__1, &a[a_dim1 + 2], lda, &tau[1] - , &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); - } - } else { - -/* Apply P */ - - if (notran) { - *(unsigned char *)transt = 'T'; - } else { - *(unsigned char *)transt = 'N'; - } - if (nq > *k) { - -/* P was determined by a call to SGEBRD with nq > k */ - - sormlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], lwork, &iinfo); - } else if (nq > 1) { - -/* P was determined by a call to SGEBRD with nq <= k */ - - if (left) { - mi = *m - 1; - ni = *n; - i1 = 2; - i2 = 1; - } else { - mi = *m; - ni = *n - 1; - i1 = 1; - i2 = 2; - } - i__1 = nq - 1; - sormlq_(side, transt, &mi, &ni, &i__1, &a[(a_dim1 << 1) + 1], lda, - &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, & - iinfo); - } - } - work[1] = (real) lwkopt; - return 0; - -/* End of SORMBR */ - -} /* sormbr_ */ - -/* Subroutine */ int sorml2_(char *side, char *trans, integer *m, integer *n, - integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, - real *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; - - /* Local variables */ - static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; - static real aii; - static logical left; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, - integer *, real *, real *, integer *, real *), xerbla_( - char *, integer *); - static logical notran; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - SORML2 overwrites the general real m by n matrix C with - - Q * C if SIDE = 'L' and TRANS = 'N', or - - Q'* C if SIDE = 'L' and TRANS = 'T', or - - C * Q if SIDE = 'R' and TRANS = 'N', or - - C * Q' if SIDE = 'R' and TRANS = 'T', - - where Q is a real orthogonal matrix defined as the product of k - elementary reflectors - - Q = H(k) . . . H(2) H(1) - - as returned by SGELQF. Q is of order m if SIDE = 'L' and of order n - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q' from the Left - = 'R': apply Q or Q' from the Right - - TRANS (input) CHARACTER*1 - = 'N': apply Q (No transpose) - = 'T': apply Q' (Transpose) - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) REAL array, dimension - (LDA,M) if SIDE = 'L', - (LDA,N) if SIDE = 'R' - The i-th row must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - SGELQF in the first k rows of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,K). - - TAU (input) REAL array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by SGELQF. - - C (input/output) REAL array, dimension (LDC,N) - On entry, the m by n matrix C. - On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace) REAL array, dimension - (N) if SIDE = 'L', - (M) if SIDE = 'R' - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - -/* NQ is the order of Q */ - - if (left) { - nq = *m; - } else { - nq = *n; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "T")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if (*k < 0 || *k > nq) { - *info = -5; - } else if (*lda < max(1,*k)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SORML2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || *k == 0) { - return 0; - } - - if (left && notran || ! left && ! notran) { - i1 = 1; - i2 = *k; - i3 = 1; - } else { - i1 = *k; - i2 = 1; - i3 = -1; - } - - if (left) { - ni = *n; - jc = 1; - } else { - mi = *m; - ic = 1; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - if (left) { - -/* H(i) is applied to C(i:m,1:n) */ - - mi = *m - i__ + 1; - ic = i__; - } else { - -/* H(i) is applied to C(1:m,i:n) */ - - ni = *n - i__ + 1; - jc = i__; - } - -/* Apply H(i) */ - - aii = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.f; - slarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &tau[i__], &c__[ - ic + jc * c_dim1], ldc, &work[1]); - a[i__ + i__ * a_dim1] = aii; -/* L10: */ - } - return 0; - -/* End of SORML2 */ - -} /* sorml2_ */ - -/* Subroutine */ int sormlq_(char *side, char *trans, integer *m, integer *n, - integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, - real *work, integer *lwork, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, - i__5; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i__; - static real t[4160] /* was [65][64] */; - static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; - static logical left; - extern logical lsame_(char *, char *); - static integer nbmin, iinfo; - extern /* Subroutine */ int sorml2_(char *, char *, integer *, integer *, - integer *, real *, integer *, real *, real *, integer *, real *, - integer *), slarfb_(char *, char *, char *, char * - , integer *, integer *, integer *, real *, integer *, real *, - integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, - real *, integer *, real *, real *, integer *); - static logical notran; - static integer ldwork; - static char transt[1]; - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SORMLQ overwrites the general real M-by-N matrix C with - - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'T': Q**T * C C * Q**T - - where Q is a real orthogonal matrix defined as the product of k - elementary reflectors - - Q = H(k) . . . H(2) H(1) - - as returned by SGELQF. Q is of order M if SIDE = 'L' and of order N - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q**T from the Left; - = 'R': apply Q or Q**T from the Right. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q; - = 'T': Transpose, apply Q**T. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) REAL array, dimension - (LDA,M) if SIDE = 'L', - (LDA,N) if SIDE = 'R' - The i-th row must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - SGELQF in the first k rows of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,K). - - TAU (input) REAL array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by SGELQF. - - C (input/output) REAL array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace/output) REAL array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - lquery = *lwork == -1; - -/* NQ is the order of Q and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "T")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if (*k < 0 || *k > nq) { - *info = -5; - } else if (*lda < max(1,*k)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -12; - } - - if (*info == 0) { - -/* - Determine the block size. NB may be at most NBMAX, where NBMAX - is used to define the local array T. - - Computing MIN - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "SORMLQ", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nb = min(i__1,i__2); - lwkopt = max(1,nw) * nb; - work[1] = (real) lwkopt; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("SORMLQ", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || *k == 0) { - work[1] = 1.f; - return 0; - } - - nbmin = 2; - ldwork = nw; - if (nb > 1 && nb < *k) { - iws = nw * nb; - if (*lwork < iws) { - nb = *lwork / ldwork; -/* - Computing MAX - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 2, i__2 = ilaenv_(&c__2, "SORMLQ", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nbmin = max(i__1,i__2); - } - } else { - iws = nw; - } - - if (nb < nbmin || nb >= *k) { - -/* Use unblocked code */ - - sorml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], &iinfo); - } else { - -/* Use blocked code */ - - if (left && notran || ! left && ! notran) { - i1 = 1; - i2 = *k; - i3 = nb; - } else { - i1 = (*k - 1) / nb * nb + 1; - i2 = 1; - i3 = -nb; - } - - if (left) { - ni = *n; - jc = 1; - } else { - mi = *m; - ic = 1; - } - - if (notran) { - *(unsigned char *)transt = 'T'; - } else { - *(unsigned char *)transt = 'N'; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__4 = nb, i__5 = *k - i__ + 1; - ib = min(i__4,i__5); - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__4 = nq - i__ + 1; - slarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1], - lda, &tau[i__], t, &c__65); - if (left) { - -/* H or H' is applied to C(i:m,1:n) */ - - mi = *m - i__ + 1; - ic = i__; - } else { - -/* H or H' is applied to C(1:m,i:n) */ - - ni = *n - i__ + 1; - jc = i__; - } - -/* Apply H or H' */ - - slarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__ - + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1], - ldc, &work[1], &ldwork); -/* L10: */ - } - } - work[1] = (real) lwkopt; - return 0; - -/* End of SORMLQ */ - -} /* sormlq_ */ - -/* Subroutine */ int sormql_(char *side, char *trans, integer *m, integer *n, - integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, - real *work, integer *lwork, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, - i__5; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i__; - static real t[4160] /* was [65][64] */; - static integer i1, i2, i3, ib, nb, mi, ni, nq, nw, iws; - static logical left; - extern logical lsame_(char *, char *); - static integer nbmin, iinfo; - extern /* Subroutine */ int sorm2l_(char *, char *, integer *, integer *, - integer *, real *, integer *, real *, real *, integer *, real *, - integer *), slarfb_(char *, char *, char *, char * - , integer *, integer *, integer *, real *, integer *, real *, - integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, - real *, integer *, real *, real *, integer *); - static logical notran; - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SORMQL overwrites the general real M-by-N matrix C with - - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'T': Q**T * C C * Q**T - - where Q is a real orthogonal matrix defined as the product of k - elementary reflectors - - Q = H(k) . . . H(2) H(1) - - as returned by SGEQLF. Q is of order M if SIDE = 'L' and of order N - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q**T from the Left; - = 'R': apply Q or Q**T from the Right. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q; - = 'T': Transpose, apply Q**T. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) REAL array, dimension (LDA,K) - The i-th column must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - SGEQLF in the last k columns of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. - If SIDE = 'L', LDA >= max(1,M); - if SIDE = 'R', LDA >= max(1,N). - - TAU (input) REAL array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by SGEQLF. - - C (input/output) REAL array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace/output) REAL array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - lquery = *lwork == -1; - -/* NQ is the order of Q and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "T")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if (*k < 0 || *k > nq) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -12; - } - - if (*info == 0) { - -/* - Determine the block size. NB may be at most NBMAX, where NBMAX - is used to define the local array T. - - Computing MIN - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "SORMQL", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nb = min(i__1,i__2); - lwkopt = max(1,nw) * nb; - work[1] = (real) lwkopt; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("SORMQL", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || *k == 0) { - work[1] = 1.f; - return 0; - } - - nbmin = 2; - ldwork = nw; - if (nb > 1 && nb < *k) { - iws = nw * nb; - if (*lwork < iws) { - nb = *lwork / ldwork; -/* - Computing MAX - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 2, i__2 = ilaenv_(&c__2, "SORMQL", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nbmin = max(i__1,i__2); - } - } else { - iws = nw; - } - - if (nb < nbmin || nb >= *k) { - -/* Use unblocked code */ - - sorm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], &iinfo); - } else { - -/* Use blocked code */ - - if (left && notran || ! left && ! notran) { - i1 = 1; - i2 = *k; - i3 = nb; - } else { - i1 = (*k - 1) / nb * nb + 1; - i2 = 1; - i3 = -nb; - } - - if (left) { - ni = *n; - } else { - mi = *m; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__4 = nb, i__5 = *k - i__ + 1; - ib = min(i__4,i__5); - -/* - Form the triangular factor of the block reflector - H = H(i+ib-1) . . . H(i+1) H(i) -*/ - - i__4 = nq - *k + i__ + ib - 1; - slarft_("Backward", "Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1] - , lda, &tau[i__], t, &c__65); - if (left) { - -/* H or H' is applied to C(1:m-k+i+ib-1,1:n) */ - - mi = *m - *k + i__ + ib - 1; - } else { - -/* H or H' is applied to C(1:m,1:n-k+i+ib-1) */ - - ni = *n - *k + i__ + ib - 1; - } - -/* Apply H or H' */ - - slarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, &a[ - i__ * a_dim1 + 1], lda, t, &c__65, &c__[c_offset], ldc, & - work[1], &ldwork); -/* L10: */ - } - } - work[1] = (real) lwkopt; - return 0; - -/* End of SORMQL */ - -} /* sormql_ */ - -/* Subroutine */ int sormqr_(char *side, char *trans, integer *m, integer *n, - integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, - real *work, integer *lwork, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, - i__5; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i__; - static real t[4160] /* was [65][64] */; - static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; - static logical left; - extern logical lsame_(char *, char *); - static integer nbmin, iinfo; - extern /* Subroutine */ int sorm2r_(char *, char *, integer *, integer *, - integer *, real *, integer *, real *, real *, integer *, real *, - integer *), slarfb_(char *, char *, char *, char * - , integer *, integer *, integer *, real *, integer *, real *, - integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, - real *, integer *, real *, real *, integer *); - static logical notran; - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SORMQR overwrites the general real M-by-N matrix C with - - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'T': Q**T * C C * Q**T - - where Q is a real orthogonal matrix defined as the product of k - elementary reflectors - - Q = H(1) H(2) . . . H(k) - - as returned by SGEQRF. Q is of order M if SIDE = 'L' and of order N - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q**T from the Left; - = 'R': apply Q or Q**T from the Right. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q; - = 'T': Transpose, apply Q**T. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) REAL array, dimension (LDA,K) - The i-th column must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - SGEQRF in the first k columns of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. - If SIDE = 'L', LDA >= max(1,M); - if SIDE = 'R', LDA >= max(1,N). - - TAU (input) REAL array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by SGEQRF. - - C (input/output) REAL array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace/output) REAL array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - lquery = *lwork == -1; - -/* NQ is the order of Q and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "T")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if (*k < 0 || *k > nq) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -12; - } - - if (*info == 0) { - -/* - Determine the block size. NB may be at most NBMAX, where NBMAX - is used to define the local array T. - - Computing MIN - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "SORMQR", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nb = min(i__1,i__2); - lwkopt = max(1,nw) * nb; - work[1] = (real) lwkopt; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("SORMQR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || *k == 0) { - work[1] = 1.f; - return 0; - } - - nbmin = 2; - ldwork = nw; - if (nb > 1 && nb < *k) { - iws = nw * nb; - if (*lwork < iws) { - nb = *lwork / ldwork; -/* - Computing MAX - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 2, i__2 = ilaenv_(&c__2, "SORMQR", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nbmin = max(i__1,i__2); - } - } else { - iws = nw; - } - - if (nb < nbmin || nb >= *k) { - -/* Use unblocked code */ - - sorm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], &iinfo); - } else { - -/* Use blocked code */ - - if (left && ! notran || ! left && notran) { - i1 = 1; - i2 = *k; - i3 = nb; - } else { - i1 = (*k - 1) / nb * nb + 1; - i2 = 1; - i3 = -nb; - } - - if (left) { - ni = *n; - jc = 1; - } else { - mi = *m; - ic = 1; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__4 = nb, i__5 = *k - i__ + 1; - ib = min(i__4,i__5); - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__4 = nq - i__ + 1; - slarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], t, &c__65) - ; - if (left) { - -/* H or H' is applied to C(i:m,1:n) */ - - mi = *m - i__ + 1; - ic = i__; - } else { - -/* H or H' is applied to C(1:m,i:n) */ - - ni = *n - i__ + 1; - jc = i__; - } - -/* Apply H or H' */ - - slarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[ - i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * - c_dim1], ldc, &work[1], &ldwork); -/* L10: */ - } - } - work[1] = (real) lwkopt; - return 0; - -/* End of SORMQR */ - -} /* sormqr_ */ - -/* Subroutine */ int sormtr_(char *side, char *uplo, char *trans, integer *m, - integer *n, real *a, integer *lda, real *tau, real *c__, integer *ldc, - real *work, integer *lwork, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2, i__3; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i1, i2, nb, mi, ni, nq, nw; - static logical left; - extern logical lsame_(char *, char *); - static integer iinfo; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int sormql_(char *, char *, integer *, integer *, - integer *, real *, integer *, real *, real *, integer *, real *, - integer *, integer *); - static integer lwkopt; - static logical lquery; - extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, - integer *, real *, integer *, real *, real *, integer *, real *, - integer *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SORMTR overwrites the general real M-by-N matrix C with - - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'T': Q**T * C C * Q**T - - where Q is a real orthogonal matrix of order nq, with nq = m if - SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of - nq-1 elementary reflectors, as returned by SSYTRD: - - if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); - - if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q**T from the Left; - = 'R': apply Q or Q**T from the Right. - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A contains elementary reflectors - from SSYTRD; - = 'L': Lower triangle of A contains elementary reflectors - from SSYTRD. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q; - = 'T': Transpose, apply Q**T. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - A (input) REAL array, dimension - (LDA,M) if SIDE = 'L' - (LDA,N) if SIDE = 'R' - The vectors which define the elementary reflectors, as - returned by SSYTRD. - - LDA (input) INTEGER - The leading dimension of the array A. - LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. - - TAU (input) REAL array, dimension - (M-1) if SIDE = 'L' - (N-1) if SIDE = 'R' - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by SSYTRD. - - C (input/output) REAL array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace/output) REAL array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - upper = lsame_(uplo, "U"); - lquery = *lwork == -1; - -/* NQ is the order of Q and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! upper && ! lsame_(uplo, "L")) { - *info = -2; - } else if (! lsame_(trans, "N") && ! lsame_(trans, - "T")) { - *info = -3; - } else if (*m < 0) { - *info = -4; - } else if (*n < 0) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -12; - } - - if (*info == 0) { - if (upper) { - if (left) { -/* Writing concatenation */ - i__1[0] = 1, a__1[0] = side; - i__1[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); - i__2 = *m - 1; - i__3 = *m - 1; - nb = ilaenv_(&c__1, "SORMQL", ch__1, &i__2, n, &i__3, &c_n1, ( - ftnlen)6, (ftnlen)2); - } else { -/* Writing concatenation */ - i__1[0] = 1, a__1[0] = side; - i__1[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); - i__2 = *n - 1; - i__3 = *n - 1; - nb = ilaenv_(&c__1, "SORMQL", ch__1, m, &i__2, &i__3, &c_n1, ( - ftnlen)6, (ftnlen)2); - } - } else { - if (left) { -/* Writing concatenation */ - i__1[0] = 1, a__1[0] = side; - i__1[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); - i__2 = *m - 1; - i__3 = *m - 1; - nb = ilaenv_(&c__1, "SORMQR", ch__1, &i__2, n, &i__3, &c_n1, ( - ftnlen)6, (ftnlen)2); - } else { -/* Writing concatenation */ - i__1[0] = 1, a__1[0] = side; - i__1[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); - i__2 = *n - 1; - i__3 = *n - 1; - nb = ilaenv_(&c__1, "SORMQR", ch__1, m, &i__2, &i__3, &c_n1, ( - ftnlen)6, (ftnlen)2); - } - } - lwkopt = max(1,nw) * nb; - work[1] = (real) lwkopt; - } - - if (*info != 0) { - i__2 = -(*info); - xerbla_("SORMTR", &i__2); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || nq == 1) { - work[1] = 1.f; - return 0; - } - - if (left) { - mi = *m - 1; - ni = *n; - } else { - mi = *m; - ni = *n - 1; - } - - if (upper) { - -/* Q was determined by a call to SSYTRD with UPLO = 'U' */ - - i__2 = nq - 1; - sormql_(side, trans, &mi, &ni, &i__2, &a[(a_dim1 << 1) + 1], lda, & - tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo); - } else { - -/* Q was determined by a call to SSYTRD with UPLO = 'L' */ - - if (left) { - i1 = 2; - i2 = 1; - } else { - i1 = 1; - i2 = 2; - } - i__2 = nq - 1; - sormqr_(side, trans, &mi, &ni, &i__2, &a[a_dim1 + 2], lda, &tau[1], & - c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); - } - work[1] = (real) lwkopt; - return 0; - -/* End of SORMTR */ - -} /* sormtr_ */ - -/* Subroutine */ int spotf2_(char *uplo, integer *n, real *a, integer *lda, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - real r__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer j; - static real ajj; - extern doublereal sdot_(integer *, real *, integer *, real *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), - sgemv_(char *, integer *, integer *, real *, real *, integer *, - real *, integer *, real *, real *, integer *); - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - SPOTF2 computes the Cholesky factorization of a real symmetric - positive definite matrix A. - - The factorization has the form - A = U' * U , if UPLO = 'U', or - A = L * L', if UPLO = 'L', - where U is an upper triangular matrix and L is lower triangular. - - This is the unblocked version of the algorithm, calling Level 2 BLAS. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the upper or lower triangular part of the - symmetric matrix A is stored. - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the symmetric matrix A. If UPLO = 'U', the leading - n by n upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading n by n lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - - On exit, if INFO = 0, the factor U or L from the Cholesky - factorization A = U'*U or A = L*L'. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - > 0: if INFO = k, the leading minor of order k is not - positive definite, and the factorization could not be - completed. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SPOTF2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - if (upper) { - -/* Compute the Cholesky factorization A = U'*U. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - -/* Compute U(J,J) and test for non-positive-definiteness. */ - - i__2 = j - 1; - ajj = a[j + j * a_dim1] - sdot_(&i__2, &a[j * a_dim1 + 1], &c__1, - &a[j * a_dim1 + 1], &c__1); - if (ajj <= 0.f) { - a[j + j * a_dim1] = ajj; - goto L30; - } - ajj = sqrt(ajj); - a[j + j * a_dim1] = ajj; - -/* Compute elements J+1:N of row J. */ - - if (j < *n) { - i__2 = j - 1; - i__3 = *n - j; - sgemv_("Transpose", &i__2, &i__3, &c_b1150, &a[(j + 1) * - a_dim1 + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b871, - &a[j + (j + 1) * a_dim1], lda); - i__2 = *n - j; - r__1 = 1.f / ajj; - sscal_(&i__2, &r__1, &a[j + (j + 1) * a_dim1], lda); - } -/* L10: */ - } - } else { - -/* Compute the Cholesky factorization A = L*L'. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - -/* Compute L(J,J) and test for non-positive-definiteness. */ - - i__2 = j - 1; - ajj = a[j + j * a_dim1] - sdot_(&i__2, &a[j + a_dim1], lda, &a[j - + a_dim1], lda); - if (ajj <= 0.f) { - a[j + j * a_dim1] = ajj; - goto L30; - } - ajj = sqrt(ajj); - a[j + j * a_dim1] = ajj; - -/* Compute elements J+1:N of column J. */ - - if (j < *n) { - i__2 = *n - j; - i__3 = j - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1150, &a[j + 1 + - a_dim1], lda, &a[j + a_dim1], lda, &c_b871, &a[j + 1 - + j * a_dim1], &c__1); - i__2 = *n - j; - r__1 = 1.f / ajj; - sscal_(&i__2, &r__1, &a[j + 1 + j * a_dim1], &c__1); - } -/* L20: */ - } - } - goto L40; - -L30: - *info = j; - -L40: - return 0; - -/* End of SPOTF2 */ - -} /* spotf2_ */ - -/* Subroutine */ int spotrf_(char *uplo, integer *n, real *a, integer *lda, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer j, jb, nb; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *); - static logical upper; - extern /* Subroutine */ int strsm_(char *, char *, char *, char *, - integer *, integer *, real *, real *, integer *, real *, integer * - ), ssyrk_(char *, char *, integer - *, integer *, real *, real *, integer *, real *, real *, integer * - ), spotf2_(char *, integer *, real *, integer *, - integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - SPOTRF computes the Cholesky factorization of a real symmetric - positive definite matrix A. - - The factorization has the form - A = U**T * U, if UPLO = 'U', or - A = L * L**T, if UPLO = 'L', - where U is an upper triangular matrix and L is lower triangular. - - This is the block version of the algorithm, calling Level 3 BLAS. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the symmetric matrix A. If UPLO = 'U', the leading - N-by-N upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading N-by-N lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - - On exit, if INFO = 0, the factor U or L from the Cholesky - factorization A = U**T*U or A = L*L**T. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the leading minor of order i is not - positive definite, and the factorization could not be - completed. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SPOTRF", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Determine the block size for this environment. */ - - nb = ilaenv_(&c__1, "SPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - if (nb <= 1 || nb >= *n) { - -/* Use unblocked code. */ - - spotf2_(uplo, n, &a[a_offset], lda, info); - } else { - -/* Use blocked code. */ - - if (upper) { - -/* Compute the Cholesky factorization A = U'*U. */ - - i__1 = *n; - i__2 = nb; - for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { - -/* - Update and factorize the current diagonal block and test - for non-positive-definiteness. - - Computing MIN -*/ - i__3 = nb, i__4 = *n - j + 1; - jb = min(i__3,i__4); - i__3 = j - 1; - ssyrk_("Upper", "Transpose", &jb, &i__3, &c_b1150, &a[j * - a_dim1 + 1], lda, &c_b871, &a[j + j * a_dim1], lda); - spotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); - if (*info != 0) { - goto L30; - } - if (j + jb <= *n) { - -/* Compute the current block row. */ - - i__3 = *n - j - jb + 1; - i__4 = j - 1; - sgemm_("Transpose", "No transpose", &jb, &i__3, &i__4, & - c_b1150, &a[j * a_dim1 + 1], lda, &a[(j + jb) * - a_dim1 + 1], lda, &c_b871, &a[j + (j + jb) * - a_dim1], lda); - i__3 = *n - j - jb + 1; - strsm_("Left", "Upper", "Transpose", "Non-unit", &jb, & - i__3, &c_b871, &a[j + j * a_dim1], lda, &a[j + (j - + jb) * a_dim1], lda); - } -/* L10: */ - } - - } else { - -/* Compute the Cholesky factorization A = L*L'. */ - - i__2 = *n; - i__1 = nb; - for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { - -/* - Update and factorize the current diagonal block and test - for non-positive-definiteness. - - Computing MIN -*/ - i__3 = nb, i__4 = *n - j + 1; - jb = min(i__3,i__4); - i__3 = j - 1; - ssyrk_("Lower", "No transpose", &jb, &i__3, &c_b1150, &a[j + - a_dim1], lda, &c_b871, &a[j + j * a_dim1], lda); - spotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); - if (*info != 0) { - goto L30; - } - if (j + jb <= *n) { - -/* Compute the current block column. */ - - i__3 = *n - j - jb + 1; - i__4 = j - 1; - sgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, & - c_b1150, &a[j + jb + a_dim1], lda, &a[j + a_dim1], - lda, &c_b871, &a[j + jb + j * a_dim1], lda); - i__3 = *n - j - jb + 1; - strsm_("Right", "Lower", "Transpose", "Non-unit", &i__3, & - jb, &c_b871, &a[j + j * a_dim1], lda, &a[j + jb + - j * a_dim1], lda); - } -/* L20: */ - } - } - } - goto L40; - -L30: - *info = *info + j - 1; - -L40: - return 0; - -/* End of SPOTRF */ - -} /* spotrf_ */ - -/* Subroutine */ int spotri_(char *uplo, integer *n, real *a, integer *lda, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1; - - /* Local variables */ - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *), slauum_( - char *, integer *, real *, integer *, integer *), strtri_( - char *, char *, integer *, real *, integer *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - SPOTRI computes the inverse of a real symmetric positive definite - matrix A using the Cholesky factorization A = U**T*U or A = L*L**T - computed by SPOTRF. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the triangular factor U or L from the Cholesky - factorization A = U**T*U or A = L*L**T, as computed by - SPOTRF. - On exit, the upper or lower triangle of the (symmetric) - inverse of A, overwriting the input factor U or L. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the (i,i) element of the factor U or L is - zero, and the inverse could not be computed. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SPOTRI", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Invert the triangular Cholesky factor U or L. */ - - strtri_(uplo, "Non-unit", n, &a[a_offset], lda, info); - if (*info > 0) { - return 0; - } - -/* Form inv(U)*inv(U)' or inv(L)'*inv(L). */ - - slauum_(uplo, n, &a[a_offset], lda, info); - - return 0; - -/* End of SPOTRI */ - -} /* spotri_ */ - -/* Subroutine */ int spotrs_(char *uplo, integer *n, integer *nrhs, real *a, - integer *lda, real *b, integer *ldb, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1; - - /* Local variables */ - extern logical lsame_(char *, char *); - static logical upper; - extern /* Subroutine */ int strsm_(char *, char *, char *, char *, - integer *, integer *, real *, real *, integer *, real *, integer * - ), xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - SPOTRS solves a system of linear equations A*X = B with a symmetric - positive definite matrix A using the Cholesky factorization - A = U**T*U or A = L*L**T computed by SPOTRF. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - NRHS (input) INTEGER - The number of right hand sides, i.e., the number of columns - of the matrix B. NRHS >= 0. - - A (input) REAL array, dimension (LDA,N) - The triangular factor U or L from the Cholesky factorization - A = U**T*U or A = L*L**T, as computed by SPOTRF. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - B (input/output) REAL array, dimension (LDB,NRHS) - On entry, the right hand side matrix B. - On exit, the solution matrix X. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*nrhs < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*ldb < max(1,*n)) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SPOTRS", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0 || *nrhs == 0) { - return 0; - } - - if (upper) { - -/* - Solve A*X = B where A = U'*U. - - Solve U'*X = B, overwriting B with X. -*/ - - strsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b871, &a[ - a_offset], lda, &b[b_offset], ldb); - -/* Solve U*X = B, overwriting B with X. */ - - strsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b871, - &a[a_offset], lda, &b[b_offset], ldb); - } else { - -/* - Solve A*X = B where A = L*L'. - - Solve L*X = B, overwriting B with X. -*/ - - strsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b871, - &a[a_offset], lda, &b[b_offset], ldb); - -/* Solve L'*X = B, overwriting B with X. */ - - strsm_("Left", "Lower", "Transpose", "Non-unit", n, nrhs, &c_b871, &a[ - a_offset], lda, &b[b_offset], ldb); - } - - return 0; - -/* End of SPOTRS */ - -} /* spotrs_ */ - -/* Subroutine */ int sstedc_(char *compz, integer *n, real *d__, real *e, - real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, - integer *liwork, integer *info) -{ - /* System generated locals */ - integer z_dim1, z_offset, i__1, i__2; - real r__1, r__2; - - /* Builtin functions */ - double log(doublereal); - integer pow_ii(integer *, integer *); - double sqrt(doublereal); - - /* Local variables */ - static integer i__, j, k, m; - static real p; - static integer ii, end, lgn; - static real eps, tiny; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *); - static integer lwmin, start; - extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, - integer *), slaed0_(integer *, integer *, integer *, real *, real - *, real *, integer *, real *, integer *, real *, integer *, - integer *); - extern doublereal slamch_(char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, - real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, - real *, integer *), slaset_(char *, integer *, integer *, - real *, real *, real *, integer *); - static integer liwmin, icompz; - static real orgnrm; - extern doublereal slanst_(char *, integer *, real *, real *); - extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *), - slasrt_(char *, integer *, real *, integer *); - static logical lquery; - static integer smlsiz; - extern /* Subroutine */ int ssteqr_(char *, integer *, real *, real *, - real *, integer *, real *, integer *); - static integer storez, strtrw; - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SSTEDC computes all eigenvalues and, optionally, eigenvectors of a - symmetric tridiagonal matrix using the divide and conquer method. - The eigenvectors of a full or band real symmetric matrix can also be - found if SSYTRD or SSPTRD or SSBTRD has been used to reduce this - matrix to tridiagonal form. - - This code makes very mild assumptions about floating point - arithmetic. It will work on machines with a guard digit in - add/subtract, or on those binary machines without guard digits - which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. - It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. See SLAED3 for details. - - Arguments - ========= - - COMPZ (input) CHARACTER*1 - = 'N': Compute eigenvalues only. - = 'I': Compute eigenvectors of tridiagonal matrix also. - = 'V': Compute eigenvectors of original dense symmetric - matrix also. On entry, Z contains the orthogonal - matrix used to reduce the original matrix to - tridiagonal form. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - D (input/output) REAL array, dimension (N) - On entry, the diagonal elements of the tridiagonal matrix. - On exit, if INFO = 0, the eigenvalues in ascending order. - - E (input/output) REAL array, dimension (N-1) - On entry, the subdiagonal elements of the tridiagonal matrix. - On exit, E has been destroyed. - - Z (input/output) REAL array, dimension (LDZ,N) - On entry, if COMPZ = 'V', then Z contains the orthogonal - matrix used in the reduction to tridiagonal form. - On exit, if INFO = 0, then if COMPZ = 'V', Z contains the - orthonormal eigenvectors of the original symmetric matrix, - and if COMPZ = 'I', Z contains the orthonormal eigenvectors - of the symmetric tridiagonal matrix. - If COMPZ = 'N', then Z is not referenced. - - LDZ (input) INTEGER - The leading dimension of the array Z. LDZ >= 1. - If eigenvectors are desired, then LDZ >= max(1,N). - - WORK (workspace/output) REAL array, - dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If COMPZ = 'N' or N <= 1 then LWORK must be at least 1. - If COMPZ = 'V' and N > 1 then LWORK must be at least - ( 1 + 3*N + 2*N*lg N + 3*N**2 ), - where lg( N ) = smallest integer k such - that 2**k >= N. - If COMPZ = 'I' and N > 1 then LWORK must be at least - ( 1 + 4*N + N**2 ). - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - IWORK (workspace/output) INTEGER array, dimension (LIWORK) - On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. - - LIWORK (input) INTEGER - The dimension of the array IWORK. - If COMPZ = 'N' or N <= 1 then LIWORK must be at least 1. - If COMPZ = 'V' and N > 1 then LIWORK must be at least - ( 6 + 6*N + 5*N*lg N ). - If COMPZ = 'I' and N > 1 then LIWORK must be at least - ( 3 + 5*N ). - - If LIWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the IWORK array, - returns this value as the first entry of the IWORK array, and - no error message related to LIWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an eigenvalue while - working on the submatrix lying in rows and columns - INFO/(N+1) through mod(INFO,N+1). - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - Modified by Francoise Tisseur, University of Tennessee. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - --work; - --iwork; - - /* Function Body */ - *info = 0; - lquery = *lwork == -1 || *liwork == -1; - - if (lsame_(compz, "N")) { - icompz = 0; - } else if (lsame_(compz, "V")) { - icompz = 1; - } else if (lsame_(compz, "I")) { - icompz = 2; - } else { - icompz = -1; - } - if (*n <= 1 || icompz <= 0) { - liwmin = 1; - lwmin = 1; - } else { - lgn = (integer) (log((real) (*n)) / log(2.f)); - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (icompz == 1) { -/* Computing 2nd power */ - i__1 = *n; - lwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3; - liwmin = *n * 6 + 6 + *n * 5 * lgn; - } else if (icompz == 2) { -/* Computing 2nd power */ - i__1 = *n; - lwmin = (*n << 2) + 1 + i__1 * i__1; - liwmin = *n * 5 + 3; - } - } - if (icompz < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { - *info = -6; - } else if (*lwork < lwmin && ! lquery) { - *info = -8; - } else if (*liwork < liwmin && ! lquery) { - *info = -10; - } - - if (*info == 0) { - work[1] = (real) lwmin; - iwork[1] = liwmin; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("SSTEDC", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - if (*n == 1) { - if (icompz != 0) { - z__[z_dim1 + 1] = 1.f; - } - return 0; - } - - smlsiz = ilaenv_(&c__9, "SSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); - -/* - If the following conditional clause is removed, then the routine - will use the Divide and Conquer routine to compute only the - eigenvalues, which requires (3N + 3N**2) real workspace and - (2 + 5N + 2N lg(N)) integer workspace. - Since on many architectures SSTERF is much faster than any other - algorithm for finding eigenvalues only, it is used here - as the default. - - If COMPZ = 'N', use SSTERF to compute the eigenvalues. -*/ - - if (icompz == 0) { - ssterf_(n, &d__[1], &e[1], info); - return 0; - } - -/* - If N is smaller than the minimum divide size (SMLSIZ+1), then - solve the problem with another solver. -*/ - - if (*n <= smlsiz) { - if (icompz == 0) { - ssterf_(n, &d__[1], &e[1], info); - return 0; - } else if (icompz == 2) { - ssteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], - info); - return 0; - } else { - ssteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], - info); - return 0; - } - } - -/* - If COMPZ = 'V', the Z matrix must be stored elsewhere for later - use. -*/ - - if (icompz == 1) { - storez = *n * *n + 1; - } else { - storez = 1; - } - - if (icompz == 2) { - slaset_("Full", n, n, &c_b1101, &c_b871, &z__[z_offset], ldz); - } - -/* Scale. */ - - orgnrm = slanst_("M", n, &d__[1], &e[1]); - if (orgnrm == 0.f) { - return 0; - } - - eps = slamch_("Epsilon"); - - start = 1; - -/* while ( START <= N ) */ - -L10: - if (start <= *n) { - -/* - Let END be the position of the next subdiagonal entry such that - E( END ) <= TINY or END = N if no such subdiagonal exists. The - matrix identified by the elements between START and END - constitutes an independent sub-problem. -*/ - - end = start; -L20: - if (end < *n) { - tiny = eps * sqrt((r__1 = d__[end], dabs(r__1))) * sqrt((r__2 = - d__[end + 1], dabs(r__2))); - if ((r__1 = e[end], dabs(r__1)) > tiny) { - ++end; - goto L20; - } - } - -/* (Sub) Problem determined. Compute its size and solve it. */ - - m = end - start + 1; - if (m == 1) { - start = end + 1; - goto L10; - } - if (m > smlsiz) { - *info = smlsiz; - -/* Scale. */ - - orgnrm = slanst_("M", &m, &d__[start], &e[start]); - slascl_("G", &c__0, &c__0, &orgnrm, &c_b871, &m, &c__1, &d__[ - start], &m, info); - i__1 = m - 1; - i__2 = m - 1; - slascl_("G", &c__0, &c__0, &orgnrm, &c_b871, &i__1, &c__1, &e[ - start], &i__2, info); - - if (icompz == 1) { - strtrw = 1; - } else { - strtrw = start; - } - slaed0_(&icompz, n, &m, &d__[start], &e[start], &z__[strtrw + - start * z_dim1], ldz, &work[1], n, &work[storez], &iwork[ - 1], info); - if (*info != 0) { - *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m - + 1) + start - 1; - return 0; - } - -/* Scale back. */ - - slascl_("G", &c__0, &c__0, &c_b871, &orgnrm, &m, &c__1, &d__[ - start], &m, info); - - } else { - if (icompz == 1) { - -/* - Since QR won't update a Z matrix which is larger than the - length of D, we must solve the sub-problem in a workspace and - then multiply back into Z. -*/ - - ssteqr_("I", &m, &d__[start], &e[start], &work[1], &m, &work[ - m * m + 1], info); - slacpy_("A", n, &m, &z__[start * z_dim1 + 1], ldz, &work[ - storez], n); - sgemm_("N", "N", n, &m, &m, &c_b871, &work[storez], ldz, & - work[1], &m, &c_b1101, &z__[start * z_dim1 + 1], ldz); - } else if (icompz == 2) { - ssteqr_("I", &m, &d__[start], &e[start], &z__[start + start * - z_dim1], ldz, &work[1], info); - } else { - ssterf_(&m, &d__[start], &e[start], info); - } - if (*info != 0) { - *info = start * (*n + 1) + end; - return 0; - } - } - - start = end + 1; - goto L10; - } - -/* - endwhile - - If the problem split any number of times, then the eigenvalues - will not be properly ordered. Here we permute the eigenvalues - (and the associated eigenvectors) into ascending order. -*/ - - if (m != *n) { - if (icompz == 0) { - -/* Use Quick Sort */ - - slasrt_("I", n, &d__[1], info); - - } else { - -/* Use Selection Sort to minimize swaps of eigenvectors */ - - i__1 = *n; - for (ii = 2; ii <= i__1; ++ii) { - i__ = ii - 1; - k = i__; - p = d__[i__]; - i__2 = *n; - for (j = ii; j <= i__2; ++j) { - if (d__[j] < p) { - k = j; - p = d__[j]; - } -/* L30: */ - } - if (k != i__) { - d__[k] = d__[i__]; - d__[i__] = p; - sswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 - + 1], &c__1); - } -/* L40: */ - } - } - } - - work[1] = (real) lwmin; - iwork[1] = liwmin; - - return 0; - -/* End of SSTEDC */ - -} /* sstedc_ */ - -/* Subroutine */ int ssteqr_(char *compz, integer *n, real *d__, real *e, - real *z__, integer *ldz, real *work, integer *info) -{ - /* System generated locals */ - integer z_dim1, z_offset, i__1, i__2; - real r__1, r__2; - - /* Builtin functions */ - double sqrt(doublereal), r_sign(real *, real *); - - /* Local variables */ - static real b, c__, f, g; - static integer i__, j, k, l, m; - static real p, r__, s; - static integer l1, ii, mm, lm1, mm1, nm1; - static real rt1, rt2, eps; - static integer lsv; - static real tst, eps2; - static integer lend, jtot; - extern /* Subroutine */ int slae2_(real *, real *, real *, real *, real *) - ; - extern logical lsame_(char *, char *); - static real anorm; - extern /* Subroutine */ int slasr_(char *, char *, char *, integer *, - integer *, real *, real *, real *, integer *), sswap_(integer *, real *, integer *, real *, integer *); - static integer lendm1, lendp1; - extern /* Subroutine */ int slaev2_(real *, real *, real *, real *, real * - , real *, real *); - extern doublereal slapy2_(real *, real *); - static integer iscale; - extern doublereal slamch_(char *); - static real safmin; - extern /* Subroutine */ int xerbla_(char *, integer *); - static real safmax; - extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, - real *, integer *, integer *, real *, integer *, integer *); - static integer lendsv; - extern /* Subroutine */ int slartg_(real *, real *, real *, real *, real * - ), slaset_(char *, integer *, integer *, real *, real *, real *, - integer *); - static real ssfmin; - static integer nmaxit, icompz; - static real ssfmax; - extern doublereal slanst_(char *, integer *, real *, real *); - extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - SSTEQR computes all eigenvalues and, optionally, eigenvectors of a - symmetric tridiagonal matrix using the implicit QL or QR method. - The eigenvectors of a full or band symmetric matrix can also be found - if SSYTRD or SSPTRD or SSBTRD has been used to reduce this matrix to - tridiagonal form. - - Arguments - ========= - - COMPZ (input) CHARACTER*1 - = 'N': Compute eigenvalues only. - = 'V': Compute eigenvalues and eigenvectors of the original - symmetric matrix. On entry, Z must contain the - orthogonal matrix used to reduce the original matrix - to tridiagonal form. - = 'I': Compute eigenvalues and eigenvectors of the - tridiagonal matrix. Z is initialized to the identity - matrix. - - N (input) INTEGER - The order of the matrix. N >= 0. - - D (input/output) REAL array, dimension (N) - On entry, the diagonal elements of the tridiagonal matrix. - On exit, if INFO = 0, the eigenvalues in ascending order. - - E (input/output) REAL array, dimension (N-1) - On entry, the (n-1) subdiagonal elements of the tridiagonal - matrix. - On exit, E has been destroyed. - - Z (input/output) REAL array, dimension (LDZ, N) - On entry, if COMPZ = 'V', then Z contains the orthogonal - matrix used in the reduction to tridiagonal form. - On exit, if INFO = 0, then if COMPZ = 'V', Z contains the - orthonormal eigenvectors of the original symmetric matrix, - and if COMPZ = 'I', Z contains the orthonormal eigenvectors - of the symmetric tridiagonal matrix. - If COMPZ = 'N', then Z is not referenced. - - LDZ (input) INTEGER - The leading dimension of the array Z. LDZ >= 1, and if - eigenvectors are desired, then LDZ >= max(1,N). - - WORK (workspace) REAL array, dimension (max(1,2*N-2)) - If COMPZ = 'N', then WORK is not referenced. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: the algorithm has failed to find all the eigenvalues in - a total of 30*N iterations; if INFO = i, then i - elements of E have not converged to zero; on exit, D - and E contain the elements of a symmetric tridiagonal - matrix which is orthogonally similar to the original - matrix. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - --work; - - /* Function Body */ - *info = 0; - - if (lsame_(compz, "N")) { - icompz = 0; - } else if (lsame_(compz, "V")) { - icompz = 1; - } else if (lsame_(compz, "I")) { - icompz = 2; - } else { - icompz = -1; - } - if (icompz < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { - *info = -6; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SSTEQR", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - if (*n == 1) { - if (icompz == 2) { - z__[z_dim1 + 1] = 1.f; - } - return 0; - } - -/* Determine the unit roundoff and over/underflow thresholds. */ - - eps = slamch_("E"); -/* Computing 2nd power */ - r__1 = eps; - eps2 = r__1 * r__1; - safmin = slamch_("S"); - safmax = 1.f / safmin; - ssfmax = sqrt(safmax) / 3.f; - ssfmin = sqrt(safmin) / eps2; - -/* - Compute the eigenvalues and eigenvectors of the tridiagonal - matrix. -*/ - - if (icompz == 2) { - slaset_("Full", n, n, &c_b1101, &c_b871, &z__[z_offset], ldz); - } - - nmaxit = *n * 30; - jtot = 0; - -/* - Determine where the matrix splits and choose QL or QR iteration - for each block, according to whether top or bottom diagonal - element is smaller. -*/ - - l1 = 1; - nm1 = *n - 1; - -L10: - if (l1 > *n) { - goto L160; - } - if (l1 > 1) { - e[l1 - 1] = 0.f; - } - if (l1 <= nm1) { - i__1 = nm1; - for (m = l1; m <= i__1; ++m) { - tst = (r__1 = e[m], dabs(r__1)); - if (tst == 0.f) { - goto L30; - } - if (tst <= sqrt((r__1 = d__[m], dabs(r__1))) * sqrt((r__2 = d__[m - + 1], dabs(r__2))) * eps) { - e[m] = 0.f; - goto L30; - } -/* L20: */ - } - } - m = *n; - -L30: - l = l1; - lsv = l; - lend = m; - lendsv = lend; - l1 = m + 1; - if (lend == l) { - goto L10; - } - -/* Scale submatrix in rows and columns L to LEND */ - - i__1 = lend - l + 1; - anorm = slanst_("I", &i__1, &d__[l], &e[l]); - iscale = 0; - if (anorm == 0.f) { - goto L10; - } - if (anorm > ssfmax) { - iscale = 1; - i__1 = lend - l + 1; - slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, - info); - i__1 = lend - l; - slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, - info); - } else if (anorm < ssfmin) { - iscale = 2; - i__1 = lend - l + 1; - slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, - info); - i__1 = lend - l; - slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, - info); - } - -/* Choose between QL and QR iteration */ - - if ((r__1 = d__[lend], dabs(r__1)) < (r__2 = d__[l], dabs(r__2))) { - lend = lsv; - l = lendsv; - } - - if (lend > l) { - -/* - QL Iteration - - Look for small subdiagonal element. -*/ - -L40: - if (l != lend) { - lendm1 = lend - 1; - i__1 = lendm1; - for (m = l; m <= i__1; ++m) { -/* Computing 2nd power */ - r__2 = (r__1 = e[m], dabs(r__1)); - tst = r__2 * r__2; - if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m - + 1], dabs(r__2)) + safmin) { - goto L60; - } -/* L50: */ - } - } - - m = lend; - -L60: - if (m < lend) { - e[m] = 0.f; - } - p = d__[l]; - if (m == l) { - goto L80; - } - -/* - If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 - to compute its eigensystem. -*/ - - if (m == l + 1) { - if (icompz > 0) { - slaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); - work[l] = c__; - work[*n - 1 + l] = s; - slasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & - z__[l * z_dim1 + 1], ldz); - } else { - slae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); - } - d__[l] = rt1; - d__[l + 1] = rt2; - e[l] = 0.f; - l += 2; - if (l <= lend) { - goto L40; - } - goto L140; - } - - if (jtot == nmaxit) { - goto L140; - } - ++jtot; - -/* Form shift. */ - - g = (d__[l + 1] - p) / (e[l] * 2.f); - r__ = slapy2_(&g, &c_b871); - g = d__[m] - p + e[l] / (g + r_sign(&r__, &g)); - - s = 1.f; - c__ = 1.f; - p = 0.f; - -/* Inner loop */ - - mm1 = m - 1; - i__1 = l; - for (i__ = mm1; i__ >= i__1; --i__) { - f = s * e[i__]; - b = c__ * e[i__]; - slartg_(&g, &f, &c__, &s, &r__); - if (i__ != m - 1) { - e[i__ + 1] = r__; - } - g = d__[i__ + 1] - p; - r__ = (d__[i__] - g) * s + c__ * 2.f * b; - p = s * r__; - d__[i__ + 1] = g + p; - g = c__ * r__ - b; - -/* If eigenvectors are desired, then save rotations. */ - - if (icompz > 0) { - work[i__] = c__; - work[*n - 1 + i__] = -s; - } - -/* L70: */ - } - -/* If eigenvectors are desired, then apply saved rotations. */ - - if (icompz > 0) { - mm = m - l + 1; - slasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l - * z_dim1 + 1], ldz); - } - - d__[l] -= p; - e[l] = g; - goto L40; - -/* Eigenvalue found. */ - -L80: - d__[l] = p; - - ++l; - if (l <= lend) { - goto L40; - } - goto L140; - - } else { - -/* - QR Iteration - - Look for small superdiagonal element. -*/ - -L90: - if (l != lend) { - lendp1 = lend + 1; - i__1 = lendp1; - for (m = l; m >= i__1; --m) { -/* Computing 2nd power */ - r__2 = (r__1 = e[m - 1], dabs(r__1)); - tst = r__2 * r__2; - if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m - - 1], dabs(r__2)) + safmin) { - goto L110; - } -/* L100: */ - } - } - - m = lend; - -L110: - if (m > lend) { - e[m - 1] = 0.f; - } - p = d__[l]; - if (m == l) { - goto L130; - } - -/* - If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 - to compute its eigensystem. -*/ - - if (m == l - 1) { - if (icompz > 0) { - slaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) - ; - work[m] = c__; - work[*n - 1 + m] = s; - slasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & - z__[(l - 1) * z_dim1 + 1], ldz); - } else { - slae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); - } - d__[l - 1] = rt1; - d__[l] = rt2; - e[l - 1] = 0.f; - l += -2; - if (l >= lend) { - goto L90; - } - goto L140; - } - - if (jtot == nmaxit) { - goto L140; - } - ++jtot; - -/* Form shift. */ - - g = (d__[l - 1] - p) / (e[l - 1] * 2.f); - r__ = slapy2_(&g, &c_b871); - g = d__[m] - p + e[l - 1] / (g + r_sign(&r__, &g)); - - s = 1.f; - c__ = 1.f; - p = 0.f; - -/* Inner loop */ - - lm1 = l - 1; - i__1 = lm1; - for (i__ = m; i__ <= i__1; ++i__) { - f = s * e[i__]; - b = c__ * e[i__]; - slartg_(&g, &f, &c__, &s, &r__); - if (i__ != m) { - e[i__ - 1] = r__; - } - g = d__[i__] - p; - r__ = (d__[i__ + 1] - g) * s + c__ * 2.f * b; - p = s * r__; - d__[i__] = g + p; - g = c__ * r__ - b; - -/* If eigenvectors are desired, then save rotations. */ - - if (icompz > 0) { - work[i__] = c__; - work[*n - 1 + i__] = s; - } - -/* L120: */ - } - -/* If eigenvectors are desired, then apply saved rotations. */ - - if (icompz > 0) { - mm = l - m + 1; - slasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m - * z_dim1 + 1], ldz); - } - - d__[l] -= p; - e[lm1] = g; - goto L90; - -/* Eigenvalue found. */ - -L130: - d__[l] = p; - - --l; - if (l >= lend) { - goto L90; - } - goto L140; - - } - -/* Undo scaling if necessary */ - -L140: - if (iscale == 1) { - i__1 = lendsv - lsv + 1; - slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], - n, info); - i__1 = lendsv - lsv; - slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, - info); - } else if (iscale == 2) { - i__1 = lendsv - lsv + 1; - slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], - n, info); - i__1 = lendsv - lsv; - slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, - info); - } - -/* - Check for no convergence to an eigenvalue after a total - of N*MAXIT iterations. -*/ - - if (jtot < nmaxit) { - goto L10; - } - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - if (e[i__] != 0.f) { - ++(*info); - } -/* L150: */ - } - goto L190; - -/* Order eigenvalues and eigenvectors. */ - -L160: - if (icompz == 0) { - -/* Use Quick Sort */ - - slasrt_("I", n, &d__[1], info); - - } else { - -/* Use Selection Sort to minimize swaps of eigenvectors */ - - i__1 = *n; - for (ii = 2; ii <= i__1; ++ii) { - i__ = ii - 1; - k = i__; - p = d__[i__]; - i__2 = *n; - for (j = ii; j <= i__2; ++j) { - if (d__[j] < p) { - k = j; - p = d__[j]; - } -/* L170: */ - } - if (k != i__) { - d__[k] = d__[i__]; - d__[i__] = p; - sswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], - &c__1); - } -/* L180: */ - } - } - -L190: - return 0; - -/* End of SSTEQR */ - -} /* ssteqr_ */ - -/* Subroutine */ int ssterf_(integer *n, real *d__, real *e, integer *info) -{ - /* System generated locals */ - integer i__1; - real r__1, r__2, r__3; - - /* Builtin functions */ - double sqrt(doublereal), r_sign(real *, real *); - - /* Local variables */ - static real c__; - static integer i__, l, m; - static real p, r__, s; - static integer l1; - static real bb, rt1, rt2, eps, rte; - static integer lsv; - static real eps2, oldc; - static integer lend, jtot; - extern /* Subroutine */ int slae2_(real *, real *, real *, real *, real *) - ; - static real gamma, alpha, sigma, anorm; - extern doublereal slapy2_(real *, real *); - static integer iscale; - static real oldgam; - extern doublereal slamch_(char *); - static real safmin; - extern /* Subroutine */ int xerbla_(char *, integer *); - static real safmax; - extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, - real *, integer *, integer *, real *, integer *, integer *); - static integer lendsv; - static real ssfmin; - static integer nmaxit; - static real ssfmax; - extern doublereal slanst_(char *, integer *, real *, real *); - extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SSTERF computes all eigenvalues of a symmetric tridiagonal matrix - using the Pal-Walker-Kahan variant of the QL or QR algorithm. - - Arguments - ========= - - N (input) INTEGER - The order of the matrix. N >= 0. - - D (input/output) REAL array, dimension (N) - On entry, the n diagonal elements of the tridiagonal matrix. - On exit, if INFO = 0, the eigenvalues in ascending order. - - E (input/output) REAL array, dimension (N-1) - On entry, the (n-1) subdiagonal elements of the tridiagonal - matrix. - On exit, E has been destroyed. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: the algorithm failed to find all of the eigenvalues in - a total of 30*N iterations; if INFO = i, then i - elements of E have not converged to zero. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --e; - --d__; - - /* Function Body */ - *info = 0; - -/* Quick return if possible */ - - if (*n < 0) { - *info = -1; - i__1 = -(*info); - xerbla_("SSTERF", &i__1); - return 0; - } - if (*n <= 1) { - return 0; - } - -/* Determine the unit roundoff for this environment. */ - - eps = slamch_("E"); -/* Computing 2nd power */ - r__1 = eps; - eps2 = r__1 * r__1; - safmin = slamch_("S"); - safmax = 1.f / safmin; - ssfmax = sqrt(safmax) / 3.f; - ssfmin = sqrt(safmin) / eps2; - -/* Compute the eigenvalues of the tridiagonal matrix. */ - - nmaxit = *n * 30; - sigma = 0.f; - jtot = 0; - -/* - Determine where the matrix splits and choose QL or QR iteration - for each block, according to whether top or bottom diagonal - element is smaller. -*/ - - l1 = 1; - -L10: - if (l1 > *n) { - goto L170; - } - if (l1 > 1) { - e[l1 - 1] = 0.f; - } - i__1 = *n - 1; - for (m = l1; m <= i__1; ++m) { - if ((r__3 = e[m], dabs(r__3)) <= sqrt((r__1 = d__[m], dabs(r__1))) * - sqrt((r__2 = d__[m + 1], dabs(r__2))) * eps) { - e[m] = 0.f; - goto L30; - } -/* L20: */ - } - m = *n; - -L30: - l = l1; - lsv = l; - lend = m; - lendsv = lend; - l1 = m + 1; - if (lend == l) { - goto L10; - } - -/* Scale submatrix in rows and columns L to LEND */ - - i__1 = lend - l + 1; - anorm = slanst_("I", &i__1, &d__[l], &e[l]); - iscale = 0; - if (anorm > ssfmax) { - iscale = 1; - i__1 = lend - l + 1; - slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, - info); - i__1 = lend - l; - slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, - info); - } else if (anorm < ssfmin) { - iscale = 2; - i__1 = lend - l + 1; - slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, - info); - i__1 = lend - l; - slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, - info); - } - - i__1 = lend - 1; - for (i__ = l; i__ <= i__1; ++i__) { -/* Computing 2nd power */ - r__1 = e[i__]; - e[i__] = r__1 * r__1; -/* L40: */ - } - -/* Choose between QL and QR iteration */ - - if ((r__1 = d__[lend], dabs(r__1)) < (r__2 = d__[l], dabs(r__2))) { - lend = lsv; - l = lendsv; - } - - if (lend >= l) { - -/* - QL Iteration - - Look for small subdiagonal element. -*/ - -L50: - if (l != lend) { - i__1 = lend - 1; - for (m = l; m <= i__1; ++m) { - if ((r__2 = e[m], dabs(r__2)) <= eps2 * (r__1 = d__[m] * d__[ - m + 1], dabs(r__1))) { - goto L70; - } -/* L60: */ - } - } - m = lend; - -L70: - if (m < lend) { - e[m] = 0.f; - } - p = d__[l]; - if (m == l) { - goto L90; - } - -/* - If remaining matrix is 2 by 2, use SLAE2 to compute its - eigenvalues. -*/ - - if (m == l + 1) { - rte = sqrt(e[l]); - slae2_(&d__[l], &rte, &d__[l + 1], &rt1, &rt2); - d__[l] = rt1; - d__[l + 1] = rt2; - e[l] = 0.f; - l += 2; - if (l <= lend) { - goto L50; - } - goto L150; - } - - if (jtot == nmaxit) { - goto L150; - } - ++jtot; - -/* Form shift. */ - - rte = sqrt(e[l]); - sigma = (d__[l + 1] - p) / (rte * 2.f); - r__ = slapy2_(&sigma, &c_b871); - sigma = p - rte / (sigma + r_sign(&r__, &sigma)); - - c__ = 1.f; - s = 0.f; - gamma = d__[m] - sigma; - p = gamma * gamma; - -/* Inner loop */ - - i__1 = l; - for (i__ = m - 1; i__ >= i__1; --i__) { - bb = e[i__]; - r__ = p + bb; - if (i__ != m - 1) { - e[i__ + 1] = s * r__; - } - oldc = c__; - c__ = p / r__; - s = bb / r__; - oldgam = gamma; - alpha = d__[i__]; - gamma = c__ * (alpha - sigma) - s * oldgam; - d__[i__ + 1] = oldgam + (alpha - gamma); - if (c__ != 0.f) { - p = gamma * gamma / c__; - } else { - p = oldc * bb; - } -/* L80: */ - } - - e[l] = s * p; - d__[l] = sigma + gamma; - goto L50; - -/* Eigenvalue found. */ - -L90: - d__[l] = p; - - ++l; - if (l <= lend) { - goto L50; - } - goto L150; - - } else { - -/* - QR Iteration - - Look for small superdiagonal element. -*/ - -L100: - i__1 = lend + 1; - for (m = l; m >= i__1; --m) { - if ((r__2 = e[m - 1], dabs(r__2)) <= eps2 * (r__1 = d__[m] * d__[ - m - 1], dabs(r__1))) { - goto L120; - } -/* L110: */ - } - m = lend; - -L120: - if (m > lend) { - e[m - 1] = 0.f; - } - p = d__[l]; - if (m == l) { - goto L140; - } - -/* - If remaining matrix is 2 by 2, use SLAE2 to compute its - eigenvalues. -*/ - - if (m == l - 1) { - rte = sqrt(e[l - 1]); - slae2_(&d__[l], &rte, &d__[l - 1], &rt1, &rt2); - d__[l] = rt1; - d__[l - 1] = rt2; - e[l - 1] = 0.f; - l += -2; - if (l >= lend) { - goto L100; - } - goto L150; - } - - if (jtot == nmaxit) { - goto L150; - } - ++jtot; - -/* Form shift. */ - - rte = sqrt(e[l - 1]); - sigma = (d__[l - 1] - p) / (rte * 2.f); - r__ = slapy2_(&sigma, &c_b871); - sigma = p - rte / (sigma + r_sign(&r__, &sigma)); - - c__ = 1.f; - s = 0.f; - gamma = d__[m] - sigma; - p = gamma * gamma; - -/* Inner loop */ - - i__1 = l - 1; - for (i__ = m; i__ <= i__1; ++i__) { - bb = e[i__]; - r__ = p + bb; - if (i__ != m) { - e[i__ - 1] = s * r__; - } - oldc = c__; - c__ = p / r__; - s = bb / r__; - oldgam = gamma; - alpha = d__[i__ + 1]; - gamma = c__ * (alpha - sigma) - s * oldgam; - d__[i__] = oldgam + (alpha - gamma); - if (c__ != 0.f) { - p = gamma * gamma / c__; - } else { - p = oldc * bb; - } -/* L130: */ - } - - e[l - 1] = s * p; - d__[l] = sigma + gamma; - goto L100; - -/* Eigenvalue found. */ - -L140: - d__[l] = p; - - --l; - if (l >= lend) { - goto L100; - } - goto L150; - - } - -/* Undo scaling if necessary */ - -L150: - if (iscale == 1) { - i__1 = lendsv - lsv + 1; - slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], - n, info); - } - if (iscale == 2) { - i__1 = lendsv - lsv + 1; - slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], - n, info); - } - -/* - Check for no convergence to an eigenvalue after a total - of N*MAXIT iterations. -*/ - - if (jtot < nmaxit) { - goto L10; - } - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - if (e[i__] != 0.f) { - ++(*info); - } -/* L160: */ - } - goto L180; - -/* Sort eigenvalues in increasing order. */ - -L170: - slasrt_("I", n, &d__[1], info); - -L180: - return 0; - -/* End of SSTERF */ - -} /* ssterf_ */ - -/* Subroutine */ int ssyevd_(char *jobz, char *uplo, integer *n, real *a, - integer *lda, real *w, real *work, integer *lwork, integer *iwork, - integer *liwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - real r__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static real eps; - static integer inde; - static real anrm, rmin, rmax; - static integer lopt; - static real sigma; - extern logical lsame_(char *, char *); - static integer iinfo; - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); - static integer lwmin, liopt; - static logical lower, wantz; - static integer indwk2, llwrk2, iscale; - extern doublereal slamch_(char *); - static real safmin; - extern /* Subroutine */ int xerbla_(char *, integer *); - static real bignum; - extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, - real *, integer *, integer *, real *, integer *, integer *); - static integer indtau; - extern /* Subroutine */ int sstedc_(char *, integer *, real *, real *, - real *, integer *, real *, integer *, integer *, integer *, - integer *), slacpy_(char *, integer *, integer *, real *, - integer *, real *, integer *); - static integer indwrk, liwmin; - extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *); - extern doublereal slansy_(char *, char *, integer *, real *, integer *, - real *); - static integer llwork; - static real smlnum; - static logical lquery; - extern /* Subroutine */ int sormtr_(char *, char *, char *, integer *, - integer *, real *, integer *, real *, real *, integer *, real *, - integer *, integer *), ssytrd_(char *, - integer *, real *, integer *, real *, real *, real *, real *, - integer *, integer *); - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SSYEVD computes all eigenvalues and, optionally, eigenvectors of a - real symmetric matrix A. If eigenvectors are desired, it uses a - divide and conquer algorithm. - - The divide and conquer algorithm makes very mild assumptions about - floating point arithmetic. It will work on machines with a guard - digit in add/subtract, or on those binary machines without guard - digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or - Cray-2. It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. - - Because of large use of BLAS of level 3, SSYEVD needs N**2 more - workspace than SSYEVX. - - Arguments - ========= - - JOBZ (input) CHARACTER*1 - = 'N': Compute eigenvalues only; - = 'V': Compute eigenvalues and eigenvectors. - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) REAL array, dimension (LDA, N) - On entry, the symmetric matrix A. If UPLO = 'U', the - leading N-by-N upper triangular part of A contains the - upper triangular part of the matrix A. If UPLO = 'L', - the leading N-by-N lower triangular part of A contains - the lower triangular part of the matrix A. - On exit, if JOBZ = 'V', then if INFO = 0, A contains the - orthonormal eigenvectors of the matrix A. - If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') - or the upper triangle (if UPLO='U') of A, including the - diagonal, is destroyed. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - W (output) REAL array, dimension (N) - If INFO = 0, the eigenvalues in ascending order. - - WORK (workspace/output) REAL array, - dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If N <= 1, LWORK must be at least 1. - If JOBZ = 'N' and N > 1, LWORK must be at least 2*N+1. - If JOBZ = 'V' and N > 1, LWORK must be at least - 1 + 6*N + 2*N**2. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - IWORK (workspace/output) INTEGER array, dimension (LIWORK) - On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. - - LIWORK (input) INTEGER - The dimension of the array IWORK. - If N <= 1, LIWORK must be at least 1. - If JOBZ = 'N' and N > 1, LIWORK must be at least 1. - If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. - - If LIWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the IWORK array, - returns this value as the first entry of the IWORK array, and - no error message related to LIWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the algorithm failed to converge; i - off-diagonal elements of an intermediate tridiagonal - form did not converge to zero. - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - Modified by Francoise Tisseur, University of Tennessee. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --w; - --work; - --iwork; - - /* Function Body */ - wantz = lsame_(jobz, "V"); - lower = lsame_(uplo, "L"); - lquery = *lwork == -1 || *liwork == -1; - - *info = 0; - if (*n <= 1) { - liwmin = 1; - lwmin = 1; - lopt = lwmin; - liopt = liwmin; - } else { - if (wantz) { - liwmin = *n * 5 + 3; -/* Computing 2nd power */ - i__1 = *n; - lwmin = *n * 6 + 1 + (i__1 * i__1 << 1); - } else { - liwmin = 1; - lwmin = (*n << 1) + 1; - } - lopt = lwmin; - liopt = liwmin; - } - if (! (wantz || lsame_(jobz, "N"))) { - *info = -1; - } else if (! (lower || lsame_(uplo, "U"))) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*lwork < lwmin && ! lquery) { - *info = -8; - } else if (*liwork < liwmin && ! lquery) { - *info = -10; - } - - if (*info == 0) { - work[1] = (real) lopt; - iwork[1] = liopt; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("SSYEVD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - if (*n == 1) { - w[1] = a[a_dim1 + 1]; - if (wantz) { - a[a_dim1 + 1] = 1.f; - } - return 0; - } - -/* Get machine constants. */ - - safmin = slamch_("Safe minimum"); - eps = slamch_("Precision"); - smlnum = safmin / eps; - bignum = 1.f / smlnum; - rmin = sqrt(smlnum); - rmax = sqrt(bignum); - -/* Scale matrix to allowable range, if necessary. */ - - anrm = slansy_("M", uplo, n, &a[a_offset], lda, &work[1]); - iscale = 0; - if (anrm > 0.f && anrm < rmin) { - iscale = 1; - sigma = rmin / anrm; - } else if (anrm > rmax) { - iscale = 1; - sigma = rmax / anrm; - } - if (iscale == 1) { - slascl_(uplo, &c__0, &c__0, &c_b871, &sigma, n, n, &a[a_offset], lda, - info); - } - -/* Call SSYTRD to reduce symmetric matrix to tridiagonal form. */ - - inde = 1; - indtau = inde + *n; - indwrk = indtau + *n; - llwork = *lwork - indwrk + 1; - indwk2 = indwrk + *n * *n; - llwrk2 = *lwork - indwk2 + 1; - - ssytrd_(uplo, n, &a[a_offset], lda, &w[1], &work[inde], &work[indtau], & - work[indwrk], &llwork, &iinfo); - lopt = (*n << 1) + work[indwrk]; - -/* - For eigenvalues only, call SSTERF. For eigenvectors, first call - SSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the - tridiagonal matrix, then call SORMTR to multiply it by the - Householder transformations stored in A. -*/ - - if (! wantz) { - ssterf_(n, &w[1], &work[inde], info); - } else { - sstedc_("I", n, &w[1], &work[inde], &work[indwrk], n, &work[indwk2], & - llwrk2, &iwork[1], liwork, info); - sormtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ - indwrk], n, &work[indwk2], &llwrk2, &iinfo); - slacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); -/* - Computing MAX - Computing 2nd power -*/ - i__3 = *n; - i__1 = lopt, i__2 = *n * 6 + 1 + (i__3 * i__3 << 1); - lopt = max(i__1,i__2); - } - -/* If matrix was scaled, then rescale eigenvalues appropriately. */ - - if (iscale == 1) { - r__1 = 1.f / sigma; - sscal_(n, &r__1, &w[1], &c__1); - } - - work[1] = (real) lopt; - iwork[1] = liopt; - - return 0; - -/* End of SSYEVD */ - -} /* ssyevd_ */ - -/* Subroutine */ int ssytd2_(char *uplo, integer *n, real *a, integer *lda, - real *d__, real *e, real *tau, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__; - static real taui; - extern doublereal sdot_(integer *, real *, integer *, real *, integer *); - extern /* Subroutine */ int ssyr2_(char *, integer *, real *, real *, - integer *, real *, integer *, real *, integer *); - static real alpha; - extern logical lsame_(char *, char *); - static logical upper; - extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, - real *, integer *), ssymv_(char *, integer *, real *, real *, - integer *, real *, integer *, real *, real *, integer *), - xerbla_(char *, integer *), slarfg_(integer *, real *, - real *, integer *, real *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - SSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal - form T by an orthogonal similarity transformation: Q' * A * Q = T. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the upper or lower triangular part of the - symmetric matrix A is stored: - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the symmetric matrix A. If UPLO = 'U', the leading - n-by-n upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading n-by-n lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - On exit, if UPLO = 'U', the diagonal and first superdiagonal - of A are overwritten by the corresponding elements of the - tridiagonal matrix T, and the elements above the first - superdiagonal, with the array TAU, represent the orthogonal - matrix Q as a product of elementary reflectors; if UPLO - = 'L', the diagonal and first subdiagonal of A are over- - written by the corresponding elements of the tridiagonal - matrix T, and the elements below the first subdiagonal, with - the array TAU, represent the orthogonal matrix Q as a product - of elementary reflectors. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - D (output) REAL array, dimension (N) - The diagonal elements of the tridiagonal matrix T: - D(i) = A(i,i). - - E (output) REAL array, dimension (N-1) - The off-diagonal elements of the tridiagonal matrix T: - E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. - - TAU (output) REAL array, dimension (N-1) - The scalar factors of the elementary reflectors (see Further - Details). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - If UPLO = 'U', the matrix Q is represented as a product of elementary - reflectors - - Q = H(n-1) . . . H(2) H(1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in - A(1:i-1,i+1), and tau in TAU(i). - - If UPLO = 'L', the matrix Q is represented as a product of elementary - reflectors - - Q = H(1) H(2) . . . H(n-1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), - and tau in TAU(i). - - The contents of A on exit are illustrated by the following examples - with n = 5: - - if UPLO = 'U': if UPLO = 'L': - - ( d e v2 v3 v4 ) ( d ) - ( d e v3 v4 ) ( e d ) - ( d e v4 ) ( v1 e d ) - ( d e ) ( v1 v2 e d ) - ( d ) ( v1 v2 v3 e d ) - - where d and e denote diagonal and off-diagonal elements of T, and vi - denotes an element of the vector defining H(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tau; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SSYTD2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n <= 0) { - return 0; - } - - if (upper) { - -/* Reduce the upper triangle of A */ - - for (i__ = *n - 1; i__ >= 1; --i__) { - -/* - Generate elementary reflector H(i) = I - tau * v * v' - to annihilate A(1:i-1,i+1) -*/ - - slarfg_(&i__, &a[i__ + (i__ + 1) * a_dim1], &a[(i__ + 1) * a_dim1 - + 1], &c__1, &taui); - e[i__] = a[i__ + (i__ + 1) * a_dim1]; - - if (taui != 0.f) { - -/* Apply H(i) from both sides to A(1:i,1:i) */ - - a[i__ + (i__ + 1) * a_dim1] = 1.f; - -/* Compute x := tau * A * v storing x in TAU(1:i) */ - - ssymv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * - a_dim1 + 1], &c__1, &c_b1101, &tau[1], &c__1); - -/* Compute w := x - 1/2 * tau * (x'*v) * v */ - - alpha = taui * -.5f * sdot_(&i__, &tau[1], &c__1, &a[(i__ + 1) - * a_dim1 + 1], &c__1); - saxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[ - 1], &c__1); - -/* - Apply the transformation as a rank-2 update: - A := A - v * w' - w * v' -*/ - - ssyr2_(uplo, &i__, &c_b1150, &a[(i__ + 1) * a_dim1 + 1], & - c__1, &tau[1], &c__1, &a[a_offset], lda); - - a[i__ + (i__ + 1) * a_dim1] = e[i__]; - } - d__[i__ + 1] = a[i__ + 1 + (i__ + 1) * a_dim1]; - tau[i__] = taui; -/* L10: */ - } - d__[1] = a[a_dim1 + 1]; - } else { - -/* Reduce the lower triangle of A */ - - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* - Generate elementary reflector H(i) = I - tau * v * v' - to annihilate A(i+2:n,i) -*/ - - i__2 = *n - i__; -/* Computing MIN */ - i__3 = i__ + 2; - slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ * - a_dim1], &c__1, &taui); - e[i__] = a[i__ + 1 + i__ * a_dim1]; - - if (taui != 0.f) { - -/* Apply H(i) from both sides to A(i+1:n,i+1:n) */ - - a[i__ + 1 + i__ * a_dim1] = 1.f; - -/* Compute x := tau * A * v storing y in TAU(i:n-1) */ - - i__2 = *n - i__; - ssymv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], - lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b1101, & - tau[i__], &c__1); - -/* Compute w := x - 1/2 * tau * (x'*v) * v */ - - i__2 = *n - i__; - alpha = taui * -.5f * sdot_(&i__2, &tau[i__], &c__1, &a[i__ + - 1 + i__ * a_dim1], &c__1); - i__2 = *n - i__; - saxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ - i__], &c__1); - -/* - Apply the transformation as a rank-2 update: - A := A - v * w' - w * v' -*/ - - i__2 = *n - i__; - ssyr2_(uplo, &i__2, &c_b1150, &a[i__ + 1 + i__ * a_dim1], & - c__1, &tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * - a_dim1], lda); - - a[i__ + 1 + i__ * a_dim1] = e[i__]; - } - d__[i__] = a[i__ + i__ * a_dim1]; - tau[i__] = taui; -/* L20: */ - } - d__[*n] = a[*n + *n * a_dim1]; - } - - return 0; - -/* End of SSYTD2 */ - -} /* ssytd2_ */ - -/* Subroutine */ int ssytrd_(char *uplo, integer *n, real *a, integer *lda, - real *d__, real *e, real *tau, real *work, integer *lwork, integer * - info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j, nb, kk, nx, iws; - extern logical lsame_(char *, char *); - static integer nbmin, iinfo; - static logical upper; - extern /* Subroutine */ int ssytd2_(char *, integer *, real *, integer *, - real *, real *, real *, integer *), ssyr2k_(char *, char * - , integer *, integer *, real *, real *, integer *, real *, - integer *, real *, real *, integer *), xerbla_( - char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slatrd_(char *, integer *, integer *, real *, - integer *, real *, real *, real *, integer *); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SSYTRD reduces a real symmetric matrix A to real symmetric - tridiagonal form T by an orthogonal similarity transformation: - Q**T * A * Q = T. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the symmetric matrix A. If UPLO = 'U', the leading - N-by-N upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading N-by-N lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - On exit, if UPLO = 'U', the diagonal and first superdiagonal - of A are overwritten by the corresponding elements of the - tridiagonal matrix T, and the elements above the first - superdiagonal, with the array TAU, represent the orthogonal - matrix Q as a product of elementary reflectors; if UPLO - = 'L', the diagonal and first subdiagonal of A are over- - written by the corresponding elements of the tridiagonal - matrix T, and the elements below the first subdiagonal, with - the array TAU, represent the orthogonal matrix Q as a product - of elementary reflectors. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - D (output) REAL array, dimension (N) - The diagonal elements of the tridiagonal matrix T: - D(i) = A(i,i). - - E (output) REAL array, dimension (N-1) - The off-diagonal elements of the tridiagonal matrix T: - E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. - - TAU (output) REAL array, dimension (N-1) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace/output) REAL array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= 1. - For optimum performance LWORK >= N*NB, where NB is the - optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - If UPLO = 'U', the matrix Q is represented as a product of elementary - reflectors - - Q = H(n-1) . . . H(2) H(1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in - A(1:i-1,i+1), and tau in TAU(i). - - If UPLO = 'L', the matrix Q is represented as a product of elementary - reflectors - - Q = H(1) H(2) . . . H(n-1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), - and tau in TAU(i). - - The contents of A on exit are illustrated by the following examples - with n = 5: - - if UPLO = 'U': if UPLO = 'L': - - ( d e v2 v3 v4 ) ( d ) - ( d e v3 v4 ) ( e d ) - ( d e v4 ) ( v1 e d ) - ( d e ) ( v1 v2 e d ) - ( d ) ( v1 v2 v3 e d ) - - where d and e denote diagonal and off-diagonal elements of T, and vi - denotes an element of the vector defining H(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tau; - --work; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - lquery = *lwork == -1; - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } else if (*lwork < 1 && ! lquery) { - *info = -9; - } - - if (*info == 0) { - -/* Determine the block size. */ - - nb = ilaenv_(&c__1, "SSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, - (ftnlen)1); - lwkopt = *n * nb; - work[1] = (real) lwkopt; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("SSYTRD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - work[1] = 1.f; - return 0; - } - - nx = *n; - iws = 1; - if (nb > 1 && nb < *n) { - -/* - Determine when to cross over from blocked to unblocked code - (last block is always handled by unblocked code). - - Computing MAX -*/ - i__1 = nb, i__2 = ilaenv_(&c__3, "SSYTRD", uplo, n, &c_n1, &c_n1, & - c_n1, (ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < *n) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *n; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: determine the - minimum value of NB, and reduce NB or force use of - unblocked code by setting NX = N. - - Computing MAX -*/ - i__1 = *lwork / ldwork; - nb = max(i__1,1); - nbmin = ilaenv_(&c__2, "SSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, - (ftnlen)6, (ftnlen)1); - if (nb < nbmin) { - nx = *n; - } - } - } else { - nx = *n; - } - } else { - nb = 1; - } - - if (upper) { - -/* - Reduce the upper triangle of A. - Columns 1:kk are handled by the unblocked method. -*/ - - kk = *n - (*n - nx + nb - 1) / nb * nb; - i__1 = kk + 1; - i__2 = -nb; - for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += - i__2) { - -/* - Reduce columns i:i+nb-1 to tridiagonal form and form the - matrix W which is needed to update the unreduced part of - the matrix -*/ - - i__3 = i__ + nb - 1; - slatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & - work[1], &ldwork); - -/* - Update the unreduced submatrix A(1:i-1,1:i-1), using an - update of the form: A := A - V*W' - W*V' -*/ - - i__3 = i__ - 1; - ssyr2k_(uplo, "No transpose", &i__3, &nb, &c_b1150, &a[i__ * - a_dim1 + 1], lda, &work[1], &ldwork, &c_b871, &a[a_offset] - , lda); - -/* - Copy superdiagonal elements back into A, and diagonal - elements into D -*/ - - i__3 = i__ + nb - 1; - for (j = i__; j <= i__3; ++j) { - a[j - 1 + j * a_dim1] = e[j - 1]; - d__[j] = a[j + j * a_dim1]; -/* L10: */ - } -/* L20: */ - } - -/* Use unblocked code to reduce the last or only block */ - - ssytd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo); - } else { - -/* Reduce the lower triangle of A */ - - i__2 = *n - nx; - i__1 = nb; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { - -/* - Reduce columns i:i+nb-1 to tridiagonal form and form the - matrix W which is needed to update the unreduced part of - the matrix -*/ - - i__3 = *n - i__ + 1; - slatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], & - tau[i__], &work[1], &ldwork); - -/* - Update the unreduced submatrix A(i+ib:n,i+ib:n), using - an update of the form: A := A - V*W' - W*V' -*/ - - i__3 = *n - i__ - nb + 1; - ssyr2k_(uplo, "No transpose", &i__3, &nb, &c_b1150, &a[i__ + nb + - i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b871, &a[ - i__ + nb + (i__ + nb) * a_dim1], lda); - -/* - Copy subdiagonal elements back into A, and diagonal - elements into D -*/ - - i__3 = i__ + nb - 1; - for (j = i__; j <= i__3; ++j) { - a[j + 1 + j * a_dim1] = e[j]; - d__[j] = a[j + j * a_dim1]; -/* L30: */ - } -/* L40: */ - } - -/* Use unblocked code to reduce the last or only block */ - - i__1 = *n - i__ + 1; - ssytd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], - &tau[i__], &iinfo); - } - - work[1] = (real) lwkopt; - return 0; - -/* End of SSYTRD */ - -} /* ssytrd_ */ - -/* Subroutine */ int strevc_(char *side, char *howmny, logical *select, - integer *n, real *t, integer *ldt, real *vl, integer *ldvl, real *vr, - integer *ldvr, integer *mm, integer *m, real *work, integer *info) -{ - /* System generated locals */ - integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, - i__2, i__3; - real r__1, r__2, r__3, r__4; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__, j, k; - static real x[4] /* was [2][2] */; - static integer j1, j2, n2, ii, ki, ip, is; - static real wi, wr, rec, ulp, beta, emax; - static logical pair, allv; - static integer ierr; - static real unfl, ovfl, smin; - extern doublereal sdot_(integer *, real *, integer *, real *, integer *); - static logical over; - static real vmax; - static integer jnxt; - static real scale; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); - static real remax; - static logical leftv; - extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, - real *, integer *, real *, integer *, real *, real *, integer *); - static logical bothv; - static real vcrit; - static logical somev; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *); - static real xnorm; - extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, - real *, integer *), slaln2_(logical *, integer *, integer *, real - *, real *, real *, integer *, real *, real *, real *, integer *, - real *, real *, real *, integer *, real *, real *, integer *), - slabad_(real *, real *); - extern doublereal slamch_(char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static real bignum; - extern integer isamax_(integer *, real *, integer *); - static logical rightv; - static real smlnum; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - STREVC computes some or all of the right and/or left eigenvectors of - a real upper quasi-triangular matrix T. - - The right eigenvector x and the left eigenvector y of T corresponding - to an eigenvalue w are defined by: - - T*x = w*x, y'*T = w*y' - - where y' denotes the conjugate transpose of the vector y. - - If all eigenvectors are requested, the routine may either return the - matrices X and/or Y of right or left eigenvectors of T, or the - products Q*X and/or Q*Y, where Q is an input orthogonal - matrix. If T was obtained from the real-Schur factorization of an - original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of - right or left eigenvectors of A. - - T must be in Schur canonical form (as returned by SHSEQR), that is, - block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each - 2-by-2 diagonal block has its diagonal elements equal and its - off-diagonal elements of opposite sign. Corresponding to each 2-by-2 - diagonal block is a complex conjugate pair of eigenvalues and - eigenvectors; only one eigenvector of the pair is computed, namely - the one corresponding to the eigenvalue with positive imaginary part. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'R': compute right eigenvectors only; - = 'L': compute left eigenvectors only; - = 'B': compute both right and left eigenvectors. - - HOWMNY (input) CHARACTER*1 - = 'A': compute all right and/or left eigenvectors; - = 'B': compute all right and/or left eigenvectors, - and backtransform them using the input matrices - supplied in VR and/or VL; - = 'S': compute selected right and/or left eigenvectors, - specified by the logical array SELECT. - - SELECT (input/output) LOGICAL array, dimension (N) - If HOWMNY = 'S', SELECT specifies the eigenvectors to be - computed. - If HOWMNY = 'A' or 'B', SELECT is not referenced. - To select the real eigenvector corresponding to a real - eigenvalue w(j), SELECT(j) must be set to .TRUE.. To select - the complex eigenvector corresponding to a complex conjugate - pair w(j) and w(j+1), either SELECT(j) or SELECT(j+1) must be - set to .TRUE.; then on exit SELECT(j) is .TRUE. and - SELECT(j+1) is .FALSE.. - - N (input) INTEGER - The order of the matrix T. N >= 0. - - T (input) REAL array, dimension (LDT,N) - The upper quasi-triangular matrix T in Schur canonical form. - - LDT (input) INTEGER - The leading dimension of the array T. LDT >= max(1,N). - - VL (input/output) REAL array, dimension (LDVL,MM) - On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must - contain an N-by-N matrix Q (usually the orthogonal matrix Q - of Schur vectors returned by SHSEQR). - On exit, if SIDE = 'L' or 'B', VL contains: - if HOWMNY = 'A', the matrix Y of left eigenvectors of T; - VL has the same quasi-lower triangular form - as T'. If T(i,i) is a real eigenvalue, then - the i-th column VL(i) of VL is its - corresponding eigenvector. If T(i:i+1,i:i+1) - is a 2-by-2 block whose eigenvalues are - complex-conjugate eigenvalues of T, then - VL(i)+sqrt(-1)*VL(i+1) is the complex - eigenvector corresponding to the eigenvalue - with positive real part. - if HOWMNY = 'B', the matrix Q*Y; - if HOWMNY = 'S', the left eigenvectors of T specified by - SELECT, stored consecutively in the columns - of VL, in the same order as their - eigenvalues. - A complex eigenvector corresponding to a complex eigenvalue - is stored in two consecutive columns, the first holding the - real part, and the second the imaginary part. - If SIDE = 'R', VL is not referenced. - - LDVL (input) INTEGER - The leading dimension of the array VL. LDVL >= max(1,N) if - SIDE = 'L' or 'B'; LDVL >= 1 otherwise. - - VR (input/output) REAL array, dimension (LDVR,MM) - On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must - contain an N-by-N matrix Q (usually the orthogonal matrix Q - of Schur vectors returned by SHSEQR). - On exit, if SIDE = 'R' or 'B', VR contains: - if HOWMNY = 'A', the matrix X of right eigenvectors of T; - VR has the same quasi-upper triangular form - as T. If T(i,i) is a real eigenvalue, then - the i-th column VR(i) of VR is its - corresponding eigenvector. If T(i:i+1,i:i+1) - is a 2-by-2 block whose eigenvalues are - complex-conjugate eigenvalues of T, then - VR(i)+sqrt(-1)*VR(i+1) is the complex - eigenvector corresponding to the eigenvalue - with positive real part. - if HOWMNY = 'B', the matrix Q*X; - if HOWMNY = 'S', the right eigenvectors of T specified by - SELECT, stored consecutively in the columns - of VR, in the same order as their - eigenvalues. - A complex eigenvector corresponding to a complex eigenvalue - is stored in two consecutive columns, the first holding the - real part and the second the imaginary part. - If SIDE = 'L', VR is not referenced. - - LDVR (input) INTEGER - The leading dimension of the array VR. LDVR >= max(1,N) if - SIDE = 'R' or 'B'; LDVR >= 1 otherwise. - - MM (input) INTEGER - The number of columns in the arrays VL and/or VR. MM >= M. - - M (output) INTEGER - The number of columns in the arrays VL and/or VR actually - used to store the eigenvectors. - If HOWMNY = 'A' or 'B', M is set to N. - Each selected real eigenvector occupies one column and each - selected complex eigenvector occupies two columns. - - WORK (workspace) REAL array, dimension (3*N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The algorithm used in this program is basically backward (forward) - substitution, with scaling to make the the code robust against - possible overflow. - - Each eigenvector is normalized so that the element of largest - magnitude has magnitude 1; here the magnitude of a complex number - (x,y) is taken to be |x| + |y|. - - ===================================================================== - - - Decode and test the input parameters -*/ - - /* Parameter adjustments */ - --select; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - vl_dim1 = *ldvl; - vl_offset = 1 + vl_dim1; - vl -= vl_offset; - vr_dim1 = *ldvr; - vr_offset = 1 + vr_dim1; - vr -= vr_offset; - --work; - - /* Function Body */ - bothv = lsame_(side, "B"); - rightv = lsame_(side, "R") || bothv; - leftv = lsame_(side, "L") || bothv; - - allv = lsame_(howmny, "A"); - over = lsame_(howmny, "B"); - somev = lsame_(howmny, "S"); - - *info = 0; - if (! rightv && ! leftv) { - *info = -1; - } else if (! allv && ! over && ! somev) { - *info = -2; - } else if (*n < 0) { - *info = -4; - } else if (*ldt < max(1,*n)) { - *info = -6; - } else if (*ldvl < 1 || leftv && *ldvl < *n) { - *info = -8; - } else if (*ldvr < 1 || rightv && *ldvr < *n) { - *info = -10; - } else { - -/* - Set M to the number of columns required to store the selected - eigenvectors, standardize the array SELECT if necessary, and - test MM. -*/ - - if (somev) { - *m = 0; - pair = FALSE_; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (pair) { - pair = FALSE_; - select[j] = FALSE_; - } else { - if (j < *n) { - if (t[j + 1 + j * t_dim1] == 0.f) { - if (select[j]) { - ++(*m); - } - } else { - pair = TRUE_; - if (select[j] || select[j + 1]) { - select[j] = TRUE_; - *m += 2; - } - } - } else { - if (select[*n]) { - ++(*m); - } - } - } -/* L10: */ - } - } else { - *m = *n; - } - - if (*mm < *m) { - *info = -11; - } - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("STREVC", &i__1); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - -/* Set the constants to control overflow. */ - - unfl = slamch_("Safe minimum"); - ovfl = 1.f / unfl; - slabad_(&unfl, &ovfl); - ulp = slamch_("Precision"); - smlnum = unfl * (*n / ulp); - bignum = (1.f - ulp) / smlnum; - -/* - Compute 1-norm of each column of strictly upper triangular - part of T to control overflow in triangular solver. -*/ - - work[1] = 0.f; - i__1 = *n; - for (j = 2; j <= i__1; ++j) { - work[j] = 0.f; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - work[j] += (r__1 = t[i__ + j * t_dim1], dabs(r__1)); -/* L20: */ - } -/* L30: */ - } - -/* - Index IP is used to specify the real or complex eigenvalue: - IP = 0, real eigenvalue, - 1, first of conjugate complex pair: (wr,wi) - -1, second of conjugate complex pair: (wr,wi) -*/ - - n2 = *n << 1; - - if (rightv) { - -/* Compute right eigenvectors. */ - - ip = 0; - is = *m; - for (ki = *n; ki >= 1; --ki) { - - if (ip == 1) { - goto L130; - } - if (ki == 1) { - goto L40; - } - if (t[ki + (ki - 1) * t_dim1] == 0.f) { - goto L40; - } - ip = -1; - -L40: - if (somev) { - if (ip == 0) { - if (! select[ki]) { - goto L130; - } - } else { - if (! select[ki - 1]) { - goto L130; - } - } - } - -/* Compute the KI-th eigenvalue (WR,WI). */ - - wr = t[ki + ki * t_dim1]; - wi = 0.f; - if (ip != 0) { - wi = sqrt((r__1 = t[ki + (ki - 1) * t_dim1], dabs(r__1))) * - sqrt((r__2 = t[ki - 1 + ki * t_dim1], dabs(r__2))); - } -/* Computing MAX */ - r__1 = ulp * (dabs(wr) + dabs(wi)); - smin = dmax(r__1,smlnum); - - if (ip == 0) { - -/* Real right eigenvector */ - - work[ki + *n] = 1.f; - -/* Form right-hand side */ - - i__1 = ki - 1; - for (k = 1; k <= i__1; ++k) { - work[k + *n] = -t[k + ki * t_dim1]; -/* L50: */ - } - -/* - Solve the upper quasi-triangular system: - (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK. -*/ - - jnxt = ki - 1; - for (j = ki - 1; j >= 1; --j) { - if (j > jnxt) { - goto L60; - } - j1 = j; - j2 = j; - jnxt = j - 1; - if (j > 1) { - if (t[j + (j - 1) * t_dim1] != 0.f) { - j1 = j - 1; - jnxt = j - 2; - } - } - - if (j1 == j2) { - -/* 1-by-1 diagonal block */ - - slaln2_(&c_false, &c__1, &c__1, &smin, &c_b871, &t[j - + j * t_dim1], ldt, &c_b871, &c_b871, &work[j - + *n], n, &wr, &c_b1101, x, &c__2, &scale, & - xnorm, &ierr); - -/* - Scale X(1,1) to avoid overflow when updating - the right-hand side. -*/ - - if (xnorm > 1.f) { - if (work[j] > bignum / xnorm) { - x[0] /= xnorm; - scale /= xnorm; - } - } - -/* Scale if necessary */ - - if (scale != 1.f) { - sscal_(&ki, &scale, &work[*n + 1], &c__1); - } - work[j + *n] = x[0]; - -/* Update right-hand side */ - - i__1 = j - 1; - r__1 = -x[0]; - saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ - *n + 1], &c__1); - - } else { - -/* 2-by-2 diagonal block */ - - slaln2_(&c_false, &c__2, &c__1, &smin, &c_b871, &t[j - - 1 + (j - 1) * t_dim1], ldt, &c_b871, & - c_b871, &work[j - 1 + *n], n, &wr, &c_b1101, - x, &c__2, &scale, &xnorm, &ierr); - -/* - Scale X(1,1) and X(2,1) to avoid overflow when - updating the right-hand side. -*/ - - if (xnorm > 1.f) { -/* Computing MAX */ - r__1 = work[j - 1], r__2 = work[j]; - beta = dmax(r__1,r__2); - if (beta > bignum / xnorm) { - x[0] /= xnorm; - x[1] /= xnorm; - scale /= xnorm; - } - } - -/* Scale if necessary */ - - if (scale != 1.f) { - sscal_(&ki, &scale, &work[*n + 1], &c__1); - } - work[j - 1 + *n] = x[0]; - work[j + *n] = x[1]; - -/* Update right-hand side */ - - i__1 = j - 2; - r__1 = -x[0]; - saxpy_(&i__1, &r__1, &t[(j - 1) * t_dim1 + 1], &c__1, - &work[*n + 1], &c__1); - i__1 = j - 2; - r__1 = -x[1]; - saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ - *n + 1], &c__1); - } -L60: - ; - } - -/* Copy the vector x or Q*x to VR and normalize. */ - - if (! over) { - scopy_(&ki, &work[*n + 1], &c__1, &vr[is * vr_dim1 + 1], & - c__1); - - ii = isamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); - remax = 1.f / (r__1 = vr[ii + is * vr_dim1], dabs(r__1)); - sscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); - - i__1 = *n; - for (k = ki + 1; k <= i__1; ++k) { - vr[k + is * vr_dim1] = 0.f; -/* L70: */ - } - } else { - if (ki > 1) { - i__1 = ki - 1; - sgemv_("N", n, &i__1, &c_b871, &vr[vr_offset], ldvr, & - work[*n + 1], &c__1, &work[ki + *n], &vr[ki * - vr_dim1 + 1], &c__1); - } - - ii = isamax_(n, &vr[ki * vr_dim1 + 1], &c__1); - remax = 1.f / (r__1 = vr[ii + ki * vr_dim1], dabs(r__1)); - sscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); - } - - } else { - -/* - Complex right eigenvector. - - Initial solve - [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0. - [ (T(KI,KI-1) T(KI,KI) ) ] -*/ - - if ((r__1 = t[ki - 1 + ki * t_dim1], dabs(r__1)) >= (r__2 = t[ - ki + (ki - 1) * t_dim1], dabs(r__2))) { - work[ki - 1 + *n] = 1.f; - work[ki + n2] = wi / t[ki - 1 + ki * t_dim1]; - } else { - work[ki - 1 + *n] = -wi / t[ki + (ki - 1) * t_dim1]; - work[ki + n2] = 1.f; - } - work[ki + *n] = 0.f; - work[ki - 1 + n2] = 0.f; - -/* Form right-hand side */ - - i__1 = ki - 2; - for (k = 1; k <= i__1; ++k) { - work[k + *n] = -work[ki - 1 + *n] * t[k + (ki - 1) * - t_dim1]; - work[k + n2] = -work[ki + n2] * t[k + ki * t_dim1]; -/* L80: */ - } - -/* - Solve upper quasi-triangular system: - (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2) -*/ - - jnxt = ki - 2; - for (j = ki - 2; j >= 1; --j) { - if (j > jnxt) { - goto L90; - } - j1 = j; - j2 = j; - jnxt = j - 1; - if (j > 1) { - if (t[j + (j - 1) * t_dim1] != 0.f) { - j1 = j - 1; - jnxt = j - 2; - } - } - - if (j1 == j2) { - -/* 1-by-1 diagonal block */ - - slaln2_(&c_false, &c__1, &c__2, &smin, &c_b871, &t[j - + j * t_dim1], ldt, &c_b871, &c_b871, &work[j - + *n], n, &wr, &wi, x, &c__2, &scale, &xnorm, - &ierr); - -/* - Scale X(1,1) and X(1,2) to avoid overflow when - updating the right-hand side. -*/ - - if (xnorm > 1.f) { - if (work[j] > bignum / xnorm) { - x[0] /= xnorm; - x[2] /= xnorm; - scale /= xnorm; - } - } - -/* Scale if necessary */ - - if (scale != 1.f) { - sscal_(&ki, &scale, &work[*n + 1], &c__1); - sscal_(&ki, &scale, &work[n2 + 1], &c__1); - } - work[j + *n] = x[0]; - work[j + n2] = x[2]; - -/* Update the right-hand side */ - - i__1 = j - 1; - r__1 = -x[0]; - saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ - *n + 1], &c__1); - i__1 = j - 1; - r__1 = -x[2]; - saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ - n2 + 1], &c__1); - - } else { - -/* 2-by-2 diagonal block */ - - slaln2_(&c_false, &c__2, &c__2, &smin, &c_b871, &t[j - - 1 + (j - 1) * t_dim1], ldt, &c_b871, & - c_b871, &work[j - 1 + *n], n, &wr, &wi, x, & - c__2, &scale, &xnorm, &ierr); - -/* - Scale X to avoid overflow when updating - the right-hand side. -*/ - - if (xnorm > 1.f) { -/* Computing MAX */ - r__1 = work[j - 1], r__2 = work[j]; - beta = dmax(r__1,r__2); - if (beta > bignum / xnorm) { - rec = 1.f / xnorm; - x[0] *= rec; - x[2] *= rec; - x[1] *= rec; - x[3] *= rec; - scale *= rec; - } - } - -/* Scale if necessary */ - - if (scale != 1.f) { - sscal_(&ki, &scale, &work[*n + 1], &c__1); - sscal_(&ki, &scale, &work[n2 + 1], &c__1); - } - work[j - 1 + *n] = x[0]; - work[j + *n] = x[1]; - work[j - 1 + n2] = x[2]; - work[j + n2] = x[3]; - -/* Update the right-hand side */ - - i__1 = j - 2; - r__1 = -x[0]; - saxpy_(&i__1, &r__1, &t[(j - 1) * t_dim1 + 1], &c__1, - &work[*n + 1], &c__1); - i__1 = j - 2; - r__1 = -x[1]; - saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ - *n + 1], &c__1); - i__1 = j - 2; - r__1 = -x[2]; - saxpy_(&i__1, &r__1, &t[(j - 1) * t_dim1 + 1], &c__1, - &work[n2 + 1], &c__1); - i__1 = j - 2; - r__1 = -x[3]; - saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ - n2 + 1], &c__1); - } -L90: - ; - } - -/* Copy the vector x or Q*x to VR and normalize. */ - - if (! over) { - scopy_(&ki, &work[*n + 1], &c__1, &vr[(is - 1) * vr_dim1 - + 1], &c__1); - scopy_(&ki, &work[n2 + 1], &c__1, &vr[is * vr_dim1 + 1], & - c__1); - - emax = 0.f; - i__1 = ki; - for (k = 1; k <= i__1; ++k) { -/* Computing MAX */ - r__3 = emax, r__4 = (r__1 = vr[k + (is - 1) * vr_dim1] - , dabs(r__1)) + (r__2 = vr[k + is * vr_dim1], - dabs(r__2)); - emax = dmax(r__3,r__4); -/* L100: */ - } - - remax = 1.f / emax; - sscal_(&ki, &remax, &vr[(is - 1) * vr_dim1 + 1], &c__1); - sscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); - - i__1 = *n; - for (k = ki + 1; k <= i__1; ++k) { - vr[k + (is - 1) * vr_dim1] = 0.f; - vr[k + is * vr_dim1] = 0.f; -/* L110: */ - } - - } else { - - if (ki > 2) { - i__1 = ki - 2; - sgemv_("N", n, &i__1, &c_b871, &vr[vr_offset], ldvr, & - work[*n + 1], &c__1, &work[ki - 1 + *n], &vr[( - ki - 1) * vr_dim1 + 1], &c__1); - i__1 = ki - 2; - sgemv_("N", n, &i__1, &c_b871, &vr[vr_offset], ldvr, & - work[n2 + 1], &c__1, &work[ki + n2], &vr[ki * - vr_dim1 + 1], &c__1); - } else { - sscal_(n, &work[ki - 1 + *n], &vr[(ki - 1) * vr_dim1 - + 1], &c__1); - sscal_(n, &work[ki + n2], &vr[ki * vr_dim1 + 1], & - c__1); - } - - emax = 0.f; - i__1 = *n; - for (k = 1; k <= i__1; ++k) { -/* Computing MAX */ - r__3 = emax, r__4 = (r__1 = vr[k + (ki - 1) * vr_dim1] - , dabs(r__1)) + (r__2 = vr[k + ki * vr_dim1], - dabs(r__2)); - emax = dmax(r__3,r__4); -/* L120: */ - } - remax = 1.f / emax; - sscal_(n, &remax, &vr[(ki - 1) * vr_dim1 + 1], &c__1); - sscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); - } - } - - --is; - if (ip != 0) { - --is; - } -L130: - if (ip == 1) { - ip = 0; - } - if (ip == -1) { - ip = 1; - } -/* L140: */ - } - } - - if (leftv) { - -/* Compute left eigenvectors. */ - - ip = 0; - is = 1; - i__1 = *n; - for (ki = 1; ki <= i__1; ++ki) { - - if (ip == -1) { - goto L250; - } - if (ki == *n) { - goto L150; - } - if (t[ki + 1 + ki * t_dim1] == 0.f) { - goto L150; - } - ip = 1; - -L150: - if (somev) { - if (! select[ki]) { - goto L250; - } - } - -/* Compute the KI-th eigenvalue (WR,WI). */ - - wr = t[ki + ki * t_dim1]; - wi = 0.f; - if (ip != 0) { - wi = sqrt((r__1 = t[ki + (ki + 1) * t_dim1], dabs(r__1))) * - sqrt((r__2 = t[ki + 1 + ki * t_dim1], dabs(r__2))); - } -/* Computing MAX */ - r__1 = ulp * (dabs(wr) + dabs(wi)); - smin = dmax(r__1,smlnum); - - if (ip == 0) { - -/* Real left eigenvector. */ - - work[ki + *n] = 1.f; - -/* Form right-hand side */ - - i__2 = *n; - for (k = ki + 1; k <= i__2; ++k) { - work[k + *n] = -t[ki + k * t_dim1]; -/* L160: */ - } - -/* - Solve the quasi-triangular system: - (T(KI+1:N,KI+1:N) - WR)'*X = SCALE*WORK -*/ - - vmax = 1.f; - vcrit = bignum; - - jnxt = ki + 1; - i__2 = *n; - for (j = ki + 1; j <= i__2; ++j) { - if (j < jnxt) { - goto L170; - } - j1 = j; - j2 = j; - jnxt = j + 1; - if (j < *n) { - if (t[j + 1 + j * t_dim1] != 0.f) { - j2 = j + 1; - jnxt = j + 2; - } - } - - if (j1 == j2) { - -/* - 1-by-1 diagonal block - - Scale if necessary to avoid overflow when forming - the right-hand side. -*/ - - if (work[j] > vcrit) { - rec = 1.f / vmax; - i__3 = *n - ki + 1; - sscal_(&i__3, &rec, &work[ki + *n], &c__1); - vmax = 1.f; - vcrit = bignum; - } - - i__3 = j - ki - 1; - work[j + *n] -= sdot_(&i__3, &t[ki + 1 + j * t_dim1], - &c__1, &work[ki + 1 + *n], &c__1); - -/* Solve (T(J,J)-WR)'*X = WORK */ - - slaln2_(&c_false, &c__1, &c__1, &smin, &c_b871, &t[j - + j * t_dim1], ldt, &c_b871, &c_b871, &work[j - + *n], n, &wr, &c_b1101, x, &c__2, &scale, & - xnorm, &ierr); - -/* Scale if necessary */ - - if (scale != 1.f) { - i__3 = *n - ki + 1; - sscal_(&i__3, &scale, &work[ki + *n], &c__1); - } - work[j + *n] = x[0]; -/* Computing MAX */ - r__2 = (r__1 = work[j + *n], dabs(r__1)); - vmax = dmax(r__2,vmax); - vcrit = bignum / vmax; - - } else { - -/* - 2-by-2 diagonal block - - Scale if necessary to avoid overflow when forming - the right-hand side. - - Computing MAX -*/ - r__1 = work[j], r__2 = work[j + 1]; - beta = dmax(r__1,r__2); - if (beta > vcrit) { - rec = 1.f / vmax; - i__3 = *n - ki + 1; - sscal_(&i__3, &rec, &work[ki + *n], &c__1); - vmax = 1.f; - vcrit = bignum; - } - - i__3 = j - ki - 1; - work[j + *n] -= sdot_(&i__3, &t[ki + 1 + j * t_dim1], - &c__1, &work[ki + 1 + *n], &c__1); - - i__3 = j - ki - 1; - work[j + 1 + *n] -= sdot_(&i__3, &t[ki + 1 + (j + 1) * - t_dim1], &c__1, &work[ki + 1 + *n], &c__1); - -/* - Solve - [T(J,J)-WR T(J,J+1) ]'* X = SCALE*( WORK1 ) - [T(J+1,J) T(J+1,J+1)-WR] ( WORK2 ) -*/ - - slaln2_(&c_true, &c__2, &c__1, &smin, &c_b871, &t[j + - j * t_dim1], ldt, &c_b871, &c_b871, &work[j + - *n], n, &wr, &c_b1101, x, &c__2, &scale, & - xnorm, &ierr); - -/* Scale if necessary */ - - if (scale != 1.f) { - i__3 = *n - ki + 1; - sscal_(&i__3, &scale, &work[ki + *n], &c__1); - } - work[j + *n] = x[0]; - work[j + 1 + *n] = x[1]; - -/* Computing MAX */ - r__3 = (r__1 = work[j + *n], dabs(r__1)), r__4 = ( - r__2 = work[j + 1 + *n], dabs(r__2)), r__3 = - max(r__3,r__4); - vmax = dmax(r__3,vmax); - vcrit = bignum / vmax; - - } -L170: - ; - } - -/* Copy the vector x or Q*x to VL and normalize. */ - - if (! over) { - i__2 = *n - ki + 1; - scopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * - vl_dim1], &c__1); - - i__2 = *n - ki + 1; - ii = isamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - - 1; - remax = 1.f / (r__1 = vl[ii + is * vl_dim1], dabs(r__1)); - i__2 = *n - ki + 1; - sscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); - - i__2 = ki - 1; - for (k = 1; k <= i__2; ++k) { - vl[k + is * vl_dim1] = 0.f; -/* L180: */ - } - - } else { - - if (ki < *n) { - i__2 = *n - ki; - sgemv_("N", n, &i__2, &c_b871, &vl[(ki + 1) * vl_dim1 - + 1], ldvl, &work[ki + 1 + *n], &c__1, &work[ - ki + *n], &vl[ki * vl_dim1 + 1], &c__1); - } - - ii = isamax_(n, &vl[ki * vl_dim1 + 1], &c__1); - remax = 1.f / (r__1 = vl[ii + ki * vl_dim1], dabs(r__1)); - sscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); - - } - - } else { - -/* - Complex left eigenvector. - - Initial solve: - ((T(KI,KI) T(KI,KI+1) )' - (WR - I* WI))*X = 0. - ((T(KI+1,KI) T(KI+1,KI+1)) ) -*/ - - if ((r__1 = t[ki + (ki + 1) * t_dim1], dabs(r__1)) >= (r__2 = - t[ki + 1 + ki * t_dim1], dabs(r__2))) { - work[ki + *n] = wi / t[ki + (ki + 1) * t_dim1]; - work[ki + 1 + n2] = 1.f; - } else { - work[ki + *n] = 1.f; - work[ki + 1 + n2] = -wi / t[ki + 1 + ki * t_dim1]; - } - work[ki + 1 + *n] = 0.f; - work[ki + n2] = 0.f; - -/* Form right-hand side */ - - i__2 = *n; - for (k = ki + 2; k <= i__2; ++k) { - work[k + *n] = -work[ki + *n] * t[ki + k * t_dim1]; - work[k + n2] = -work[ki + 1 + n2] * t[ki + 1 + k * t_dim1] - ; -/* L190: */ - } - -/* - Solve complex quasi-triangular system: - ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2 -*/ - - vmax = 1.f; - vcrit = bignum; - - jnxt = ki + 2; - i__2 = *n; - for (j = ki + 2; j <= i__2; ++j) { - if (j < jnxt) { - goto L200; - } - j1 = j; - j2 = j; - jnxt = j + 1; - if (j < *n) { - if (t[j + 1 + j * t_dim1] != 0.f) { - j2 = j + 1; - jnxt = j + 2; - } - } - - if (j1 == j2) { - -/* - 1-by-1 diagonal block - - Scale if necessary to avoid overflow when - forming the right-hand side elements. -*/ - - if (work[j] > vcrit) { - rec = 1.f / vmax; - i__3 = *n - ki + 1; - sscal_(&i__3, &rec, &work[ki + *n], &c__1); - i__3 = *n - ki + 1; - sscal_(&i__3, &rec, &work[ki + n2], &c__1); - vmax = 1.f; - vcrit = bignum; - } - - i__3 = j - ki - 2; - work[j + *n] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1], - &c__1, &work[ki + 2 + *n], &c__1); - i__3 = j - ki - 2; - work[j + n2] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1], - &c__1, &work[ki + 2 + n2], &c__1); - -/* Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2 */ - - r__1 = -wi; - slaln2_(&c_false, &c__1, &c__2, &smin, &c_b871, &t[j - + j * t_dim1], ldt, &c_b871, &c_b871, &work[j - + *n], n, &wr, &r__1, x, &c__2, &scale, & - xnorm, &ierr); - -/* Scale if necessary */ - - if (scale != 1.f) { - i__3 = *n - ki + 1; - sscal_(&i__3, &scale, &work[ki + *n], &c__1); - i__3 = *n - ki + 1; - sscal_(&i__3, &scale, &work[ki + n2], &c__1); - } - work[j + *n] = x[0]; - work[j + n2] = x[2]; -/* Computing MAX */ - r__3 = (r__1 = work[j + *n], dabs(r__1)), r__4 = ( - r__2 = work[j + n2], dabs(r__2)), r__3 = max( - r__3,r__4); - vmax = dmax(r__3,vmax); - vcrit = bignum / vmax; - - } else { - -/* - 2-by-2 diagonal block - - Scale if necessary to avoid overflow when forming - the right-hand side elements. - - Computing MAX -*/ - r__1 = work[j], r__2 = work[j + 1]; - beta = dmax(r__1,r__2); - if (beta > vcrit) { - rec = 1.f / vmax; - i__3 = *n - ki + 1; - sscal_(&i__3, &rec, &work[ki + *n], &c__1); - i__3 = *n - ki + 1; - sscal_(&i__3, &rec, &work[ki + n2], &c__1); - vmax = 1.f; - vcrit = bignum; - } - - i__3 = j - ki - 2; - work[j + *n] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1], - &c__1, &work[ki + 2 + *n], &c__1); - - i__3 = j - ki - 2; - work[j + n2] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1], - &c__1, &work[ki + 2 + n2], &c__1); - - i__3 = j - ki - 2; - work[j + 1 + *n] -= sdot_(&i__3, &t[ki + 2 + (j + 1) * - t_dim1], &c__1, &work[ki + 2 + *n], &c__1); - - i__3 = j - ki - 2; - work[j + 1 + n2] -= sdot_(&i__3, &t[ki + 2 + (j + 1) * - t_dim1], &c__1, &work[ki + 2 + n2], &c__1); - -/* - Solve 2-by-2 complex linear equation - ([T(j,j) T(j,j+1) ]'-(wr-i*wi)*I)*X = SCALE*B - ([T(j+1,j) T(j+1,j+1)] ) -*/ - - r__1 = -wi; - slaln2_(&c_true, &c__2, &c__2, &smin, &c_b871, &t[j + - j * t_dim1], ldt, &c_b871, &c_b871, &work[j + - *n], n, &wr, &r__1, x, &c__2, &scale, &xnorm, - &ierr); - -/* Scale if necessary */ - - if (scale != 1.f) { - i__3 = *n - ki + 1; - sscal_(&i__3, &scale, &work[ki + *n], &c__1); - i__3 = *n - ki + 1; - sscal_(&i__3, &scale, &work[ki + n2], &c__1); - } - work[j + *n] = x[0]; - work[j + n2] = x[2]; - work[j + 1 + *n] = x[1]; - work[j + 1 + n2] = x[3]; -/* Computing MAX */ - r__1 = dabs(x[0]), r__2 = dabs(x[2]), r__1 = max(r__1, - r__2), r__2 = dabs(x[1]), r__1 = max(r__1, - r__2), r__2 = dabs(x[3]), r__1 = max(r__1, - r__2); - vmax = dmax(r__1,vmax); - vcrit = bignum / vmax; - - } -L200: - ; - } - -/* - Copy the vector x or Q*x to VL and normalize. - - L210: -*/ - if (! over) { - i__2 = *n - ki + 1; - scopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * - vl_dim1], &c__1); - i__2 = *n - ki + 1; - scopy_(&i__2, &work[ki + n2], &c__1, &vl[ki + (is + 1) * - vl_dim1], &c__1); - - emax = 0.f; - i__2 = *n; - for (k = ki; k <= i__2; ++k) { -/* Computing MAX */ - r__3 = emax, r__4 = (r__1 = vl[k + is * vl_dim1], - dabs(r__1)) + (r__2 = vl[k + (is + 1) * - vl_dim1], dabs(r__2)); - emax = dmax(r__3,r__4); -/* L220: */ - } - remax = 1.f / emax; - i__2 = *n - ki + 1; - sscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); - i__2 = *n - ki + 1; - sscal_(&i__2, &remax, &vl[ki + (is + 1) * vl_dim1], &c__1) - ; - - i__2 = ki - 1; - for (k = 1; k <= i__2; ++k) { - vl[k + is * vl_dim1] = 0.f; - vl[k + (is + 1) * vl_dim1] = 0.f; -/* L230: */ - } - } else { - if (ki < *n - 1) { - i__2 = *n - ki - 1; - sgemv_("N", n, &i__2, &c_b871, &vl[(ki + 2) * vl_dim1 - + 1], ldvl, &work[ki + 2 + *n], &c__1, &work[ - ki + *n], &vl[ki * vl_dim1 + 1], &c__1); - i__2 = *n - ki - 1; - sgemv_("N", n, &i__2, &c_b871, &vl[(ki + 2) * vl_dim1 - + 1], ldvl, &work[ki + 2 + n2], &c__1, &work[ - ki + 1 + n2], &vl[(ki + 1) * vl_dim1 + 1], & - c__1); - } else { - sscal_(n, &work[ki + *n], &vl[ki * vl_dim1 + 1], & - c__1); - sscal_(n, &work[ki + 1 + n2], &vl[(ki + 1) * vl_dim1 - + 1], &c__1); - } - - emax = 0.f; - i__2 = *n; - for (k = 1; k <= i__2; ++k) { -/* Computing MAX */ - r__3 = emax, r__4 = (r__1 = vl[k + ki * vl_dim1], - dabs(r__1)) + (r__2 = vl[k + (ki + 1) * - vl_dim1], dabs(r__2)); - emax = dmax(r__3,r__4); -/* L240: */ - } - remax = 1.f / emax; - sscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); - sscal_(n, &remax, &vl[(ki + 1) * vl_dim1 + 1], &c__1); - - } - - } - - ++is; - if (ip != 0) { - ++is; - } -L250: - if (ip == -1) { - ip = 0; - } - if (ip == 1) { - ip = -1; - } - -/* L260: */ - } - - } - - return 0; - -/* End of STREVC */ - -} /* strevc_ */ - -/* Subroutine */ int strti2_(char *uplo, char *diag, integer *n, real *a, - integer *lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer j; - static real ajj; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); - static logical upper; - extern /* Subroutine */ int strmv_(char *, char *, char *, integer *, - real *, integer *, real *, integer *), - xerbla_(char *, integer *); - static logical nounit; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - STRTI2 computes the inverse of a real upper or lower triangular - matrix. - - This is the Level 2 BLAS version of the algorithm. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the matrix A is upper or lower triangular. - = 'U': Upper triangular - = 'L': Lower triangular - - DIAG (input) CHARACTER*1 - Specifies whether or not the matrix A is unit triangular. - = 'N': Non-unit triangular - = 'U': Unit triangular - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the triangular matrix A. If UPLO = 'U', the - leading n by n upper triangular part of the array A contains - the upper triangular matrix, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading n by n lower triangular part of the array A contains - the lower triangular matrix, and the strictly upper - triangular part of A is not referenced. If DIAG = 'U', the - diagonal elements of A are also not referenced and are - assumed to be 1. - - On exit, the (triangular) inverse of the original matrix, in - the same storage format. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - nounit = lsame_(diag, "N"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (! nounit && ! lsame_(diag, "U")) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("STRTI2", &i__1); - return 0; - } - - if (upper) { - -/* Compute inverse of upper triangular matrix. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (nounit) { - a[j + j * a_dim1] = 1.f / a[j + j * a_dim1]; - ajj = -a[j + j * a_dim1]; - } else { - ajj = -1.f; - } - -/* Compute elements 1:j-1 of j-th column. */ - - i__2 = j - 1; - strmv_("Upper", "No transpose", diag, &i__2, &a[a_offset], lda, & - a[j * a_dim1 + 1], &c__1); - i__2 = j - 1; - sscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1); -/* L10: */ - } - } else { - -/* Compute inverse of lower triangular matrix. */ - - for (j = *n; j >= 1; --j) { - if (nounit) { - a[j + j * a_dim1] = 1.f / a[j + j * a_dim1]; - ajj = -a[j + j * a_dim1]; - } else { - ajj = -1.f; - } - if (j < *n) { - -/* Compute elements j+1:n of j-th column. */ - - i__1 = *n - j; - strmv_("Lower", "No transpose", diag, &i__1, &a[j + 1 + (j + - 1) * a_dim1], lda, &a[j + 1 + j * a_dim1], &c__1); - i__1 = *n - j; - sscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1); - } -/* L20: */ - } - } - - return 0; - -/* End of STRTI2 */ - -} /* strti2_ */ - -/* Subroutine */ int strtri_(char *uplo, char *diag, integer *n, real *a, - integer *lda, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, i__1, i__2[2], i__3, i__4, i__5; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer j, jb, nb, nn; - extern logical lsame_(char *, char *); - static logical upper; - extern /* Subroutine */ int strmm_(char *, char *, char *, char *, - integer *, integer *, real *, real *, integer *, real *, integer * - ), strsm_(char *, char *, char *, - char *, integer *, integer *, real *, real *, integer *, real *, - integer *), strti2_(char *, char * - , integer *, real *, integer *, integer *), - xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static logical nounit; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - STRTRI computes the inverse of a real upper or lower triangular - matrix A. - - This is the Level 3 BLAS version of the algorithm. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': A is upper triangular; - = 'L': A is lower triangular. - - DIAG (input) CHARACTER*1 - = 'N': A is non-unit triangular; - = 'U': A is unit triangular. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the triangular matrix A. If UPLO = 'U', the - leading N-by-N upper triangular part of the array A contains - the upper triangular matrix, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading N-by-N lower triangular part of the array A contains - the lower triangular matrix, and the strictly upper - triangular part of A is not referenced. If DIAG = 'U', the - diagonal elements of A are also not referenced and are - assumed to be 1. - On exit, the (triangular) inverse of the original matrix, in - the same storage format. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, A(i,i) is exactly zero. The triangular - matrix is singular and its inverse can not be computed. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - nounit = lsame_(diag, "N"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (! nounit && ! lsame_(diag, "U")) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("STRTRI", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Check for singularity if non-unit. */ - - if (nounit) { - i__1 = *n; - for (*info = 1; *info <= i__1; ++(*info)) { - if (a[*info + *info * a_dim1] == 0.f) { - return 0; - } -/* L10: */ - } - *info = 0; - } - -/* - Determine the block size for this environment. - - Writing concatenation -*/ - i__2[0] = 1, a__1[0] = uplo; - i__2[1] = 1, a__1[1] = diag; - s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2); - nb = ilaenv_(&c__1, "STRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)2); - if (nb <= 1 || nb >= *n) { - -/* Use unblocked code */ - - strti2_(uplo, diag, n, &a[a_offset], lda, info); - } else { - -/* Use blocked code */ - - if (upper) { - -/* Compute inverse of upper triangular matrix */ - - i__1 = *n; - i__3 = nb; - for (j = 1; i__3 < 0 ? j >= i__1 : j <= i__1; j += i__3) { -/* Computing MIN */ - i__4 = nb, i__5 = *n - j + 1; - jb = min(i__4,i__5); - -/* Compute rows 1:j-1 of current block column */ - - i__4 = j - 1; - strmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, & - c_b871, &a[a_offset], lda, &a[j * a_dim1 + 1], lda); - i__4 = j - 1; - strsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, & - c_b1150, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], - lda); - -/* Compute inverse of current diagonal block */ - - strti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info); -/* L20: */ - } - } else { - -/* Compute inverse of lower triangular matrix */ - - nn = (*n - 1) / nb * nb + 1; - i__3 = -nb; - for (j = nn; i__3 < 0 ? j >= 1 : j <= 1; j += i__3) { -/* Computing MIN */ - i__1 = nb, i__4 = *n - j + 1; - jb = min(i__1,i__4); - if (j + jb <= *n) { - -/* Compute rows j+jb:n of current block column */ - - i__1 = *n - j - jb + 1; - strmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, - &c_b871, &a[j + jb + (j + jb) * a_dim1], lda, &a[ - j + jb + j * a_dim1], lda); - i__1 = *n - j - jb + 1; - strsm_("Right", "Lower", "No transpose", diag, &i__1, &jb, - &c_b1150, &a[j + j * a_dim1], lda, &a[j + jb + j - * a_dim1], lda); - } - -/* Compute inverse of current diagonal block */ - - strti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info); -/* L30: */ - } - } - } - - return 0; - -/* End of STRTRI */ - -} /* strtri_ */ - diff --git a/numpy/linalg/lapack_lite/f2c.c b/numpy/linalg/lapack_lite/f2c.c new file mode 100644 index 000000000..89feb3885 --- /dev/null +++ b/numpy/linalg/lapack_lite/f2c.c @@ -0,0 +1,680 @@ +/* + Functions here are copied from the source code for libf2c. + + Typically each function there is in its own file. + + We don't link against libf2c directly, because we can't guarantee + it is available, and shipping a static library isn't portable. +*/ + +#include +#include +#include +#include +#include "f2c.h" + + +extern void s_wsfe(cilist *f) {;} +extern void e_wsfe(void) {;} +extern void do_fio(integer *c, char *s, ftnlen l) {;} + +/* You'll want this if you redo the f2c_*.c files with the -C option + * to f2c for checking array subscripts. (It's not suggested you do that + * for production use, of course.) */ +extern int +s_rnge(char *var, int index, char *routine, int lineno) +{ + fprintf(stderr, "array index out-of-bounds for %s[%d] in routine %s:%d\n", + var, index, routine, lineno); + fflush(stderr); + abort(); +} + +#ifdef KR_headers +extern float sqrtf(); +double f__cabsf(real, imag) float real, imag; +#else +#undef abs + +double f__cabsf(float real, float imag) +#endif +{ +float temp; + +if(real < 0.0f) + real = -real; +if(imag < 0.0f) + imag = -imag; +if(imag > real){ + temp = real; + real = imag; + imag = temp; +} +if((imag+real) == real) + return((float)real); + +temp = imag/real; +temp = real*sqrtf(1.0 + temp*temp); /*overflow!!*/ +return(temp); +} + + +#ifdef KR_headers +extern double sqrt(); +double f__cabs(real, imag) double real, imag; +#else +#undef abs + +double f__cabs(double real, double imag) +#endif +{ +double temp; + +if(real < 0) + real = -real; +if(imag < 0) + imag = -imag; +if(imag > real){ + temp = real; + real = imag; + imag = temp; +} +if((imag+real) == real) + return((double)real); + +temp = imag/real; +temp = real*sqrt(1.0 + temp*temp); /*overflow!!*/ +return(temp); +} + + VOID +#ifdef KR_headers +r_cnjg(r, z) complex *r, *z; +#else +r_cnjg(complex *r, complex *z) +#endif +{ +r->r = z->r; +r->i = - z->i; +} + + VOID +#ifdef KR_headers +d_cnjg(r, z) doublecomplex *r, *z; +#else +d_cnjg(doublecomplex *r, doublecomplex *z) +#endif +{ +r->r = z->r; +r->i = - z->i; +} + + +#ifdef KR_headers +float r_imag(z) complex *z; +#else +float r_imag(complex *z) +#endif +{ +return(z->i); +} + +#ifdef KR_headers +double d_imag(z) doublecomplex *z; +#else +double d_imag(doublecomplex *z) +#endif +{ +return(z->i); +} + + +#define log10e 0.43429448190325182765 + +#ifdef KR_headers +float logf(); +float r_lg10(x) real *x; +#else +#undef abs + +float r_lg10(real *x) +#endif +{ +return( log10e * logf(*x) ); +} + +#ifdef KR_headers +double log(); +double d_lg10(x) doublereal *x; +#else +#undef abs + +double d_lg10(doublereal *x) +#endif +{ +return( log10e * log(*x) ); +} + +#ifdef KR_headers +double r_sign(a,b) real *a, *b; +#else +double r_sign(real *a, real *b) +#endif +{ +float x; +x = (*a >= 0.0f ? *a : - *a); +return( *b >= 0.0f ? x : -x); +} + +#ifdef KR_headers +double d_sign(a,b) doublereal *a, *b; +#else +double d_sign(doublereal *a, doublereal *b) +#endif +{ +double x; +x = (*a >= 0 ? *a : - *a); +return( *b >= 0 ? x : -x); +} + + +#ifdef KR_headers +double floor(); +integer i_dnnt(x) doublereal *x; +#else +#undef abs + +integer i_dnnt(doublereal *x) +#endif +{ +return( (*x)>=0 ? + floor(*x + .5) : -floor(.5 - *x) ); +} + + +#ifdef KR_headers +double pow(); +double pow_dd(ap, bp) doublereal *ap, *bp; +#else +#undef abs + +double pow_dd(doublereal *ap, doublereal *bp) +#endif +{ +return(pow(*ap, *bp) ); +} + + +#ifdef KR_headers +double pow_ri(ap, bp) real *ap; integer *bp; +#else +double pow_ri(real *ap, integer *bp) +#endif +{ +float pow, x; +integer n; +unsigned long u; + +pow = 1; +x = *ap; +n = *bp; + +if(n != 0) + { + if(n < 0) + { + n = -n; + x = 1.0f/x; + } + for(u = n; ; ) + { + if(u & 01) + pow *= x; + if(u >>= 1) + x *= x; + else + break; + } + } +return(pow); +} + +#ifdef KR_headers +double pow_di(ap, bp) doublereal *ap; integer *bp; +#else +double pow_di(doublereal *ap, integer *bp) +#endif +{ +double pow, x; +integer n; +unsigned long u; + +pow = 1; +x = *ap; +n = *bp; + +if(n != 0) + { + if(n < 0) + { + n = -n; + x = 1/x; + } + for(u = n; ; ) + { + if(u & 01) + pow *= x; + if(u >>= 1) + x *= x; + else + break; + } + } +return(pow); +} +/* Unless compiled with -DNO_OVERWRITE, this variant of s_cat allows the + * target of a concatenation to appear on its right-hand side (contrary + * to the Fortran 77 Standard, but in accordance with Fortran 90). + */ +#define NO_OVERWRITE + + +#ifndef NO_OVERWRITE + +#undef abs +#ifdef KR_headers + extern char *F77_aloc(); + extern void free(); + extern void exit_(); +#else + + extern char *F77_aloc(ftnlen, char*); +#endif + +#endif /* NO_OVERWRITE */ + + VOID +#ifdef KR_headers +s_cat(lp, rpp, rnp, np, ll) char *lp, *rpp[]; ftnlen rnp[], *np, ll; +#else +s_cat(char *lp, char *rpp[], ftnlen rnp[], ftnlen *np, ftnlen ll) +#endif +{ + ftnlen i, nc; + char *rp; + ftnlen n = *np; +#ifndef NO_OVERWRITE + ftnlen L, m; + char *lp0, *lp1; + + lp0 = 0; + lp1 = lp; + L = ll; + i = 0; + while(i < n) { + rp = rpp[i]; + m = rnp[i++]; + if (rp >= lp1 || rp + m <= lp) { + if ((L -= m) <= 0) { + n = i; + break; + } + lp1 += m; + continue; + } + lp0 = lp; + lp = lp1 = F77_aloc(L = ll, "s_cat"); + break; + } + lp1 = lp; +#endif /* NO_OVERWRITE */ + for(i = 0 ; i < n ; ++i) { + nc = ll; + if(rnp[i] < nc) + nc = rnp[i]; + ll -= nc; + rp = rpp[i]; + while(--nc >= 0) + *lp++ = *rp++; + } + while(--ll >= 0) + *lp++ = ' '; +#ifndef NO_OVERWRITE + if (lp0) { + memmove(lp0, lp1, L); + free(lp1); + } +#endif + } + + +/* compare two strings */ + +#ifdef KR_headers +integer s_cmp(a0, b0, la, lb) char *a0, *b0; ftnlen la, lb; +#else +integer s_cmp(char *a0, char *b0, ftnlen la, ftnlen lb) +#endif +{ +register unsigned char *a, *aend, *b, *bend; +a = (unsigned char *)a0; +b = (unsigned char *)b0; +aend = a + la; +bend = b + lb; + +if(la <= lb) + { + while(a < aend) + if(*a != *b) + return( *a - *b ); + else + { ++a; ++b; } + + while(b < bend) + if(*b != ' ') + return( ' ' - *b ); + else ++b; + } + +else + { + while(b < bend) + if(*a == *b) + { ++a; ++b; } + else + return( *a - *b ); + while(a < aend) + if(*a != ' ') + return(*a - ' '); + else ++a; + } +return(0); +} +/* Unless compiled with -DNO_OVERWRITE, this variant of s_copy allows the + * target of an assignment to appear on its right-hand side (contrary + * to the Fortran 77 Standard, but in accordance with Fortran 90), + * as in a(2:5) = a(4:7) . + */ + + + +/* assign strings: a = b */ + +#ifdef KR_headers +VOID s_copy(a, b, la, lb) register char *a, *b; ftnlen la, lb; +#else +void s_copy(register char *a, register char *b, ftnlen la, ftnlen lb) +#endif +{ + register char *aend, *bend; + + aend = a + la; + + if(la <= lb) +#ifndef NO_OVERWRITE + if (a <= b || a >= b + la) +#endif + while(a < aend) + *a++ = *b++; +#ifndef NO_OVERWRITE + else + for(b += la; a < aend; ) + *--aend = *--b; +#endif + + else { + bend = b + lb; +#ifndef NO_OVERWRITE + if (a <= b || a >= bend) +#endif + while(b < bend) + *a++ = *b++; +#ifndef NO_OVERWRITE + else { + a += lb; + while(b < bend) + *--a = *--bend; + a += lb; + } +#endif + while(a < aend) + *a++ = ' '; + } + } + + +#ifdef KR_headers +double f__cabsf(); +double c_abs(z) complex *z; +#else +double f__cabsf(float, float); +double c_abs(complex *z) +#endif +{ +return( f__cabsf( z->r, z->i ) ); +} + +#ifdef KR_headers +double f__cabs(); +double z_abs(z) doublecomplex *z; +#else +double f__cabs(double, double); +double z_abs(doublecomplex *z) +#endif +{ +return( f__cabs( z->r, z->i ) ); +} + + +#ifdef KR_headers +extern void sig_die(); +VOID c_div(c, a, b) complex *a, *b, *c; +#else +extern void sig_die(char*, int); +void c_div(complex *c, complex *a, complex *b) +#endif +{ +float ratio, den; +float abr, abi; + +if( (abr = b->r) < 0.f) + abr = - abr; +if( (abi = b->i) < 0.f) + abi = - abi; +if( abr <= abi ) + { + /*Let IEEE Infinties handle this ;( */ + /*if(abi == 0) + sig_die("complex division by zero", 1);*/ + ratio = b->r / b->i ; + den = b->i * (1 + ratio*ratio); + c->r = (a->r*ratio + a->i) / den; + c->i = (a->i*ratio - a->r) / den; + } + +else + { + ratio = b->i / b->r ; + den = b->r * (1.f + ratio*ratio); + c->r = (a->r + a->i*ratio) / den; + c->i = (a->i - a->r*ratio) / den; + } + +} + +#ifdef KR_headers +extern void sig_die(); +VOID z_div(c, a, b) doublecomplex *a, *b, *c; +#else +extern void sig_die(char*, int); +void z_div(doublecomplex *c, doublecomplex *a, doublecomplex *b) +#endif +{ +double ratio, den; +double abr, abi; + +if( (abr = b->r) < 0.) + abr = - abr; +if( (abi = b->i) < 0.) + abi = - abi; +if( abr <= abi ) + { + /*Let IEEE Infinties handle this ;( */ + /*if(abi == 0) + sig_die("complex division by zero", 1);*/ + ratio = b->r / b->i ; + den = b->i * (1 + ratio*ratio); + c->r = (a->r*ratio + a->i) / den; + c->i = (a->i*ratio - a->r) / den; + } + +else + { + ratio = b->i / b->r ; + den = b->r * (1 + ratio*ratio); + c->r = (a->r + a->i*ratio) / den; + c->i = (a->i - a->r*ratio) / den; + } + +} + + +#ifdef KR_headers +float sqrtf(), f__cabsf(); +VOID c_sqrt(r, z) complex *r, *z; +#else +#undef abs + +extern double f__cabsf(float, float); +void c_sqrt(complex *r, complex *z) +#endif +{ +float mag; + +if( (mag = f__cabsf(z->r, z->i)) == 0.f) + r->r = r->i = 0.f; +else if(z->r > 0.0f) + { + r->r = sqrtf(0.5f * (mag + z->r) ); + r->i = z->i / r->r / 2.0f; + } +else + { + r->i = sqrtf(0.5f * (mag - z->r) ); + if(z->i < 0.0f) + r->i = - r->i; + r->r = z->i / r->i / 2.0f; + } +} + + +#ifdef KR_headers +double sqrt(), f__cabs(); +VOID z_sqrt(r, z) doublecomplex *r, *z; +#else +#undef abs + +extern double f__cabs(double, double); +void z_sqrt(doublecomplex *r, doublecomplex *z) +#endif +{ +double mag; + +if( (mag = f__cabs(z->r, z->i)) == 0.) + r->r = r->i = 0.; +else if(z->r > 0) + { + r->r = sqrt(0.5 * (mag + z->r) ); + r->i = z->i / r->r / 2; + } +else + { + r->i = sqrt(0.5 * (mag - z->r) ); + if(z->i < 0) + r->i = - r->i; + r->r = z->i / r->i / 2; + } +} +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers +integer pow_ii(ap, bp) integer *ap, *bp; +#else +integer pow_ii(integer *ap, integer *bp) +#endif +{ + integer pow, x, n; + unsigned long u; + + x = *ap; + n = *bp; + + if (n <= 0) { + if (n == 0 || x == 1) + return 1; + if (x != -1) + return x == 0 ? 1/x : 0; + n = -n; + } + u = n; + for(pow = 1; ; ) + { + if(u & 01) + pow *= x; + if(u >>= 1) + x *= x; + else + break; + } + return(pow); + } +#ifdef __cplusplus +} +#endif + +#ifdef KR_headers +extern void f_exit(); +VOID s_stop(s, n) char *s; ftnlen n; +#else +#undef abs +#undef min +#undef max +#ifdef __cplusplus +extern "C" { +#endif +#ifdef __cplusplus +extern "C" { +#endif +void f_exit(void); + +int s_stop(char *s, ftnlen n) +#endif +{ +int i; + +if(n > 0) + { + fprintf(stderr, "STOP "); + for(i = 0; ir, dabs(r__1)) + (r__2 = r_imag(ca), dabs(r__2)) == 0.f) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = iy; + i__4 = ix; + q__2.r = ca->r * cx[i__4].r - ca->i * cx[i__4].i, q__2.i = ca->r * cx[ + i__4].i + ca->i * cx[i__4].r; + q__1.r = cy[i__3].r + q__2.r, q__1.i = cy[i__3].i + q__2.i; + cy[i__2].r = q__1.r, cy[i__2].i = q__1.i; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + i__4 = i__; + q__2.r = ca->r * cx[i__4].r - ca->i * cx[i__4].i, q__2.i = ca->r * cx[ + i__4].i + ca->i * cx[i__4].r; + q__1.r = cy[i__3].r + q__2.r, q__1.i = cy[i__3].i + q__2.i; + cy[i__2].r = q__1.r, cy[i__2].i = q__1.i; +/* L30: */ + } + return 0; +} /* caxpy_ */ + +/* Subroutine */ int ccopy_(integer *n, complex *cx, integer *incx, complex * + cy, integer *incy) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + + /* Local variables */ + static integer i__, ix, iy; + + +/* + copies a vector, x, to a vector, y. + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --cy; + --cx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = ix; + cy[i__2].r = cx[i__3].r, cy[i__2].i = cx[i__3].i; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + cy[i__2].r = cx[i__3].r, cy[i__2].i = cx[i__3].i; +/* L30: */ + } + return 0; +} /* ccopy_ */ + +/* Complex */ VOID cdotc_(complex * ret_val, integer *n, complex *cx, integer + *incx, complex *cy, integer *incy) +{ + /* System generated locals */ + integer i__1, i__2; + complex q__1, q__2, q__3; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, ix, iy; + static complex ctemp; + + +/* + forms the dot product of two vectors, conjugating the first + vector. + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --cy; + --cx; + + /* Function Body */ + ctemp.r = 0.f, ctemp.i = 0.f; + ret_val->r = 0.f, ret_val->i = 0.f; + if (*n <= 0) { + return ; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + r_cnjg(&q__3, &cx[ix]); + i__2 = iy; + q__2.r = q__3.r * cy[i__2].r - q__3.i * cy[i__2].i, q__2.i = q__3.r * + cy[i__2].i + q__3.i * cy[i__2].r; + q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i; + ctemp.r = q__1.r, ctemp.i = q__1.i; + ix += *incx; + iy += *incy; +/* L10: */ + } + ret_val->r = ctemp.r, ret_val->i = ctemp.i; + return ; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + r_cnjg(&q__3, &cx[i__]); + i__2 = i__; + q__2.r = q__3.r * cy[i__2].r - q__3.i * cy[i__2].i, q__2.i = q__3.r * + cy[i__2].i + q__3.i * cy[i__2].r; + q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i; + ctemp.r = q__1.r, ctemp.i = q__1.i; +/* L30: */ + } + ret_val->r = ctemp.r, ret_val->i = ctemp.i; + return ; +} /* cdotc_ */ + +/* Complex */ VOID cdotu_(complex * ret_val, integer *n, complex *cx, integer + *incx, complex *cy, integer *incy) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + complex q__1, q__2; + + /* Local variables */ + static integer i__, ix, iy; + static complex ctemp; + + +/* + forms the dot product of two vectors. + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --cy; + --cx; + + /* Function Body */ + ctemp.r = 0.f, ctemp.i = 0.f; + ret_val->r = 0.f, ret_val->i = 0.f; + if (*n <= 0) { + return ; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ix; + i__3 = iy; + q__2.r = cx[i__2].r * cy[i__3].r - cx[i__2].i * cy[i__3].i, q__2.i = + cx[i__2].r * cy[i__3].i + cx[i__2].i * cy[i__3].r; + q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i; + ctemp.r = q__1.r, ctemp.i = q__1.i; + ix += *incx; + iy += *incy; +/* L10: */ + } + ret_val->r = ctemp.r, ret_val->i = ctemp.i; + return ; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + q__2.r = cx[i__2].r * cy[i__3].r - cx[i__2].i * cy[i__3].i, q__2.i = + cx[i__2].r * cy[i__3].i + cx[i__2].i * cy[i__3].r; + q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i; + ctemp.r = q__1.r, ctemp.i = q__1.i; +/* L30: */ + } + ret_val->r = ctemp.r, ret_val->i = ctemp.i; + return ; +} /* cdotu_ */ + +/* Subroutine */ int cgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, complex *alpha, complex *a, integer *lda, complex *b, + integer *ldb, complex *beta, complex *c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3, i__4, i__5, i__6; + complex q__1, q__2, q__3, q__4; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, j, l, info; + static logical nota, notb; + static complex temp; + static logical conja, conjb; + static integer ncola; + extern logical lsame_(char *, char *); + static integer nrowa, nrowb; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + CGEMM performs one of the matrix-matrix operations + + C := alpha*op( A )*op( B ) + beta*C, + + where op( X ) is one of + + op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ), + + alpha and beta are scalars, and A, B and C are matrices, with op( A ) + an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. + + Parameters + ========== + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n', op( A ) = A. + + TRANSA = 'T' or 't', op( A ) = A'. + + TRANSA = 'C' or 'c', op( A ) = conjg( A' ). + + Unchanged on exit. + + TRANSB - CHARACTER*1. + On entry, TRANSB specifies the form of op( B ) to be used in + the matrix multiplication as follows: + + TRANSB = 'N' or 'n', op( B ) = B. + + TRANSB = 'T' or 't', op( B ) = B'. + + TRANSB = 'C' or 'c', op( B ) = conjg( B' ). + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of the matrix + op( A ) and of the matrix C. M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix + op( B ) and the number of columns of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry, K specifies the number of columns of the matrix + op( A ) and the number of rows of the matrix op( B ). K must + be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is + k when TRANSA = 'N' or 'n', and is m otherwise. + Before entry with TRANSA = 'N' or 'n', the leading m by k + part of the array A must contain the matrix A, otherwise + the leading k by m part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANSA = 'N' or 'n' then + LDA must be at least max( 1, m ), otherwise LDA must be at + least max( 1, k ). + Unchanged on exit. + + B - COMPLEX array of DIMENSION ( LDB, kb ), where kb is + n when TRANSB = 'N' or 'n', and is k otherwise. + Before entry with TRANSB = 'N' or 'n', the leading k by n + part of the array B must contain the matrix B, otherwise + the leading n by k part of the array B must contain the + matrix B. + Unchanged on exit. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. When TRANSB = 'N' or 'n' then + LDB must be at least max( 1, k ), otherwise LDB must be at + least max( 1, n ). + Unchanged on exit. + + BETA - COMPLEX . + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then C need not be set on input. + Unchanged on exit. + + C - COMPLEX array of DIMENSION ( LDC, n ). + Before entry, the leading m by n part of the array C must + contain the matrix C, except when beta is zero, in which + case C need not be set on entry. + On exit, the array C is overwritten by the m by n matrix + ( alpha*op( A )*op( B ) + beta*C ). + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, m ). + Unchanged on exit. + + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Set NOTA and NOTB as true if A and B respectively are not + conjugated or transposed, set CONJA and CONJB as true if A and + B respectively are to be transposed but not conjugated and set + NROWA, NCOLA and NROWB as the number of rows and columns of A + and the number of rows of B respectively. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + nota = lsame_(transa, "N"); + notb = lsame_(transb, "N"); + conja = lsame_(transa, "C"); + conjb = lsame_(transb, "C"); + if (nota) { + nrowa = *m; + ncola = *k; + } else { + nrowa = *k; + ncola = *m; + } + if (notb) { + nrowb = *k; + } else { + nrowb = *n; + } + +/* Test the input parameters. */ + + info = 0; + if (! nota && ! conja && ! lsame_(transa, "T")) { + info = 1; + } else if (! notb && ! conjb && ! lsame_(transb, "T")) { + info = 2; + } else if (*m < 0) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*k < 0) { + info = 5; + } else if (*lda < max(1,nrowa)) { + info = 8; + } else if (*ldb < max(1,nrowb)) { + info = 10; + } else if (*ldc < max(1,*m)) { + info = 13; + } + if (info != 0) { + xerbla_("CGEMM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || (alpha->r == 0.f && alpha->i == 0.f || *k == 0) + && (beta->r == 1.f && beta->i == 0.f)) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (alpha->r == 0.f && alpha->i == 0.f) { + if (beta->r == 0.f && beta->i == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, + q__1.i = beta->r * c__[i__4].i + beta->i * c__[ + i__4].r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L30: */ + } +/* L40: */ + } + } + return 0; + } + +/* Start the operations. */ + + if (notb) { + if (nota) { + +/* Form C := alpha*A*B + beta*C. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (beta->r == 0.f && beta->i == 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; +/* L50: */ + } + } else if (beta->r != 1.f || beta->i != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, q__1.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L60: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = l + j * b_dim1; + if (b[i__3].r != 0.f || b[i__3].i != 0.f) { + i__3 = l + j * b_dim1; + q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + q__1.i = alpha->r * b[i__3].i + alpha->i * b[ + i__3].r; + temp.r = q__1.r, temp.i = q__1.i; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + q__2.i = temp.r * a[i__6].i + temp.i * a[ + i__6].r; + q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5] + .i + q__2.i; + c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; +/* L70: */ + } + } +/* L80: */ + } +/* L90: */ + } + } else if (conja) { + +/* Form C := alpha*conjg( A' )*B + beta*C. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0.f, temp.i = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + r_cnjg(&q__3, &a[l + i__ * a_dim1]); + i__4 = l + j * b_dim1; + q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, + q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4] + .r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L100: */ + } + if (beta->r == 0.f && beta->i == 0.f) { + i__3 = i__ + j * c_dim1; + q__1.r = alpha->r * temp.r - alpha->i * temp.i, + q__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } else { + i__3 = i__ + j * c_dim1; + q__2.r = alpha->r * temp.r - alpha->i * temp.i, + q__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, q__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } +/* L110: */ + } +/* L120: */ + } + } else { + +/* Form C := alpha*A'*B + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0.f, temp.i = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + i__4 = l + i__ * a_dim1; + i__5 = l + j * b_dim1; + q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5] + .i, q__2.i = a[i__4].r * b[i__5].i + a[i__4] + .i * b[i__5].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L130: */ + } + if (beta->r == 0.f && beta->i == 0.f) { + i__3 = i__ + j * c_dim1; + q__1.r = alpha->r * temp.r - alpha->i * temp.i, + q__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } else { + i__3 = i__ + j * c_dim1; + q__2.r = alpha->r * temp.r - alpha->i * temp.i, + q__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, q__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } +/* L140: */ + } +/* L150: */ + } + } + } else if (nota) { + if (conjb) { + +/* Form C := alpha*A*conjg( B' ) + beta*C. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (beta->r == 0.f && beta->i == 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; +/* L160: */ + } + } else if (beta->r != 1.f || beta->i != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, q__1.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L170: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * b_dim1; + if (b[i__3].r != 0.f || b[i__3].i != 0.f) { + r_cnjg(&q__2, &b[j + l * b_dim1]); + q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, + q__1.i = alpha->r * q__2.i + alpha->i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + q__2.i = temp.r * a[i__6].i + temp.i * a[ + i__6].r; + q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5] + .i + q__2.i; + c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; +/* L180: */ + } + } +/* L190: */ + } +/* L200: */ + } + } else { + +/* Form C := alpha*A*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (beta->r == 0.f && beta->i == 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; +/* L210: */ + } + } else if (beta->r != 1.f || beta->i != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, q__1.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L220: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * b_dim1; + if (b[i__3].r != 0.f || b[i__3].i != 0.f) { + i__3 = j + l * b_dim1; + q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + q__1.i = alpha->r * b[i__3].i + alpha->i * b[ + i__3].r; + temp.r = q__1.r, temp.i = q__1.i; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + q__2.i = temp.r * a[i__6].i + temp.i * a[ + i__6].r; + q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5] + .i + q__2.i; + c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; +/* L230: */ + } + } +/* L240: */ + } +/* L250: */ + } + } + } else if (conja) { + if (conjb) { + +/* Form C := alpha*conjg( A' )*conjg( B' ) + beta*C. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0.f, temp.i = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + r_cnjg(&q__3, &a[l + i__ * a_dim1]); + r_cnjg(&q__4, &b[j + l * b_dim1]); + q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, q__2.i = + q__3.r * q__4.i + q__3.i * q__4.r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L260: */ + } + if (beta->r == 0.f && beta->i == 0.f) { + i__3 = i__ + j * c_dim1; + q__1.r = alpha->r * temp.r - alpha->i * temp.i, + q__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } else { + i__3 = i__ + j * c_dim1; + q__2.r = alpha->r * temp.r - alpha->i * temp.i, + q__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, q__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } +/* L270: */ + } +/* L280: */ + } + } else { + +/* Form C := alpha*conjg( A' )*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0.f, temp.i = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + r_cnjg(&q__3, &a[l + i__ * a_dim1]); + i__4 = j + l * b_dim1; + q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, + q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4] + .r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L290: */ + } + if (beta->r == 0.f && beta->i == 0.f) { + i__3 = i__ + j * c_dim1; + q__1.r = alpha->r * temp.r - alpha->i * temp.i, + q__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } else { + i__3 = i__ + j * c_dim1; + q__2.r = alpha->r * temp.r - alpha->i * temp.i, + q__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, q__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } +/* L300: */ + } +/* L310: */ + } + } + } else { + if (conjb) { + +/* Form C := alpha*A'*conjg( B' ) + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0.f, temp.i = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + i__4 = l + i__ * a_dim1; + r_cnjg(&q__3, &b[j + l * b_dim1]); + q__2.r = a[i__4].r * q__3.r - a[i__4].i * q__3.i, + q__2.i = a[i__4].r * q__3.i + a[i__4].i * + q__3.r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L320: */ + } + if (beta->r == 0.f && beta->i == 0.f) { + i__3 = i__ + j * c_dim1; + q__1.r = alpha->r * temp.r - alpha->i * temp.i, + q__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } else { + i__3 = i__ + j * c_dim1; + q__2.r = alpha->r * temp.r - alpha->i * temp.i, + q__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, q__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } +/* L330: */ + } +/* L340: */ + } + } else { + +/* Form C := alpha*A'*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0.f, temp.i = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + i__4 = l + i__ * a_dim1; + i__5 = j + l * b_dim1; + q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5] + .i, q__2.i = a[i__4].r * b[i__5].i + a[i__4] + .i * b[i__5].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L350: */ + } + if (beta->r == 0.f && beta->i == 0.f) { + i__3 = i__ + j * c_dim1; + q__1.r = alpha->r * temp.r - alpha->i * temp.i, + q__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } else { + i__3 = i__ + j * c_dim1; + q__2.r = alpha->r * temp.r - alpha->i * temp.i, + q__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, q__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } +/* L360: */ + } +/* L370: */ + } + } + } + + return 0; + +/* End of CGEMM . */ + +} /* cgemm_ */ + +/* Subroutine */ int cgemv_(char *trans, integer *m, integer *n, complex * + alpha, complex *a, integer *lda, complex *x, integer *incx, complex * + beta, complex *y, integer *incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1, q__2, q__3; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static complex temp; + static integer lenx, leny; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical noconj; + + +/* + Purpose + ======= + + CGEMV performs one of the matrix-vector operations + + y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, or + + y := alpha*conjg( A' )*x + beta*y, + + where alpha and beta are scalars, x and y are vectors and A is an + m by n matrix. + + Parameters + ========== + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' y := alpha*A*x + beta*y. + + TRANS = 'T' or 't' y := alpha*A'*x + beta*y. + + TRANS = 'C' or 'c' y := alpha*conjg( A' )*x + beta*y. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of the matrix A. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - COMPLEX array of DIMENSION ( LDA, n ). + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, m ). + Unchanged on exit. + + X - COMPLEX array of DIMENSION at least + ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' + and at least + ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. + Before entry, the incremented array X must contain the + vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + BETA - COMPLEX . + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then Y need not be set on input. + Unchanged on exit. + + Y - COMPLEX array of DIMENSION at least + ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' + and at least + ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. + Before entry with BETA non-zero, the incremented array Y + must contain the vector y. On exit, Y is overwritten by the + updated vector y. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C") + ) { + info = 1; + } else if (*m < 0) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*lda < max(1,*m)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + xerbla_("CGEMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f && (beta->r + == 1.f && beta->i == 0.f)) { + return 0; + } + + noconj = lsame_(trans, "T"); + +/* + Set LENX and LENY, the lengths of the vectors x and y, and set + up the start points in X and Y. +*/ + + if (lsame_(trans, "N")) { + lenx = *n; + leny = *m; + } else { + lenx = *m; + leny = *n; + } + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (lenx - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (leny - 1) * *incy; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. + + First form y := beta*y. +*/ + + if (beta->r != 1.f || beta->i != 0.f) { + if (*incy == 1) { + if (beta->r == 0.f && beta->i == 0.f) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + y[i__2].r = 0.f, y[i__2].i = 0.f; +/* L10: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; +/* L20: */ + } + } + } else { + iy = ky; + if (beta->r == 0.f && beta->i == 0.f) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + y[i__2].r = 0.f, y[i__2].i = 0.f; + iy += *incy; +/* L30: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = iy; + q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + iy += *incy; +/* L40: */ + } + } + } + } + if (alpha->r == 0.f && alpha->i == 0.f) { + return 0; + } + if (lsame_(trans, "N")) { + +/* Form y := alpha*A*x + y. */ + + jx = kx; + if (*incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0.f || x[i__2].i != 0.f) { + i__2 = jx; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + temp.r = q__1.r, temp.i = q__1.i; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + q__2.i = temp.r * a[i__5].i + temp.i * a[i__5] + .r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; +/* L50: */ + } + } + jx += *incx; +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0.f || x[i__2].i != 0.f) { + i__2 = jx; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + temp.r = q__1.r, temp.i = q__1.i; + iy = ky; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = iy; + i__4 = iy; + i__5 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + q__2.i = temp.r * a[i__5].i + temp.i * a[i__5] + .r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + iy += *incy; +/* L70: */ + } + } + jx += *incx; +/* L80: */ + } + } + } else { + +/* Form y := alpha*A'*x + y or y := alpha*conjg( A' )*x + y. */ + + jy = ky; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp.r = 0.f, temp.i = 0.f; + if (noconj) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__; + q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4] + .i, q__2.i = a[i__3].r * x[i__4].i + a[i__3] + .i * x[i__4].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L90: */ + } + } else { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = i__; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, + q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3] + .r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L100: */ + } + } + i__2 = jy; + i__3 = jy; + q__2.r = alpha->r * temp.r - alpha->i * temp.i, q__2.i = + alpha->r * temp.i + alpha->i * temp.r; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + jy += *incy; +/* L110: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp.r = 0.f, temp.i = 0.f; + ix = kx; + if (noconj) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = ix; + q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4] + .i, q__2.i = a[i__3].r * x[i__4].i + a[i__3] + .i * x[i__4].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + ix += *incx; +/* L120: */ + } + } else { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = ix; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, + q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3] + .r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + ix += *incx; +/* L130: */ + } + } + i__2 = jy; + i__3 = jy; + q__2.r = alpha->r * temp.r - alpha->i * temp.i, q__2.i = + alpha->r * temp.i + alpha->i * temp.r; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + jy += *incy; +/* L140: */ + } + } + } + + return 0; + +/* End of CGEMV . */ + +} /* cgemv_ */ + +/* Subroutine */ int cgerc_(integer *m, integer *n, complex *alpha, complex * + x, integer *incx, complex *y, integer *incy, complex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1, q__2; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, j, ix, jy, kx, info; + static complex temp; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + CGERC performs the rank 1 operation + + A := alpha*x*conjg( y' ) + A, + + where alpha is a scalar, x is an m element vector, y is an n element + vector and A is an m by n matrix. + + Parameters + ========== + + M - INTEGER. + On entry, M specifies the number of rows of the matrix A. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + X - COMPLEX array of dimension at least + ( 1 + ( m - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the m + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Y - COMPLEX array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. + Unchanged on exit. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + A - COMPLEX array of DIMENSION ( LDA, n ). + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. On exit, A is + overwritten by the updated matrix. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, m ). + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (*m < 0) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_("CGERC ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f) { + return 0; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. +*/ + + if (*incy > 0) { + jy = 1; + } else { + jy = 1 - (*n - 1) * *incy; + } + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jy; + if (y[i__2].r != 0.f || y[i__2].i != 0.f) { + r_cnjg(&q__2, &y[jy]); + q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = + alpha->r * q__2.i + alpha->i * q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i = + x[i__5].r * temp.i + x[i__5].i * temp.r; + q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L10: */ + } + } + jy += *incy; +/* L20: */ + } + } else { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*m - 1) * *incx; + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jy; + if (y[i__2].r != 0.f || y[i__2].i != 0.f) { + r_cnjg(&q__2, &y[jy]); + q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = + alpha->r * q__2.i + alpha->i * q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i = + x[i__5].r * temp.i + x[i__5].i * temp.r; + q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; + ix += *incx; +/* L30: */ + } + } + jy += *incy; +/* L40: */ + } + } + + return 0; + +/* End of CGERC . */ + +} /* cgerc_ */ + +/* Subroutine */ int cgeru_(integer *m, integer *n, complex *alpha, complex * + x, integer *incx, complex *y, integer *incy, complex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1, q__2; + + /* Local variables */ + static integer i__, j, ix, jy, kx, info; + static complex temp; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + CGERU performs the rank 1 operation + + A := alpha*x*y' + A, + + where alpha is a scalar, x is an m element vector, y is an n element + vector and A is an m by n matrix. + + Parameters + ========== + + M - INTEGER. + On entry, M specifies the number of rows of the matrix A. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + X - COMPLEX array of dimension at least + ( 1 + ( m - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the m + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Y - COMPLEX array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. + Unchanged on exit. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + A - COMPLEX array of DIMENSION ( LDA, n ). + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. On exit, A is + overwritten by the updated matrix. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, m ). + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (*m < 0) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_("CGERU ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f) { + return 0; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. +*/ + + if (*incy > 0) { + jy = 1; + } else { + jy = 1 - (*n - 1) * *incy; + } + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jy; + if (y[i__2].r != 0.f || y[i__2].i != 0.f) { + i__2 = jy; + q__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, q__1.i = + alpha->r * y[i__2].i + alpha->i * y[i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i = + x[i__5].r * temp.i + x[i__5].i * temp.r; + q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L10: */ + } + } + jy += *incy; +/* L20: */ + } + } else { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*m - 1) * *incx; + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jy; + if (y[i__2].r != 0.f || y[i__2].i != 0.f) { + i__2 = jy; + q__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, q__1.i = + alpha->r * y[i__2].i + alpha->i * y[i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i = + x[i__5].r * temp.i + x[i__5].i * temp.r; + q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; + ix += *incx; +/* L30: */ + } + } + jy += *incy; +/* L40: */ + } + } + + return 0; + +/* End of CGERU . */ + +} /* cgeru_ */ + +/* Subroutine */ int chemv_(char *uplo, integer *n, complex *alpha, complex * + a, integer *lda, complex *x, integer *incx, complex *beta, complex *y, + integer *incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + real r__1; + complex q__1, q__2, q__3, q__4; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static complex temp1, temp2; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + CHEMV performs the matrix-vector operation + + y := alpha*A*x + beta*y, + + where alpha and beta are scalars, x and y are n element vectors and + A is an n by n hermitian matrix. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array A is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of A + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of A + is to be referenced. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - COMPLEX array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the hermitian matrix and the strictly + lower triangular part of A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the hermitian matrix and the strictly + upper triangular part of A is not referenced. + Note that the imaginary parts of the diagonal elements need + not be set and are assumed to be zero. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + X - COMPLEX array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + BETA - COMPLEX . + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then Y need not be set on input. + Unchanged on exit. + + Y - COMPLEX array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. On exit, Y is overwritten by the updated + vector y. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*lda < max(1,*n)) { + info = 5; + } else if (*incx == 0) { + info = 7; + } else if (*incy == 0) { + info = 10; + } + if (info != 0) { + xerbla_("CHEMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f && + beta->i == 0.f)) { + return 0; + } + +/* Set up the start points in X and Y. */ + + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through the triangular part + of A. + + First form y := beta*y. +*/ + + if (beta->r != 1.f || beta->i != 0.f) { + if (*incy == 1) { + if (beta->r == 0.f && beta->i == 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + y[i__2].r = 0.f, y[i__2].i = 0.f; +/* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; +/* L20: */ + } + } + } else { + iy = ky; + if (beta->r == 0.f && beta->i == 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + y[i__2].r = 0.f, y[i__2].i = 0.f; + iy += *incy; +/* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = iy; + q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + iy += *incy; +/* L40: */ + } + } + } + } + if (alpha->r == 0.f && alpha->i == 0.f) { + return 0; + } + if (lsame_(uplo, "U")) { + +/* Form y when A is stored in upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = i__; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = + q__3.r * x[i__3].i + q__3.i * x[i__3].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; +/* L50: */ + } + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + r__1 = a[i__4].r; + q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i; + q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i; + q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = + alpha->r * temp2.i + alpha->i * temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; +/* L60: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; + ix = kx; + iy = ky; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = iy; + i__4 = iy; + i__5 = i__ + j * a_dim1; + q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = ix; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = + q__3.r * x[i__3].i + q__3.i * x[i__3].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; + ix += *incx; + iy += *incy; +/* L70: */ + } + i__2 = jy; + i__3 = jy; + i__4 = j + j * a_dim1; + r__1 = a[i__4].r; + q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i; + q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i; + q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = + alpha->r * temp2.i + alpha->i * temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + jx += *incx; + jy += *incy; +/* L80: */ + } + } + } else { + +/* Form y when A is stored in lower triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + r__1 = a[i__4].r; + q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = i__; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = + q__3.r * x[i__3].i + q__3.i * x[i__3].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; +/* L90: */ + } + i__2 = j; + i__3 = j; + q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = + alpha->r * temp2.i + alpha->i * temp2.r; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; +/* L100: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; + i__2 = jy; + i__3 = jy; + i__4 = j + j * a_dim1; + r__1 = a[i__4].r; + q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + iy += *incy; + i__3 = iy; + i__4 = iy; + i__5 = i__ + j * a_dim1; + q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = ix; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = + q__3.r * x[i__3].i + q__3.i * x[i__3].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; +/* L110: */ + } + i__2 = jy; + i__3 = jy; + q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = + alpha->r * temp2.i + alpha->i * temp2.r; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + jx += *incx; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of CHEMV . */ + +} /* chemv_ */ + +/* Subroutine */ int cher2_(char *uplo, integer *n, complex *alpha, complex * + x, integer *incx, complex *y, integer *incy, complex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + real r__1; + complex q__1, q__2, q__3, q__4; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static complex temp1, temp2; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + CHER2 performs the hermitian rank 2 operation + + A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, + + where alpha is a scalar, x and y are n element vectors and A is an n + by n hermitian matrix. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array A is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of A + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of A + is to be referenced. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + X - COMPLEX array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Y - COMPLEX array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. + Unchanged on exit. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + A - COMPLEX array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the hermitian matrix and the strictly + lower triangular part of A is not referenced. On exit, the + upper triangular part of the array A is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the hermitian matrix and the strictly + upper triangular part of A is not referenced. On exit, the + lower triangular part of the array A is overwritten by the + lower triangular part of the updated matrix. + Note that the imaginary parts of the diagonal elements need + not be set, they are assumed to be zero, and on exit they + are set to zero. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*n)) { + info = 9; + } + if (info != 0) { + xerbla_("CHER2 ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || alpha->r == 0.f && alpha->i == 0.f) { + return 0; + } + +/* + Set up the start points in X and Y if the increments are not both + unity. +*/ + + if (*incx != 1 || *incy != 1) { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + jx = kx; + jy = ky; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through the triangular part + of A. +*/ + + if (lsame_(uplo, "U")) { + +/* Form A when A is stored in the upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + i__3 = j; + if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f + || y[i__3].i != 0.f)) { + r_cnjg(&q__2, &y[j]); + q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = + alpha->r * q__2.i + alpha->i * q__2.r; + temp1.r = q__1.r, temp1.i = q__1.i; + i__2 = j; + q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + r_cnjg(&q__1, &q__2); + temp2.r = q__1.r, temp2.i = q__1.i; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, + q__3.i = x[i__5].r * temp1.i + x[i__5].i * + temp1.r; + q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + + q__3.i; + i__6 = i__; + q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, + q__4.i = y[i__6].r * temp2.i + y[i__6].i * + temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L10: */ + } + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = j; + q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, + q__2.i = x[i__4].r * temp1.i + x[i__4].i * + temp1.r; + i__5 = j; + q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, + q__3.i = y[i__5].r * temp2.i + y[i__5].i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + r__1 = a[i__3].r + q__1.r; + a[i__2].r = r__1, a[i__2].i = 0.f; + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + i__3 = jy; + if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f + || y[i__3].i != 0.f)) { + r_cnjg(&q__2, &y[jy]); + q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = + alpha->r * q__2.i + alpha->i * q__2.r; + temp1.r = q__1.r, temp1.i = q__1.i; + i__2 = jx; + q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + r_cnjg(&q__1, &q__2); + temp2.r = q__1.r, temp2.i = q__1.i; + ix = kx; + iy = ky; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, + q__3.i = x[i__5].r * temp1.i + x[i__5].i * + temp1.r; + q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + + q__3.i; + i__6 = iy; + q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, + q__4.i = y[i__6].r * temp2.i + y[i__6].i * + temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; + ix += *incx; + iy += *incy; +/* L30: */ + } + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = jx; + q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, + q__2.i = x[i__4].r * temp1.i + x[i__4].i * + temp1.r; + i__5 = jy; + q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, + q__3.i = y[i__5].r * temp2.i + y[i__5].i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + r__1 = a[i__3].r + q__1.r; + a[i__2].r = r__1, a[i__2].i = 0.f; + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; + } + jx += *incx; + jy += *incy; +/* L40: */ + } + } + } else { + +/* Form A when A is stored in the lower triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + i__3 = j; + if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f + || y[i__3].i != 0.f)) { + r_cnjg(&q__2, &y[j]); + q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = + alpha->r * q__2.i + alpha->i * q__2.r; + temp1.r = q__1.r, temp1.i = q__1.i; + i__2 = j; + q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + r_cnjg(&q__1, &q__2); + temp2.r = q__1.r, temp2.i = q__1.i; + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = j; + q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, + q__2.i = x[i__4].r * temp1.i + x[i__4].i * + temp1.r; + i__5 = j; + q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, + q__3.i = y[i__5].r * temp2.i + y[i__5].i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + r__1 = a[i__3].r + q__1.r; + a[i__2].r = r__1, a[i__2].i = 0.f; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, + q__3.i = x[i__5].r * temp1.i + x[i__5].i * + temp1.r; + q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + + q__3.i; + i__6 = i__; + q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, + q__4.i = y[i__6].r * temp2.i + y[i__6].i * + temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L50: */ + } + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + i__3 = jy; + if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f + || y[i__3].i != 0.f)) { + r_cnjg(&q__2, &y[jy]); + q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = + alpha->r * q__2.i + alpha->i * q__2.r; + temp1.r = q__1.r, temp1.i = q__1.i; + i__2 = jx; + q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + r_cnjg(&q__1, &q__2); + temp2.r = q__1.r, temp2.i = q__1.i; + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = jx; + q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, + q__2.i = x[i__4].r * temp1.i + x[i__4].i * + temp1.r; + i__5 = jy; + q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, + q__3.i = y[i__5].r * temp2.i + y[i__5].i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + r__1 = a[i__3].r + q__1.r; + a[i__2].r = r__1, a[i__2].i = 0.f; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + iy += *incy; + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, + q__3.i = x[i__5].r * temp1.i + x[i__5].i * + temp1.r; + q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + + q__3.i; + i__6 = iy; + q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, + q__4.i = y[i__6].r * temp2.i + y[i__6].i * + temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L70: */ + } + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; + } + jx += *incx; + jy += *incy; +/* L80: */ + } + } + } + + return 0; + +/* End of CHER2 . */ + +} /* cher2_ */ + +/* Subroutine */ int cher2k_(char *uplo, char *trans, integer *n, integer *k, + complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, + real *beta, complex *c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3, i__4, i__5, i__6, i__7; + real r__1; + complex q__1, q__2, q__3, q__4, q__5, q__6; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, j, l, info; + static complex temp1, temp2; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + CHER2K performs one of the hermitian rank 2k operations + + C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + beta*C, + + or + + C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + beta*C, + + where alpha and beta are scalars with beta real, C is an n by n + hermitian matrix and A and B are n by k matrices in the first case + and k by n matrices in the second case. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array C is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of C + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of C + is to be referenced. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' C := alpha*A*conjg( B' ) + + conjg( alpha )*B*conjg( A' ) + + beta*C. + + TRANS = 'C' or 'c' C := alpha*conjg( A' )*B + + conjg( alpha )*conjg( B' )*A + + beta*C. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry with TRANS = 'N' or 'n', K specifies the number + of columns of the matrices A and B, and on entry with + TRANS = 'C' or 'c', K specifies the number of rows of the + matrices A and B. K must be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array A must contain the matrix A, otherwise + the leading k by n part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDA must be at least max( 1, n ), otherwise LDA must + be at least max( 1, k ). + Unchanged on exit. + + B - COMPLEX array of DIMENSION ( LDB, kb ), where kb is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array B must contain the matrix B, otherwise + the leading k by n part of the array B must contain the + matrix B. + Unchanged on exit. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDB must be at least max( 1, n ), otherwise LDB must + be at least max( 1, k ). + Unchanged on exit. + + BETA - REAL . + On entry, BETA specifies the scalar beta. + Unchanged on exit. + + C - COMPLEX array of DIMENSION ( LDC, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array C must contain the upper + triangular part of the hermitian matrix and the strictly + lower triangular part of C is not referenced. On exit, the + upper triangular part of the array C is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array C must contain the lower + triangular part of the hermitian matrix and the strictly + upper triangular part of C is not referenced. On exit, the + lower triangular part of the array C is overwritten by the + lower triangular part of the updated matrix. + Note that the imaginary parts of the diagonal elements need + not be set, they are assumed to be zero, and on exit they + are set to zero. + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, n ). + Unchanged on exit. + + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1. + Ed Anderson, Cray Research Inc. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(trans, "N")) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, "U"); + + info = 0; + if (! upper && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "C")) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldb < max(1,nrowa)) { + info = 9; + } else if (*ldc < max(1,*n)) { + info = 12; + } + if (info != 0) { + xerbla_("CHER2K", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (alpha->r == 0.f && alpha->i == 0.f || *k == 0) && *beta == + 1.f) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (alpha->r == 0.f && alpha->i == 0.f) { + if (upper) { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L30: */ + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; +/* L40: */ + } + } + } else { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L70: */ + } +/* L80: */ + } + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(trans, "N")) { + +/* + Form C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + + C. +*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; +/* L90: */ + } + } else if (*beta != 1.f) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L100: */ + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * a_dim1; + i__4 = j + l * b_dim1; + if (a[i__3].r != 0.f || a[i__3].i != 0.f || (b[i__4].r != + 0.f || b[i__4].i != 0.f)) { + r_cnjg(&q__2, &b[j + l * b_dim1]); + q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, + q__1.i = alpha->r * q__2.i + alpha->i * + q__2.r; + temp1.r = q__1.r, temp1.i = q__1.i; + i__3 = j + l * a_dim1; + q__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, + q__2.i = alpha->r * a[i__3].i + alpha->i * a[ + i__3].r; + r_cnjg(&q__1, &q__2); + temp2.r = q__1.r, temp2.i = q__1.i; + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + q__3.r = a[i__6].r * temp1.r - a[i__6].i * + temp1.i, q__3.i = a[i__6].r * temp1.i + a[ + i__6].i * temp1.r; + q__2.r = c__[i__5].r + q__3.r, q__2.i = c__[i__5] + .i + q__3.i; + i__7 = i__ + l * b_dim1; + q__4.r = b[i__7].r * temp2.r - b[i__7].i * + temp2.i, q__4.i = b[i__7].r * temp2.i + b[ + i__7].i * temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + + q__4.i; + c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; +/* L110: */ + } + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = j + l * a_dim1; + q__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i, + q__2.i = a[i__5].r * temp1.i + a[i__5].i * + temp1.r; + i__6 = j + l * b_dim1; + q__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i, + q__3.i = b[i__6].r * temp2.i + b[i__6].i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + r__1 = c__[i__4].r + q__1.r; + c__[i__3].r = r__1, c__[i__3].i = 0.f; + } +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; +/* L140: */ + } + } else if (*beta != 1.f) { + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L150: */ + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * a_dim1; + i__4 = j + l * b_dim1; + if (a[i__3].r != 0.f || a[i__3].i != 0.f || (b[i__4].r != + 0.f || b[i__4].i != 0.f)) { + r_cnjg(&q__2, &b[j + l * b_dim1]); + q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, + q__1.i = alpha->r * q__2.i + alpha->i * + q__2.r; + temp1.r = q__1.r, temp1.i = q__1.i; + i__3 = j + l * a_dim1; + q__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, + q__2.i = alpha->r * a[i__3].i + alpha->i * a[ + i__3].r; + r_cnjg(&q__1, &q__2); + temp2.r = q__1.r, temp2.i = q__1.i; + i__3 = *n; + for (i__ = j + 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + q__3.r = a[i__6].r * temp1.r - a[i__6].i * + temp1.i, q__3.i = a[i__6].r * temp1.i + a[ + i__6].i * temp1.r; + q__2.r = c__[i__5].r + q__3.r, q__2.i = c__[i__5] + .i + q__3.i; + i__7 = i__ + l * b_dim1; + q__4.r = b[i__7].r * temp2.r - b[i__7].i * + temp2.i, q__4.i = b[i__7].r * temp2.i + b[ + i__7].i * temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + + q__4.i; + c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; +/* L160: */ + } + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = j + l * a_dim1; + q__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i, + q__2.i = a[i__5].r * temp1.i + a[i__5].i * + temp1.r; + i__6 = j + l * b_dim1; + q__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i, + q__3.i = b[i__6].r * temp2.i + b[i__6].i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + r__1 = c__[i__4].r + q__1.r; + c__[i__3].r = r__1, c__[i__3].i = 0.f; + } +/* L170: */ + } +/* L180: */ + } + } + } else { + +/* + Form C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + + C. +*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp1.r = 0.f, temp1.i = 0.f; + temp2.r = 0.f, temp2.i = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + r_cnjg(&q__3, &a[l + i__ * a_dim1]); + i__4 = l + j * b_dim1; + q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, + q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4] + .r; + q__1.r = temp1.r + q__2.r, q__1.i = temp1.i + q__2.i; + temp1.r = q__1.r, temp1.i = q__1.i; + r_cnjg(&q__3, &b[l + i__ * b_dim1]); + i__4 = l + j * a_dim1; + q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, + q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4] + .r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; +/* L190: */ + } + if (i__ == j) { + if (*beta == 0.f) { + i__3 = j + j * c_dim1; + q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + q__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + r_cnjg(&q__4, alpha); + q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, + q__3.i = q__4.r * temp2.i + q__4.i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + r__1 = q__1.r; + c__[i__3].r = r__1, c__[i__3].i = 0.f; + } else { + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + q__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + r_cnjg(&q__4, alpha); + q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, + q__3.i = q__4.r * temp2.i + q__4.i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + r__1 = *beta * c__[i__4].r + q__1.r; + c__[i__3].r = r__1, c__[i__3].i = 0.f; + } + } else { + if (*beta == 0.f) { + i__3 = i__ + j * c_dim1; + q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + q__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + r_cnjg(&q__4, alpha); + q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, + q__3.i = q__4.r * temp2.i + q__4.i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } else { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__3.r = *beta * c__[i__4].r, q__3.i = *beta * + c__[i__4].i; + q__4.r = alpha->r * temp1.r - alpha->i * temp1.i, + q__4.i = alpha->r * temp1.i + alpha->i * + temp1.r; + q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + + q__4.i; + r_cnjg(&q__6, alpha); + q__5.r = q__6.r * temp2.r - q__6.i * temp2.i, + q__5.i = q__6.r * temp2.i + q__6.i * + temp2.r; + q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + + q__5.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } + } +/* L200: */ + } +/* L210: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + temp1.r = 0.f, temp1.i = 0.f; + temp2.r = 0.f, temp2.i = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + r_cnjg(&q__3, &a[l + i__ * a_dim1]); + i__4 = l + j * b_dim1; + q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, + q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4] + .r; + q__1.r = temp1.r + q__2.r, q__1.i = temp1.i + q__2.i; + temp1.r = q__1.r, temp1.i = q__1.i; + r_cnjg(&q__3, &b[l + i__ * b_dim1]); + i__4 = l + j * a_dim1; + q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, + q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4] + .r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; +/* L220: */ + } + if (i__ == j) { + if (*beta == 0.f) { + i__3 = j + j * c_dim1; + q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + q__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + r_cnjg(&q__4, alpha); + q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, + q__3.i = q__4.r * temp2.i + q__4.i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + r__1 = q__1.r; + c__[i__3].r = r__1, c__[i__3].i = 0.f; + } else { + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + q__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + r_cnjg(&q__4, alpha); + q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, + q__3.i = q__4.r * temp2.i + q__4.i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + r__1 = *beta * c__[i__4].r + q__1.r; + c__[i__3].r = r__1, c__[i__3].i = 0.f; + } + } else { + if (*beta == 0.f) { + i__3 = i__ + j * c_dim1; + q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + q__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + r_cnjg(&q__4, alpha); + q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, + q__3.i = q__4.r * temp2.i + q__4.i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } else { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__3.r = *beta * c__[i__4].r, q__3.i = *beta * + c__[i__4].i; + q__4.r = alpha->r * temp1.r - alpha->i * temp1.i, + q__4.i = alpha->r * temp1.i + alpha->i * + temp1.r; + q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + + q__4.i; + r_cnjg(&q__6, alpha); + q__5.r = q__6.r * temp2.r - q__6.i * temp2.i, + q__5.i = q__6.r * temp2.i + q__6.i * + temp2.r; + q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + + q__5.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } + } +/* L230: */ + } +/* L240: */ + } + } + } + + return 0; + +/* End of CHER2K. */ + +} /* cher2k_ */ + +/* Subroutine */ int cherk_(char *uplo, char *trans, integer *n, integer *k, + real *alpha, complex *a, integer *lda, real *beta, complex *c__, + integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, + i__6; + real r__1; + complex q__1, q__2, q__3; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, j, l, info; + static complex temp; + extern logical lsame_(char *, char *); + static integer nrowa; + static real rtemp; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + CHERK performs one of the hermitian rank k operations + + C := alpha*A*conjg( A' ) + beta*C, + + or + + C := alpha*conjg( A' )*A + beta*C, + + where alpha and beta are real scalars, C is an n by n hermitian + matrix and A is an n by k matrix in the first case and a k by n + matrix in the second case. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array C is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of C + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of C + is to be referenced. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' C := alpha*A*conjg( A' ) + beta*C. + + TRANS = 'C' or 'c' C := alpha*conjg( A' )*A + beta*C. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry with TRANS = 'N' or 'n', K specifies the number + of columns of the matrix A, and on entry with + TRANS = 'C' or 'c', K specifies the number of rows of the + matrix A. K must be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array A must contain the matrix A, otherwise + the leading k by n part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDA must be at least max( 1, n ), otherwise LDA must + be at least max( 1, k ). + Unchanged on exit. + + BETA - REAL . + On entry, BETA specifies the scalar beta. + Unchanged on exit. + + C - COMPLEX array of DIMENSION ( LDC, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array C must contain the upper + triangular part of the hermitian matrix and the strictly + lower triangular part of C is not referenced. On exit, the + upper triangular part of the array C is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array C must contain the lower + triangular part of the hermitian matrix and the strictly + upper triangular part of C is not referenced. On exit, the + lower triangular part of the array C is overwritten by the + lower triangular part of the updated matrix. + Note that the imaginary parts of the diagonal elements need + not be set, they are assumed to be zero, and on exit they + are set to zero. + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, n ). + Unchanged on exit. + + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1. + Ed Anderson, Cray Research Inc. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(trans, "N")) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, "U"); + + info = 0; + if (! upper && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "C")) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldc < max(1,*n)) { + info = 10; + } + if (info != 0) { + xerbla_("CHERK ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.f) { + if (upper) { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L30: */ + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; +/* L40: */ + } + } + } else { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L70: */ + } +/* L80: */ + } + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(trans, "N")) { + +/* Form C := alpha*A*conjg( A' ) + beta*C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; +/* L90: */ + } + } else if (*beta != 1.f) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L100: */ + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * a_dim1; + if (a[i__3].r != 0.f || a[i__3].i != 0.f) { + r_cnjg(&q__2, &a[j + l * a_dim1]); + q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + q__2.i = temp.r * a[i__6].i + temp.i * a[ + i__6].r; + q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5] + .i + q__2.i; + c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; +/* L110: */ + } + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = i__ + l * a_dim1; + q__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + q__1.i = temp.r * a[i__5].i + temp.i * a[i__5] + .r; + r__1 = c__[i__4].r + q__1.r; + c__[i__3].r = r__1, c__[i__3].i = 0.f; + } +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; +/* L140: */ + } + } else if (*beta != 1.f) { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L150: */ + } + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * a_dim1; + if (a[i__3].r != 0.f || a[i__3].i != 0.f) { + r_cnjg(&q__2, &a[j + l * a_dim1]); + q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = j + l * a_dim1; + q__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + q__1.i = temp.r * a[i__5].i + temp.i * a[i__5] + .r; + r__1 = c__[i__4].r + q__1.r; + c__[i__3].r = r__1, c__[i__3].i = 0.f; + i__3 = *n; + for (i__ = j + 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + q__2.i = temp.r * a[i__6].i + temp.i * a[ + i__6].r; + q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5] + .i + q__2.i; + c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } + } else { + +/* Form C := alpha*conjg( A' )*A + beta*C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0.f, temp.i = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + r_cnjg(&q__3, &a[l + i__ * a_dim1]); + i__4 = l + j * a_dim1; + q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, + q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4] + .r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L190: */ + } + if (*beta == 0.f) { + i__3 = i__ + j * c_dim1; + q__1.r = *alpha * temp.r, q__1.i = *alpha * temp.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } else { + i__3 = i__ + j * c_dim1; + q__2.r = *alpha * temp.r, q__2.i = *alpha * temp.i; + i__4 = i__ + j * c_dim1; + q__3.r = *beta * c__[i__4].r, q__3.i = *beta * c__[ + i__4].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } +/* L200: */ + } + rtemp = 0.f; + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + r_cnjg(&q__3, &a[l + j * a_dim1]); + i__3 = l + j * a_dim1; + q__2.r = q__3.r * a[i__3].r - q__3.i * a[i__3].i, q__2.i = + q__3.r * a[i__3].i + q__3.i * a[i__3].r; + q__1.r = rtemp + q__2.r, q__1.i = q__2.i; + rtemp = q__1.r; +/* L210: */ + } + if (*beta == 0.f) { + i__2 = j + j * c_dim1; + r__1 = *alpha * rtemp; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *alpha * rtemp + *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } +/* L220: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + rtemp = 0.f; + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + r_cnjg(&q__3, &a[l + j * a_dim1]); + i__3 = l + j * a_dim1; + q__2.r = q__3.r * a[i__3].r - q__3.i * a[i__3].i, q__2.i = + q__3.r * a[i__3].i + q__3.i * a[i__3].r; + q__1.r = rtemp + q__2.r, q__1.i = q__2.i; + rtemp = q__1.r; +/* L230: */ + } + if (*beta == 0.f) { + i__2 = j + j * c_dim1; + r__1 = *alpha * rtemp; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *alpha * rtemp + *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + temp.r = 0.f, temp.i = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + r_cnjg(&q__3, &a[l + i__ * a_dim1]); + i__4 = l + j * a_dim1; + q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, + q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4] + .r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L240: */ + } + if (*beta == 0.f) { + i__3 = i__ + j * c_dim1; + q__1.r = *alpha * temp.r, q__1.i = *alpha * temp.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } else { + i__3 = i__ + j * c_dim1; + q__2.r = *alpha * temp.r, q__2.i = *alpha * temp.i; + i__4 = i__ + j * c_dim1; + q__3.r = *beta * c__[i__4].r, q__3.i = *beta * c__[ + i__4].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } +/* L250: */ + } +/* L260: */ + } + } + } + + return 0; + +/* End of CHERK . */ + +} /* cherk_ */ + +/* Subroutine */ int cscal_(integer *n, complex *ca, complex *cx, integer * + incx) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4; + complex q__1; + + /* Local variables */ + static integer i__, nincx; + + +/* + scales a vector by a constant. + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --cx; + + /* Function Body */ + if (*n <= 0 || *incx <= 0) { + return 0; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + nincx = *n * *incx; + i__1 = nincx; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + i__3 = i__; + i__4 = i__; + q__1.r = ca->r * cx[i__4].r - ca->i * cx[i__4].i, q__1.i = ca->r * cx[ + i__4].i + ca->i * cx[i__4].r; + cx[i__3].r = q__1.r, cx[i__3].i = q__1.i; +/* L10: */ + } + return 0; + +/* code for increment equal to 1 */ + +L20: + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__1 = i__; + i__3 = i__; + q__1.r = ca->r * cx[i__3].r - ca->i * cx[i__3].i, q__1.i = ca->r * cx[ + i__3].i + ca->i * cx[i__3].r; + cx[i__1].r = q__1.r, cx[i__1].i = q__1.i; +/* L30: */ + } + return 0; +} /* cscal_ */ + +/* Subroutine */ int csscal_(integer *n, real *sa, complex *cx, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4; + real r__1, r__2; + complex q__1; + + /* Builtin functions */ + double r_imag(complex *); + + /* Local variables */ + static integer i__, nincx; + + +/* + scales a complex vector by a real constant. + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --cx; + + /* Function Body */ + if (*n <= 0 || *incx <= 0) { + return 0; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + nincx = *n * *incx; + i__1 = nincx; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + i__3 = i__; + i__4 = i__; + r__1 = *sa * cx[i__4].r; + r__2 = *sa * r_imag(&cx[i__]); + q__1.r = r__1, q__1.i = r__2; + cx[i__3].r = q__1.r, cx[i__3].i = q__1.i; +/* L10: */ + } + return 0; + +/* code for increment equal to 1 */ + +L20: + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__1 = i__; + i__3 = i__; + r__1 = *sa * cx[i__3].r; + r__2 = *sa * r_imag(&cx[i__]); + q__1.r = r__1, q__1.i = r__2; + cx[i__1].r = q__1.r, cx[i__1].i = q__1.i; +/* L30: */ + } + return 0; +} /* csscal_ */ + +/* Subroutine */ int cswap_(integer *n, complex *cx, integer *incx, complex * + cy, integer *incy) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + + /* Local variables */ + static integer i__, ix, iy; + static complex ctemp; + + +/* + interchanges two vectors. + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --cy; + --cx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments not equal + to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ix; + ctemp.r = cx[i__2].r, ctemp.i = cx[i__2].i; + i__2 = ix; + i__3 = iy; + cx[i__2].r = cy[i__3].r, cx[i__2].i = cy[i__3].i; + i__2 = iy; + cy[i__2].r = ctemp.r, cy[i__2].i = ctemp.i; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + ctemp.r = cx[i__2].r, ctemp.i = cx[i__2].i; + i__2 = i__; + i__3 = i__; + cx[i__2].r = cy[i__3].r, cx[i__2].i = cy[i__3].i; + i__2 = i__; + cy[i__2].r = ctemp.r, cy[i__2].i = ctemp.i; +/* L30: */ + } + return 0; +} /* cswap_ */ + +/* Subroutine */ int ctrmm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, complex *alpha, complex *a, integer *lda, + complex *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, + i__6; + complex q__1, q__2, q__3; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, j, k, info; + static complex temp; + extern logical lsame_(char *, char *); + static logical lside; + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical noconj, nounit; + + +/* + Purpose + ======= + + CTRMM performs one of the matrix-matrix operations + + B := alpha*op( A )*B, or B := alpha*B*op( A ) + + where alpha is a scalar, B is an m by n matrix, A is a unit, or + non-unit, upper or lower triangular matrix and op( A ) is one of + + op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). + + Parameters + ========== + + SIDE - CHARACTER*1. + On entry, SIDE specifies whether op( A ) multiplies B from + the left or right as follows: + + SIDE = 'L' or 'l' B := alpha*op( A )*B. + + SIDE = 'R' or 'r' B := alpha*B*op( A ). + + Unchanged on exit. + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix A is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n' op( A ) = A. + + TRANSA = 'T' or 't' op( A ) = A'. + + TRANSA = 'C' or 'c' op( A ) = conjg( A' ). + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit triangular + as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of B. M must be at + least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of B. N must be + at least zero. + Unchanged on exit. + + ALPHA - COMPLEX . + On entry, ALPHA specifies the scalar alpha. When alpha is + zero then A is not referenced and B need not be set before + entry. + Unchanged on exit. + + A - COMPLEX array of DIMENSION ( LDA, k ), where k is m + when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. + Before entry with UPLO = 'U' or 'u', the leading k by k + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading k by k + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When SIDE = 'L' or 'l' then + LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' + then LDA must be at least max( 1, n ). + Unchanged on exit. + + B - COMPLEX array of DIMENSION ( LDB, n ). + Before entry, the leading m by n part of the array B must + contain the matrix B, and on exit is overwritten by the + transformed matrix. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. LDB must be at least + max( 1, m ). + Unchanged on exit. + + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = lsame_(side, "L"); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + noconj = lsame_(transa, "T"); + nounit = lsame_(diag, "N"); + upper = lsame_(uplo, "U"); + + info = 0; + if (! lside && ! lsame_(side, "R")) { + info = 1; + } else if (! upper && ! lsame_(uplo, "L")) { + info = 2; + } else if (! lsame_(transa, "N") && ! lsame_(transa, + "T") && ! lsame_(transa, "C")) { + info = 3; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + xerbla_("CTRMM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (alpha->r == 0.f && alpha->i == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + b[i__3].r = 0.f, b[i__3].i = 0.f; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (lsame_(transa, "N")) { + +/* Form B := alpha*A*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + i__3 = k + j * b_dim1; + if (b[i__3].r != 0.f || b[i__3].i != 0.f) { + i__3 = k + j * b_dim1; + q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3] + .i, q__1.i = alpha->r * b[i__3].i + + alpha->i * b[i__3].r; + temp.r = q__1.r, temp.i = q__1.i; + i__3 = k - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = i__ + k * a_dim1; + q__2.r = temp.r * a[i__6].r - temp.i * a[i__6] + .i, q__2.i = temp.r * a[i__6].i + + temp.i * a[i__6].r; + q__1.r = b[i__5].r + q__2.r, q__1.i = b[i__5] + .i + q__2.i; + b[i__4].r = q__1.r, b[i__4].i = q__1.i; +/* L30: */ + } + if (nounit) { + i__3 = k + k * a_dim1; + q__1.r = temp.r * a[i__3].r - temp.i * a[i__3] + .i, q__1.i = temp.r * a[i__3].i + + temp.i * a[i__3].r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__3 = k + j * b_dim1; + b[i__3].r = temp.r, b[i__3].i = temp.i; + } +/* L40: */ + } +/* L50: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (k = *m; k >= 1; --k) { + i__2 = k + j * b_dim1; + if (b[i__2].r != 0.f || b[i__2].i != 0.f) { + i__2 = k + j * b_dim1; + q__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2] + .i, q__1.i = alpha->r * b[i__2].i + + alpha->i * b[i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + i__2 = k + j * b_dim1; + b[i__2].r = temp.r, b[i__2].i = temp.i; + if (nounit) { + i__2 = k + j * b_dim1; + i__3 = k + j * b_dim1; + i__4 = k + k * a_dim1; + q__1.r = b[i__3].r * a[i__4].r - b[i__3].i * + a[i__4].i, q__1.i = b[i__3].r * a[ + i__4].i + b[i__3].i * a[i__4].r; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; + } + i__2 = *m; + for (i__ = k + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ + k * a_dim1; + q__2.r = temp.r * a[i__5].r - temp.i * a[i__5] + .i, q__2.i = temp.r * a[i__5].i + + temp.i * a[i__5].r; + q__1.r = b[i__4].r + q__2.r, q__1.i = b[i__4] + .i + q__2.i; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L60: */ + } + } +/* L70: */ + } +/* L80: */ + } + } + } else { + +/* Form B := alpha*A'*B or B := alpha*conjg( A' )*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + i__2 = i__ + j * b_dim1; + temp.r = b[i__2].r, temp.i = b[i__2].i; + if (noconj) { + if (nounit) { + i__2 = i__ + i__ * a_dim1; + q__1.r = temp.r * a[i__2].r - temp.i * a[i__2] + .i, q__1.i = temp.r * a[i__2].i + + temp.i * a[i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__2 = i__ - 1; + for (k = 1; k <= i__2; ++k) { + i__3 = k + i__ * a_dim1; + i__4 = k + j * b_dim1; + q__2.r = a[i__3].r * b[i__4].r - a[i__3].i * + b[i__4].i, q__2.i = a[i__3].r * b[ + i__4].i + a[i__3].i * b[i__4].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L90: */ + } + } else { + if (nounit) { + r_cnjg(&q__2, &a[i__ + i__ * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__2 = i__ - 1; + for (k = 1; k <= i__2; ++k) { + r_cnjg(&q__3, &a[k + i__ * a_dim1]); + i__3 = k + j * b_dim1; + q__2.r = q__3.r * b[i__3].r - q__3.i * b[i__3] + .i, q__2.i = q__3.r * b[i__3].i + + q__3.i * b[i__3].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L100: */ + } + } + i__2 = i__ + j * b_dim1; + q__1.r = alpha->r * temp.r - alpha->i * temp.i, + q__1.i = alpha->r * temp.i + alpha->i * + temp.r; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; +/* L110: */ + } +/* L120: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + temp.r = b[i__3].r, temp.i = b[i__3].i; + if (noconj) { + if (nounit) { + i__3 = i__ + i__ * a_dim1; + q__1.r = temp.r * a[i__3].r - temp.i * a[i__3] + .i, q__1.i = temp.r * a[i__3].i + + temp.i * a[i__3].r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__3 = *m; + for (k = i__ + 1; k <= i__3; ++k) { + i__4 = k + i__ * a_dim1; + i__5 = k + j * b_dim1; + q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * + b[i__5].i, q__2.i = a[i__4].r * b[ + i__5].i + a[i__4].i * b[i__5].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L130: */ + } + } else { + if (nounit) { + r_cnjg(&q__2, &a[i__ + i__ * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__3 = *m; + for (k = i__ + 1; k <= i__3; ++k) { + r_cnjg(&q__3, &a[k + i__ * a_dim1]); + i__4 = k + j * b_dim1; + q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4] + .i, q__2.i = q__3.r * b[i__4].i + + q__3.i * b[i__4].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L140: */ + } + } + i__3 = i__ + j * b_dim1; + q__1.r = alpha->r * temp.r - alpha->i * temp.i, + q__1.i = alpha->r * temp.i + alpha->i * + temp.r; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L150: */ + } +/* L160: */ + } + } + } + } else { + if (lsame_(transa, "N")) { + +/* Form B := alpha*B*A. */ + + if (upper) { + for (j = *n; j >= 1; --j) { + temp.r = alpha->r, temp.i = alpha->i; + if (nounit) { + i__1 = j + j * a_dim1; + q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, + q__1.i = temp.r * a[i__1].i + temp.i * a[i__1] + .r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + q__1.i = temp.r * b[i__3].i + temp.i * b[i__3] + .r; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; +/* L170: */ + } + i__1 = j - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k + j * a_dim1; + if (a[i__2].r != 0.f || a[i__2].i != 0.f) { + i__2 = k + j * a_dim1; + q__1.r = alpha->r * a[i__2].r - alpha->i * a[i__2] + .i, q__1.i = alpha->r * a[i__2].i + + alpha->i * a[i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ + k * b_dim1; + q__2.r = temp.r * b[i__5].r - temp.i * b[i__5] + .i, q__2.i = temp.r * b[i__5].i + + temp.i * b[i__5].r; + q__1.r = b[i__4].r + q__2.r, q__1.i = b[i__4] + .i + q__2.i; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L180: */ + } + } +/* L190: */ + } +/* L200: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp.r = alpha->r, temp.i = alpha->i; + if (nounit) { + i__2 = j + j * a_dim1; + q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + q__1.i = temp.r * a[i__2].i + temp.i * a[i__2] + .r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + q__1.i = temp.r * b[i__4].i + temp.i * b[i__4] + .r; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L210: */ + } + i__2 = *n; + for (k = j + 1; k <= i__2; ++k) { + i__3 = k + j * a_dim1; + if (a[i__3].r != 0.f || a[i__3].i != 0.f) { + i__3 = k + j * a_dim1; + q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3] + .i, q__1.i = alpha->r * a[i__3].i + + alpha->i * a[i__3].r; + temp.r = q__1.r, temp.i = q__1.i; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = i__ + k * b_dim1; + q__2.r = temp.r * b[i__6].r - temp.i * b[i__6] + .i, q__2.i = temp.r * b[i__6].i + + temp.i * b[i__6].r; + q__1.r = b[i__5].r + q__2.r, q__1.i = b[i__5] + .i + q__2.i; + b[i__4].r = q__1.r, b[i__4].i = q__1.i; +/* L220: */ + } + } +/* L230: */ + } +/* L240: */ + } + } + } else { + +/* Form B := alpha*B*A' or B := alpha*B*conjg( A' ). */ + + if (upper) { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + i__2 = k - 1; + for (j = 1; j <= i__2; ++j) { + i__3 = j + k * a_dim1; + if (a[i__3].r != 0.f || a[i__3].i != 0.f) { + if (noconj) { + i__3 = j + k * a_dim1; + q__1.r = alpha->r * a[i__3].r - alpha->i * a[ + i__3].i, q__1.i = alpha->r * a[i__3] + .i + alpha->i * a[i__3].r; + temp.r = q__1.r, temp.i = q__1.i; + } else { + r_cnjg(&q__2, &a[j + k * a_dim1]); + q__1.r = alpha->r * q__2.r - alpha->i * + q__2.i, q__1.i = alpha->r * q__2.i + + alpha->i * q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = i__ + k * b_dim1; + q__2.r = temp.r * b[i__6].r - temp.i * b[i__6] + .i, q__2.i = temp.r * b[i__6].i + + temp.i * b[i__6].r; + q__1.r = b[i__5].r + q__2.r, q__1.i = b[i__5] + .i + q__2.i; + b[i__4].r = q__1.r, b[i__4].i = q__1.i; +/* L250: */ + } + } +/* L260: */ + } + temp.r = alpha->r, temp.i = alpha->i; + if (nounit) { + if (noconj) { + i__2 = k + k * a_dim1; + q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + q__1.i = temp.r * a[i__2].i + temp.i * a[ + i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + } else { + r_cnjg(&q__2, &a[k + k * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + } + if (temp.r != 1.f || temp.i != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + k * b_dim1; + i__4 = i__ + k * b_dim1; + q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + q__1.i = temp.r * b[i__4].i + temp.i * b[ + i__4].r; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L270: */ + } + } +/* L280: */ + } + } else { + for (k = *n; k >= 1; --k) { + i__1 = *n; + for (j = k + 1; j <= i__1; ++j) { + i__2 = j + k * a_dim1; + if (a[i__2].r != 0.f || a[i__2].i != 0.f) { + if (noconj) { + i__2 = j + k * a_dim1; + q__1.r = alpha->r * a[i__2].r - alpha->i * a[ + i__2].i, q__1.i = alpha->r * a[i__2] + .i + alpha->i * a[i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + } else { + r_cnjg(&q__2, &a[j + k * a_dim1]); + q__1.r = alpha->r * q__2.r - alpha->i * + q__2.i, q__1.i = alpha->r * q__2.i + + alpha->i * q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ + k * b_dim1; + q__2.r = temp.r * b[i__5].r - temp.i * b[i__5] + .i, q__2.i = temp.r * b[i__5].i + + temp.i * b[i__5].r; + q__1.r = b[i__4].r + q__2.r, q__1.i = b[i__4] + .i + q__2.i; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L290: */ + } + } +/* L300: */ + } + temp.r = alpha->r, temp.i = alpha->i; + if (nounit) { + if (noconj) { + i__1 = k + k * a_dim1; + q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, + q__1.i = temp.r * a[i__1].i + temp.i * a[ + i__1].r; + temp.r = q__1.r, temp.i = q__1.i; + } else { + r_cnjg(&q__2, &a[k + k * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + } + if (temp.r != 1.f || temp.i != 0.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + k * b_dim1; + i__3 = i__ + k * b_dim1; + q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + q__1.i = temp.r * b[i__3].i + temp.i * b[ + i__3].r; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; +/* L310: */ + } + } +/* L320: */ + } + } + } + } + + return 0; + +/* End of CTRMM . */ + +} /* ctrmm_ */ + +/* Subroutine */ int ctrmv_(char *uplo, char *trans, char *diag, integer *n, + complex *a, integer *lda, complex *x, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1, q__2, q__3; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, j, ix, jx, kx, info; + static complex temp; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical noconj, nounit; + + +/* + Purpose + ======= + + CTRMV performs one of the matrix-vector operations + + x := A*x, or x := A'*x, or x := conjg( A' )*x, + + where x is an n element vector and A is an n by n unit, or non-unit, + upper or lower triangular matrix. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' x := A*x. + + TRANS = 'T' or 't' x := A'*x. + + TRANS = 'C' or 'c' x := conjg( A' )*x. + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit + triangular as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + A - COMPLEX array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + X - COMPLEX array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. On exit, X is overwritten with the + tranformed vector x. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! lsame_(trans, "C")) { + info = 2; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1,*n)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } + if (info != 0) { + xerbla_("CTRMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + noconj = lsame_(trans, "T"); + nounit = lsame_(diag, "N"); + +/* + Set up the start point in X if the increment is not unity. This + will be ( N - 1 )*INCX too small for descending loops. +*/ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. +*/ + + if (lsame_(trans, "N")) { + +/* Form x := A*x. */ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0.f || x[i__2].i != 0.f) { + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + q__2.i = temp.r * a[i__5].i + temp.i * a[ + i__5].r; + q__1.r = x[i__4].r + q__2.r, q__1.i = x[i__4].i + + q__2.i; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; +/* L10: */ + } + if (nounit) { + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + q__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[ + i__4].i, q__1.i = x[i__3].r * a[i__4].i + + x[i__3].i * a[i__4].r; + x[i__2].r = q__1.r, x[i__2].i = q__1.i; + } + } +/* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0.f || x[i__2].i != 0.f) { + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + ix = kx; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = ix; + i__4 = ix; + i__5 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + q__2.i = temp.r * a[i__5].i + temp.i * a[ + i__5].r; + q__1.r = x[i__4].r + q__2.r, q__1.i = x[i__4].i + + q__2.i; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + ix += *incx; +/* L30: */ + } + if (nounit) { + i__2 = jx; + i__3 = jx; + i__4 = j + j * a_dim1; + q__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[ + i__4].i, q__1.i = x[i__3].r * a[i__4].i + + x[i__3].i * a[i__4].r; + x[i__2].r = q__1.r, x[i__2].i = q__1.i; + } + } + jx += *incx; +/* L40: */ + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + if (x[i__1].r != 0.f || x[i__1].i != 0.f) { + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + i__2 = i__; + i__3 = i__; + i__4 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, + q__2.i = temp.r * a[i__4].i + temp.i * a[ + i__4].r; + q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + + q__2.i; + x[i__2].r = q__1.r, x[i__2].i = q__1.i; +/* L50: */ + } + if (nounit) { + i__1 = j; + i__2 = j; + i__3 = j + j * a_dim1; + q__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[ + i__3].i, q__1.i = x[i__2].r * a[i__3].i + + x[i__2].i * a[i__3].r; + x[i__1].r = q__1.r, x[i__1].i = q__1.i; + } + } +/* L60: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + i__1 = jx; + if (x[i__1].r != 0.f || x[i__1].i != 0.f) { + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + ix = kx; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + i__2 = ix; + i__3 = ix; + i__4 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, + q__2.i = temp.r * a[i__4].i + temp.i * a[ + i__4].r; + q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + + q__2.i; + x[i__2].r = q__1.r, x[i__2].i = q__1.i; + ix -= *incx; +/* L70: */ + } + if (nounit) { + i__1 = jx; + i__2 = jx; + i__3 = j + j * a_dim1; + q__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[ + i__3].i, q__1.i = x[i__2].r * a[i__3].i + + x[i__2].i * a[i__3].r; + x[i__1].r = q__1.r, x[i__1].i = q__1.i; + } + } + jx -= *incx; +/* L80: */ + } + } + } + } else { + +/* Form x := A'*x or x := conjg( A' )*x. */ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + if (noconj) { + if (nounit) { + i__1 = j + j * a_dim1; + q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, + q__1.i = temp.r * a[i__1].i + temp.i * a[ + i__1].r; + temp.r = q__1.r, temp.i = q__1.i; + } + for (i__ = j - 1; i__ >= 1; --i__) { + i__1 = i__ + j * a_dim1; + i__2 = i__; + q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[ + i__2].i, q__2.i = a[i__1].r * x[i__2].i + + a[i__1].i * x[i__2].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L90: */ + } + } else { + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + for (i__ = j - 1; i__ >= 1; --i__) { + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__1 = i__; + q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, + q__2.i = q__3.r * x[i__1].i + q__3.i * x[ + i__1].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L100: */ + } + } + i__1 = j; + x[i__1].r = temp.r, x[i__1].i = temp.i; +/* L110: */ + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + ix = jx; + if (noconj) { + if (nounit) { + i__1 = j + j * a_dim1; + q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, + q__1.i = temp.r * a[i__1].i + temp.i * a[ + i__1].r; + temp.r = q__1.r, temp.i = q__1.i; + } + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + i__1 = i__ + j * a_dim1; + i__2 = ix; + q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[ + i__2].i, q__2.i = a[i__1].r * x[i__2].i + + a[i__1].i * x[i__2].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L120: */ + } + } else { + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__1 = ix; + q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, + q__2.i = q__3.r * x[i__1].i + q__3.i * x[ + i__1].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L130: */ + } + } + i__1 = jx; + x[i__1].r = temp.r, x[i__1].i = temp.i; + jx -= *incx; +/* L140: */ + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + if (noconj) { + if (nounit) { + i__2 = j + j * a_dim1; + q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + q__1.i = temp.r * a[i__2].i + temp.i * a[ + i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__; + q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ + i__4].i, q__2.i = a[i__3].r * x[i__4].i + + a[i__3].i * x[i__4].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L150: */ + } + } else { + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = i__; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, + q__2.i = q__3.r * x[i__3].i + q__3.i * x[ + i__3].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L160: */ + } + } + i__2 = j; + x[i__2].r = temp.r, x[i__2].i = temp.i; +/* L170: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + ix = jx; + if (noconj) { + if (nounit) { + i__2 = j + j * a_dim1; + q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + q__1.i = temp.r * a[i__2].i + temp.i * a[ + i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + i__3 = i__ + j * a_dim1; + i__4 = ix; + q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ + i__4].i, q__2.i = a[i__3].r * x[i__4].i + + a[i__3].i * x[i__4].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L180: */ + } + } else { + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = ix; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, + q__2.i = q__3.r * x[i__3].i + q__3.i * x[ + i__3].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L190: */ + } + } + i__2 = jx; + x[i__2].r = temp.r, x[i__2].i = temp.i; + jx += *incx; +/* L200: */ + } + } + } + } + + return 0; + +/* End of CTRMV . */ + +} /* ctrmv_ */ + +/* Subroutine */ int ctrsm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, complex *alpha, complex *a, integer *lda, + complex *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, + i__6, i__7; + complex q__1, q__2, q__3; + + /* Builtin functions */ + void c_div(complex *, complex *, complex *), r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, j, k, info; + static complex temp; + extern logical lsame_(char *, char *); + static logical lside; + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical noconj, nounit; + + +/* + Purpose + ======= + + CTRSM solves one of the matrix equations + + op( A )*X = alpha*B, or X*op( A ) = alpha*B, + + where alpha is a scalar, X and B are m by n matrices, A is a unit, or + non-unit, upper or lower triangular matrix and op( A ) is one of + + op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). + + The matrix X is overwritten on B. + + Parameters + ========== + + SIDE - CHARACTER*1. + On entry, SIDE specifies whether op( A ) appears on the left + or right of X as follows: + + SIDE = 'L' or 'l' op( A )*X = alpha*B. + + SIDE = 'R' or 'r' X*op( A ) = alpha*B. + + Unchanged on exit. + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix A is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n' op( A ) = A. + + TRANSA = 'T' or 't' op( A ) = A'. + + TRANSA = 'C' or 'c' op( A ) = conjg( A' ). + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit triangular + as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of B. M must be at + least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of B. N must be + at least zero. + Unchanged on exit. + + ALPHA - COMPLEX . + On entry, ALPHA specifies the scalar alpha. When alpha is + zero then A is not referenced and B need not be set before + entry. + Unchanged on exit. + + A - COMPLEX array of DIMENSION ( LDA, k ), where k is m + when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. + Before entry with UPLO = 'U' or 'u', the leading k by k + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading k by k + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When SIDE = 'L' or 'l' then + LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' + then LDA must be at least max( 1, n ). + Unchanged on exit. + + B - COMPLEX array of DIMENSION ( LDB, n ). + Before entry, the leading m by n part of the array B must + contain the right-hand side matrix B, and on exit is + overwritten by the solution matrix X. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. LDB must be at least + max( 1, m ). + Unchanged on exit. + + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = lsame_(side, "L"); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + noconj = lsame_(transa, "T"); + nounit = lsame_(diag, "N"); + upper = lsame_(uplo, "U"); + + info = 0; + if (! lside && ! lsame_(side, "R")) { + info = 1; + } else if (! upper && ! lsame_(uplo, "L")) { + info = 2; + } else if (! lsame_(transa, "N") && ! lsame_(transa, + "T") && ! lsame_(transa, "C")) { + info = 3; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + xerbla_("CTRSM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (alpha->r == 0.f && alpha->i == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + b[i__3].r = 0.f, b[i__3].i = 0.f; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (lsame_(transa, "N")) { + +/* Form B := alpha*inv( A )*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (alpha->r != 1.f || alpha->i != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] + .i, q__1.i = alpha->r * b[i__4].i + + alpha->i * b[i__4].r; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L30: */ + } + } + for (k = *m; k >= 1; --k) { + i__2 = k + j * b_dim1; + if (b[i__2].r != 0.f || b[i__2].i != 0.f) { + if (nounit) { + i__2 = k + j * b_dim1; + c_div(&q__1, &b[k + j * b_dim1], &a[k + k * + a_dim1]); + b[i__2].r = q__1.r, b[i__2].i = q__1.i; + } + i__2 = k - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = k + j * b_dim1; + i__6 = i__ + k * a_dim1; + q__2.r = b[i__5].r * a[i__6].r - b[i__5].i * + a[i__6].i, q__2.i = b[i__5].r * a[ + i__6].i + b[i__5].i * a[i__6].r; + q__1.r = b[i__4].r - q__2.r, q__1.i = b[i__4] + .i - q__2.i; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L40: */ + } + } +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (alpha->r != 1.f || alpha->i != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] + .i, q__1.i = alpha->r * b[i__4].i + + alpha->i * b[i__4].r; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L70: */ + } + } + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + i__3 = k + j * b_dim1; + if (b[i__3].r != 0.f || b[i__3].i != 0.f) { + if (nounit) { + i__3 = k + j * b_dim1; + c_div(&q__1, &b[k + j * b_dim1], &a[k + k * + a_dim1]); + b[i__3].r = q__1.r, b[i__3].i = q__1.i; + } + i__3 = *m; + for (i__ = k + 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = k + j * b_dim1; + i__7 = i__ + k * a_dim1; + q__2.r = b[i__6].r * a[i__7].r - b[i__6].i * + a[i__7].i, q__2.i = b[i__6].r * a[ + i__7].i + b[i__6].i * a[i__7].r; + q__1.r = b[i__5].r - q__2.r, q__1.i = b[i__5] + .i - q__2.i; + b[i__4].r = q__1.r, b[i__4].i = q__1.i; +/* L80: */ + } + } +/* L90: */ + } +/* L100: */ + } + } + } else { + +/* + Form B := alpha*inv( A' )*B + or B := alpha*inv( conjg( A' ) )*B. +*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + q__1.i = alpha->r * b[i__3].i + alpha->i * b[ + i__3].r; + temp.r = q__1.r, temp.i = q__1.i; + if (noconj) { + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + i__4 = k + i__ * a_dim1; + i__5 = k + j * b_dim1; + q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * + b[i__5].i, q__2.i = a[i__4].r * b[ + i__5].i + a[i__4].i * b[i__5].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L110: */ + } + if (nounit) { + c_div(&q__1, &temp, &a[i__ + i__ * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } + } else { + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + r_cnjg(&q__3, &a[k + i__ * a_dim1]); + i__4 = k + j * b_dim1; + q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4] + .i, q__2.i = q__3.r * b[i__4].i + + q__3.i * b[i__4].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L120: */ + } + if (nounit) { + r_cnjg(&q__2, &a[i__ + i__ * a_dim1]); + c_div(&q__1, &temp, &q__2); + temp.r = q__1.r, temp.i = q__1.i; + } + } + i__3 = i__ + j * b_dim1; + b[i__3].r = temp.r, b[i__3].i = temp.i; +/* L130: */ + } +/* L140: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + i__2 = i__ + j * b_dim1; + q__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i, + q__1.i = alpha->r * b[i__2].i + alpha->i * b[ + i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + if (noconj) { + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + i__3 = k + i__ * a_dim1; + i__4 = k + j * b_dim1; + q__2.r = a[i__3].r * b[i__4].r - a[i__3].i * + b[i__4].i, q__2.i = a[i__3].r * b[ + i__4].i + a[i__3].i * b[i__4].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L150: */ + } + if (nounit) { + c_div(&q__1, &temp, &a[i__ + i__ * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } + } else { + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + r_cnjg(&q__3, &a[k + i__ * a_dim1]); + i__3 = k + j * b_dim1; + q__2.r = q__3.r * b[i__3].r - q__3.i * b[i__3] + .i, q__2.i = q__3.r * b[i__3].i + + q__3.i * b[i__3].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L160: */ + } + if (nounit) { + r_cnjg(&q__2, &a[i__ + i__ * a_dim1]); + c_div(&q__1, &temp, &q__2); + temp.r = q__1.r, temp.i = q__1.i; + } + } + i__2 = i__ + j * b_dim1; + b[i__2].r = temp.r, b[i__2].i = temp.i; +/* L170: */ + } +/* L180: */ + } + } + } + } else { + if (lsame_(transa, "N")) { + +/* Form B := alpha*B*inv( A ). */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (alpha->r != 1.f || alpha->i != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] + .i, q__1.i = alpha->r * b[i__4].i + + alpha->i * b[i__4].r; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L190: */ + } + } + i__2 = j - 1; + for (k = 1; k <= i__2; ++k) { + i__3 = k + j * a_dim1; + if (a[i__3].r != 0.f || a[i__3].i != 0.f) { + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = k + j * a_dim1; + i__7 = i__ + k * b_dim1; + q__2.r = a[i__6].r * b[i__7].r - a[i__6].i * + b[i__7].i, q__2.i = a[i__6].r * b[ + i__7].i + a[i__6].i * b[i__7].r; + q__1.r = b[i__5].r - q__2.r, q__1.i = b[i__5] + .i - q__2.i; + b[i__4].r = q__1.r, b[i__4].i = q__1.i; +/* L200: */ + } + } +/* L210: */ + } + if (nounit) { + c_div(&q__1, &c_b21, &a[j + j * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + q__1.i = temp.r * b[i__4].i + temp.i * b[ + i__4].r; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L220: */ + } + } +/* L230: */ + } + } else { + for (j = *n; j >= 1; --j) { + if (alpha->r != 1.f || alpha->i != 0.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3] + .i, q__1.i = alpha->r * b[i__3].i + + alpha->i * b[i__3].r; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; +/* L240: */ + } + } + i__1 = *n; + for (k = j + 1; k <= i__1; ++k) { + i__2 = k + j * a_dim1; + if (a[i__2].r != 0.f || a[i__2].i != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = k + j * a_dim1; + i__6 = i__ + k * b_dim1; + q__2.r = a[i__5].r * b[i__6].r - a[i__5].i * + b[i__6].i, q__2.i = a[i__5].r * b[ + i__6].i + a[i__5].i * b[i__6].r; + q__1.r = b[i__4].r - q__2.r, q__1.i = b[i__4] + .i - q__2.i; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L250: */ + } + } +/* L260: */ + } + if (nounit) { + c_div(&q__1, &c_b21, &a[j + j * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + q__1.i = temp.r * b[i__3].i + temp.i * b[ + i__3].r; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; +/* L270: */ + } + } +/* L280: */ + } + } + } else { + +/* + Form B := alpha*B*inv( A' ) + or B := alpha*B*inv( conjg( A' ) ). +*/ + + if (upper) { + for (k = *n; k >= 1; --k) { + if (nounit) { + if (noconj) { + c_div(&q__1, &c_b21, &a[k + k * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } else { + r_cnjg(&q__2, &a[k + k * a_dim1]); + c_div(&q__1, &c_b21, &q__2); + temp.r = q__1.r, temp.i = q__1.i; + } + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + k * b_dim1; + i__3 = i__ + k * b_dim1; + q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + q__1.i = temp.r * b[i__3].i + temp.i * b[ + i__3].r; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; +/* L290: */ + } + } + i__1 = k - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = j + k * a_dim1; + if (a[i__2].r != 0.f || a[i__2].i != 0.f) { + if (noconj) { + i__2 = j + k * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + } else { + r_cnjg(&q__1, &a[j + k * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ + k * b_dim1; + q__2.r = temp.r * b[i__5].r - temp.i * b[i__5] + .i, q__2.i = temp.r * b[i__5].i + + temp.i * b[i__5].r; + q__1.r = b[i__4].r - q__2.r, q__1.i = b[i__4] + .i - q__2.i; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L300: */ + } + } +/* L310: */ + } + if (alpha->r != 1.f || alpha->i != 0.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + k * b_dim1; + i__3 = i__ + k * b_dim1; + q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3] + .i, q__1.i = alpha->r * b[i__3].i + + alpha->i * b[i__3].r; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; +/* L320: */ + } + } +/* L330: */ + } + } else { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (nounit) { + if (noconj) { + c_div(&q__1, &c_b21, &a[k + k * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } else { + r_cnjg(&q__2, &a[k + k * a_dim1]); + c_div(&q__1, &c_b21, &q__2); + temp.r = q__1.r, temp.i = q__1.i; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + k * b_dim1; + i__4 = i__ + k * b_dim1; + q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + q__1.i = temp.r * b[i__4].i + temp.i * b[ + i__4].r; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L340: */ + } + } + i__2 = *n; + for (j = k + 1; j <= i__2; ++j) { + i__3 = j + k * a_dim1; + if (a[i__3].r != 0.f || a[i__3].i != 0.f) { + if (noconj) { + i__3 = j + k * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + } else { + r_cnjg(&q__1, &a[j + k * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = i__ + k * b_dim1; + q__2.r = temp.r * b[i__6].r - temp.i * b[i__6] + .i, q__2.i = temp.r * b[i__6].i + + temp.i * b[i__6].r; + q__1.r = b[i__5].r - q__2.r, q__1.i = b[i__5] + .i - q__2.i; + b[i__4].r = q__1.r, b[i__4].i = q__1.i; +/* L350: */ + } + } +/* L360: */ + } + if (alpha->r != 1.f || alpha->i != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + k * b_dim1; + i__4 = i__ + k * b_dim1; + q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] + .i, q__1.i = alpha->r * b[i__4].i + + alpha->i * b[i__4].r; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L370: */ + } + } +/* L380: */ + } + } + } + } + + return 0; + +/* End of CTRSM . */ + +} /* ctrsm_ */ + +/* Subroutine */ int ctrsv_(char *uplo, char *trans, char *diag, integer *n, + complex *a, integer *lda, complex *x, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1, q__2, q__3; + + /* Builtin functions */ + void c_div(complex *, complex *, complex *), r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, j, ix, jx, kx, info; + static complex temp; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical noconj, nounit; + + +/* + Purpose + ======= + + CTRSV solves one of the systems of equations + + A*x = b, or A'*x = b, or conjg( A' )*x = b, + + where b and x are n element vectors and A is an n by n unit, or + non-unit, upper or lower triangular matrix. + + No test for singularity or near-singularity is included in this + routine. Such tests must be performed before calling this routine. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the equations to be solved as + follows: + + TRANS = 'N' or 'n' A*x = b. + + TRANS = 'T' or 't' A'*x = b. + + TRANS = 'C' or 'c' conjg( A' )*x = b. + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit + triangular as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + A - COMPLEX array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + X - COMPLEX array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element right-hand side vector b. On exit, X is overwritten + with the solution vector x. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! lsame_(trans, "C")) { + info = 2; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1,*n)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } + if (info != 0) { + xerbla_("CTRSV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + noconj = lsame_(trans, "T"); + nounit = lsame_(diag, "N"); + +/* + Set up the start point in X if the increment is not unity. This + will be ( N - 1 )*INCX too small for descending loops. +*/ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. +*/ + + if (lsame_(trans, "N")) { + +/* Form x := inv( A )*x. */ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + if (x[i__1].r != 0.f || x[i__1].i != 0.f) { + if (nounit) { + i__1 = j; + c_div(&q__1, &x[j], &a[j + j * a_dim1]); + x[i__1].r = q__1.r, x[i__1].i = q__1.i; + } + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + for (i__ = j - 1; i__ >= 1; --i__) { + i__1 = i__; + i__2 = i__; + i__3 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, + q__2.i = temp.r * a[i__3].i + temp.i * a[ + i__3].r; + q__1.r = x[i__2].r - q__2.r, q__1.i = x[i__2].i - + q__2.i; + x[i__1].r = q__1.r, x[i__1].i = q__1.i; +/* L10: */ + } + } +/* L20: */ + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + i__1 = jx; + if (x[i__1].r != 0.f || x[i__1].i != 0.f) { + if (nounit) { + i__1 = jx; + c_div(&q__1, &x[jx], &a[j + j * a_dim1]); + x[i__1].r = q__1.r, x[i__1].i = q__1.i; + } + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + ix = jx; + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + i__1 = ix; + i__2 = ix; + i__3 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, + q__2.i = temp.r * a[i__3].i + temp.i * a[ + i__3].r; + q__1.r = x[i__2].r - q__2.r, q__1.i = x[i__2].i - + q__2.i; + x[i__1].r = q__1.r, x[i__1].i = q__1.i; +/* L30: */ + } + } + jx -= *incx; +/* L40: */ + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0.f || x[i__2].i != 0.f) { + if (nounit) { + i__2 = j; + c_div(&q__1, &x[j], &a[j + j * a_dim1]); + x[i__2].r = q__1.r, x[i__2].i = q__1.i; + } + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + q__2.i = temp.r * a[i__5].i + temp.i * a[ + i__5].r; + q__1.r = x[i__4].r - q__2.r, q__1.i = x[i__4].i - + q__2.i; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; +/* L50: */ + } + } +/* L60: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0.f || x[i__2].i != 0.f) { + if (nounit) { + i__2 = jx; + c_div(&q__1, &x[jx], &a[j + j * a_dim1]); + x[i__2].r = q__1.r, x[i__2].i = q__1.i; + } + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + ix = jx; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + i__3 = ix; + i__4 = ix; + i__5 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + q__2.i = temp.r * a[i__5].i + temp.i * a[ + i__5].r; + q__1.r = x[i__4].r - q__2.r, q__1.i = x[i__4].i - + q__2.i; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; +/* L70: */ + } + } + jx += *incx; +/* L80: */ + } + } + } + } else { + +/* Form x := inv( A' )*x or x := inv( conjg( A' ) )*x. */ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + if (noconj) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__; + q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ + i__4].i, q__2.i = a[i__3].r * x[i__4].i + + a[i__3].i * x[i__4].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L90: */ + } + if (nounit) { + c_div(&q__1, &temp, &a[j + j * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } + } else { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = i__; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, + q__2.i = q__3.r * x[i__3].i + q__3.i * x[ + i__3].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L100: */ + } + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + c_div(&q__1, &temp, &q__2); + temp.r = q__1.r, temp.i = q__1.i; + } + } + i__2 = j; + x[i__2].r = temp.r, x[i__2].i = temp.i; +/* L110: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + ix = kx; + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + if (noconj) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = ix; + q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ + i__4].i, q__2.i = a[i__3].r * x[i__4].i + + a[i__3].i * x[i__4].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + ix += *incx; +/* L120: */ + } + if (nounit) { + c_div(&q__1, &temp, &a[j + j * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } + } else { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = ix; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, + q__2.i = q__3.r * x[i__3].i + q__3.i * x[ + i__3].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + ix += *incx; +/* L130: */ + } + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + c_div(&q__1, &temp, &q__2); + temp.r = q__1.r, temp.i = q__1.i; + } + } + i__2 = jx; + x[i__2].r = temp.r, x[i__2].i = temp.i; + jx += *incx; +/* L140: */ + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + if (noconj) { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + i__2 = i__ + j * a_dim1; + i__3 = i__; + q__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[ + i__3].i, q__2.i = a[i__2].r * x[i__3].i + + a[i__2].i * x[i__3].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L150: */ + } + if (nounit) { + c_div(&q__1, &temp, &a[j + j * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } + } else { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__2 = i__; + q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, + q__2.i = q__3.r * x[i__2].i + q__3.i * x[ + i__2].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L160: */ + } + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + c_div(&q__1, &temp, &q__2); + temp.r = q__1.r, temp.i = q__1.i; + } + } + i__1 = j; + x[i__1].r = temp.r, x[i__1].i = temp.i; +/* L170: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + ix = kx; + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + if (noconj) { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + i__2 = i__ + j * a_dim1; + i__3 = ix; + q__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[ + i__3].i, q__2.i = a[i__2].r * x[i__3].i + + a[i__2].i * x[i__3].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + ix -= *incx; +/* L180: */ + } + if (nounit) { + c_div(&q__1, &temp, &a[j + j * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } + } else { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__2 = ix; + q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, + q__2.i = q__3.r * x[i__2].i + q__3.i * x[ + i__2].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + ix -= *incx; +/* L190: */ + } + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + c_div(&q__1, &temp, &q__2); + temp.r = q__1.r, temp.i = q__1.i; + } + } + i__1 = jx; + x[i__1].r = temp.r, x[i__1].i = temp.i; + jx -= *incx; +/* L200: */ + } + } + } + } + + return 0; + +/* End of CTRSV . */ + +} /* ctrsv_ */ + +/* Subroutine */ int daxpy_(integer *n, doublereal *da, doublereal *dx, + integer *incx, doublereal *dy, integer *incy) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, m, ix, iy, mp1; + + +/* + constant times a vector plus a vector. + uses unrolled loops for increments equal to one. + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --dy; + --dx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*da == 0.) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dy[iy] += *da * dx[ix]; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* + code for both increments equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 4; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + dy[i__] += *da * dx[i__]; +/* L30: */ + } + if (*n < 4) { + return 0; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 4) { + dy[i__] += *da * dx[i__]; + dy[i__ + 1] += *da * dx[i__ + 1]; + dy[i__ + 2] += *da * dx[i__ + 2]; + dy[i__ + 3] += *da * dx[i__ + 3]; +/* L50: */ + } + return 0; +} /* daxpy_ */ + +doublereal dcabs1_(doublecomplex *z__) +{ + /* System generated locals */ + doublereal ret_val; + static doublecomplex equiv_0[1]; + + /* Local variables */ +#define t ((doublereal *)equiv_0) +#define zz (equiv_0) + + zz->r = z__->r, zz->i = z__->i; + ret_val = abs(t[0]) + abs(t[1]); + return ret_val; +} /* dcabs1_ */ + +#undef zz +#undef t + + +/* Subroutine */ int dcopy_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, m, ix, iy, mp1; + + +/* + copies a vector, x, to a vector, y. + uses unrolled loops for increments equal to one. + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --dy; + --dx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dy[iy] = dx[ix]; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* + code for both increments equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 7; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + dy[i__] = dx[i__]; +/* L30: */ + } + if (*n < 7) { + return 0; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 7) { + dy[i__] = dx[i__]; + dy[i__ + 1] = dx[i__ + 1]; + dy[i__ + 2] = dx[i__ + 2]; + dy[i__ + 3] = dx[i__ + 3]; + dy[i__ + 4] = dx[i__ + 4]; + dy[i__ + 5] = dx[i__ + 5]; + dy[i__ + 6] = dx[i__ + 6]; +/* L50: */ + } + return 0; +} /* dcopy_ */ + +doublereal ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, + integer *incy) +{ + /* System generated locals */ + integer i__1; + doublereal ret_val; + + /* Local variables */ + static integer i__, m, ix, iy, mp1; + static doublereal dtemp; + + +/* + forms the dot product of two vectors. + uses unrolled loops for increments equal to one. + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --dy; + --dx; + + /* Function Body */ + ret_val = 0.; + dtemp = 0.; + if (*n <= 0) { + return ret_val; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp += dx[ix] * dy[iy]; + ix += *incx; + iy += *incy; +/* L10: */ + } + ret_val = dtemp; + return ret_val; + +/* + code for both increments equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 5; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp += dx[i__] * dy[i__]; +/* L30: */ + } + if (*n < 5) { + goto L60; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 5) { + dtemp = dtemp + dx[i__] * dy[i__] + dx[i__ + 1] * dy[i__ + 1] + dx[ + i__ + 2] * dy[i__ + 2] + dx[i__ + 3] * dy[i__ + 3] + dx[i__ + + 4] * dy[i__ + 4]; +/* L50: */ + } +L60: + ret_val = dtemp; + return ret_val; +} /* ddot_ */ + +/* Subroutine */ int dgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, doublereal *alpha, doublereal *a, integer *lda, + doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, + integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3; + + /* Local variables */ + static integer i__, j, l, info; + static logical nota, notb; + static doublereal temp; + static integer ncola; + extern logical lsame_(char *, char *); + static integer nrowa, nrowb; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + DGEMM performs one of the matrix-matrix operations + + C := alpha*op( A )*op( B ) + beta*C, + + where op( X ) is one of + + op( X ) = X or op( X ) = X', + + alpha and beta are scalars, and A, B and C are matrices, with op( A ) + an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. + + Parameters + ========== + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n', op( A ) = A. + + TRANSA = 'T' or 't', op( A ) = A'. + + TRANSA = 'C' or 'c', op( A ) = A'. + + Unchanged on exit. + + TRANSB - CHARACTER*1. + On entry, TRANSB specifies the form of op( B ) to be used in + the matrix multiplication as follows: + + TRANSB = 'N' or 'n', op( B ) = B. + + TRANSB = 'T' or 't', op( B ) = B'. + + TRANSB = 'C' or 'c', op( B ) = B'. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of the matrix + op( A ) and of the matrix C. M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix + op( B ) and the number of columns of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry, K specifies the number of columns of the matrix + op( A ) and the number of rows of the matrix op( B ). K must + be at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is + k when TRANSA = 'N' or 'n', and is m otherwise. + Before entry with TRANSA = 'N' or 'n', the leading m by k + part of the array A must contain the matrix A, otherwise + the leading k by m part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANSA = 'N' or 'n' then + LDA must be at least max( 1, m ), otherwise LDA must be at + least max( 1, k ). + Unchanged on exit. + + B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is + n when TRANSB = 'N' or 'n', and is k otherwise. + Before entry with TRANSB = 'N' or 'n', the leading k by n + part of the array B must contain the matrix B, otherwise + the leading n by k part of the array B must contain the + matrix B. + Unchanged on exit. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. When TRANSB = 'N' or 'n' then + LDB must be at least max( 1, k ), otherwise LDB must be at + least max( 1, n ). + Unchanged on exit. + + BETA - DOUBLE PRECISION. + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then C need not be set on input. + Unchanged on exit. + + C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). + Before entry, the leading m by n part of the array C must + contain the matrix C, except when beta is zero, in which + case C need not be set on entry. + On exit, the array C is overwritten by the m by n matrix + ( alpha*op( A )*op( B ) + beta*C ). + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, m ). + Unchanged on exit. + + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Set NOTA and NOTB as true if A and B respectively are not + transposed and set NROWA, NCOLA and NROWB as the number of rows + and columns of A and the number of rows of B respectively. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + nota = lsame_(transa, "N"); + notb = lsame_(transb, "N"); + if (nota) { + nrowa = *m; + ncola = *k; + } else { + nrowa = *k; + ncola = *m; + } + if (notb) { + nrowb = *k; + } else { + nrowb = *n; + } + +/* Test the input parameters. */ + + info = 0; + if (! nota && ! lsame_(transa, "C") && ! lsame_( + transa, "T")) { + info = 1; + } else if (! notb && ! lsame_(transb, "C") && ! + lsame_(transb, "T")) { + info = 2; + } else if (*m < 0) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*k < 0) { + info = 5; + } else if (*lda < max(1,nrowa)) { + info = 8; + } else if (*ldb < max(1,nrowb)) { + info = 10; + } else if (*ldc < max(1,*m)) { + info = 13; + } + if (info != 0) { + xerbla_("DGEMM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { + return 0; + } + +/* And if alpha.eq.zero. */ + + if (*alpha == 0.) { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + return 0; + } + +/* Start the operations. */ + + if (notb) { + if (nota) { + +/* Form C := alpha*A*B + beta*C. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L50: */ + } + } else if (*beta != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L60: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (b[l + j * b_dim1] != 0.) { + temp = *alpha * b[l + j * b_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L70: */ + } + } +/* L80: */ + } +/* L90: */ + } + } else { + +/* Form C := alpha*A'*B + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * b[l + j * b_dim1]; +/* L100: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L110: */ + } +/* L120: */ + } + } + } else { + if (nota) { + +/* Form C := alpha*A*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L130: */ + } + } else if (*beta != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L140: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (b[j + l * b_dim1] != 0.) { + temp = *alpha * b[j + l * b_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L150: */ + } + } +/* L160: */ + } +/* L170: */ + } + } else { + +/* Form C := alpha*A'*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * b[j + l * b_dim1]; +/* L180: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L190: */ + } +/* L200: */ + } + } + } + + return 0; + +/* End of DGEMM . */ + +} /* dgemm_ */ + +/* Subroutine */ int dgemv_(char *trans, integer *m, integer *n, doublereal * + alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, + doublereal *beta, doublereal *y, integer *incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static doublereal temp; + static integer lenx, leny; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + DGEMV performs one of the matrix-vector operations + + y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, + + where alpha and beta are scalars, x and y are vectors and A is an + m by n matrix. + + Parameters + ========== + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' y := alpha*A*x + beta*y. + + TRANS = 'T' or 't' y := alpha*A'*x + beta*y. + + TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of the matrix A. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, m ). + Unchanged on exit. + + X - DOUBLE PRECISION array of DIMENSION at least + ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' + and at least + ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. + Before entry, the incremented array X must contain the + vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + BETA - DOUBLE PRECISION. + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then Y need not be set on input. + Unchanged on exit. + + Y - DOUBLE PRECISION array of DIMENSION at least + ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' + and at least + ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. + Before entry with BETA non-zero, the incremented array Y + must contain the vector y. On exit, Y is overwritten by the + updated vector y. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C") + ) { + info = 1; + } else if (*m < 0) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*lda < max(1,*m)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + xerbla_("DGEMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) { + return 0; + } + +/* + Set LENX and LENY, the lengths of the vectors x and y, and set + up the start points in X and Y. +*/ + + if (lsame_(trans, "N")) { + lenx = *n; + leny = *m; + } else { + lenx = *m; + leny = *n; + } + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (lenx - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (leny - 1) * *incy; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. + + First form y := beta*y. +*/ + + if (*beta != 1.) { + if (*incy == 1) { + if (*beta == 0.) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.; +/* L10: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; +/* L20: */ + } + } + } else { + iy = ky; + if (*beta == 0.) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.; + iy += *incy; +/* L30: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; +/* L40: */ + } + } + } + } + if (*alpha == 0.) { + return 0; + } + if (lsame_(trans, "N")) { + +/* Form y := alpha*A*x + y. */ + + jx = kx; + if (*incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + temp = *alpha * x[jx]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + y[i__] += temp * a[i__ + j * a_dim1]; +/* L50: */ + } + } + jx += *incx; +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + temp = *alpha * x[jx]; + iy = ky; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + y[iy] += temp * a[i__ + j * a_dim1]; + iy += *incy; +/* L70: */ + } + } + jx += *incx; +/* L80: */ + } + } + } else { + +/* Form y := alpha*A'*x + y. */ + + jy = ky; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = 0.; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + y[jy] += *alpha * temp; + jy += *incy; +/* L100: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = 0.; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[ix]; + ix += *incx; +/* L110: */ + } + y[jy] += *alpha * temp; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of DGEMV . */ + +} /* dgemv_ */ + +/* Subroutine */ int dger_(integer *m, integer *n, doublereal *alpha, + doublereal *x, integer *incx, doublereal *y, integer *incy, + doublereal *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, jy, kx, info; + static doublereal temp; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + DGER performs the rank 1 operation + + A := alpha*x*y' + A, + + where alpha is a scalar, x is an m element vector, y is an n element + vector and A is an m by n matrix. + + Parameters + ========== + + M - INTEGER. + On entry, M specifies the number of rows of the matrix A. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + X - DOUBLE PRECISION array of dimension at least + ( 1 + ( m - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the m + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Y - DOUBLE PRECISION array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. + Unchanged on exit. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. On exit, A is + overwritten by the updated matrix. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, m ). + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (*m < 0) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_("DGER ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || *alpha == 0.) { + return 0; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. +*/ + + if (*incy > 0) { + jy = 1; + } else { + jy = 1 - (*n - 1) * *incy; + } + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (y[jy] != 0.) { + temp = *alpha * y[jy]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[i__] * temp; +/* L10: */ + } + } + jy += *incy; +/* L20: */ + } + } else { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*m - 1) * *incx; + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (y[jy] != 0.) { + temp = *alpha * y[jy]; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[ix] * temp; + ix += *incx; +/* L30: */ + } + } + jy += *incy; +/* L40: */ + } + } + + return 0; + +/* End of DGER . */ + +} /* dger_ */ + +doublereal dnrm2_(integer *n, doublereal *x, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal ret_val, d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer ix; + static doublereal ssq, norm, scale, absxi; + + +/* + DNRM2 returns the euclidean norm of a vector via the function + name, so that + + DNRM2 := sqrt( x'*x ) + + + -- This version written on 25-October-1982. + Modified on 14-October-1993 to inline the call to DLASSQ. + Sven Hammarling, Nag Ltd. +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n < 1 || *incx < 1) { + norm = 0.; + } else if (*n == 1) { + norm = abs(x[1]); + } else { + scale = 0.; + ssq = 1.; +/* + The following loop is equivalent to this call to the LAPACK + auxiliary routine: + CALL DLASSQ( N, X, INCX, SCALE, SSQ ) +*/ + + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + if (x[ix] != 0.) { + absxi = (d__1 = x[ix], abs(d__1)); + if (scale < absxi) { +/* Computing 2nd power */ + d__1 = scale / absxi; + ssq = ssq * (d__1 * d__1) + 1.; + scale = absxi; + } else { +/* Computing 2nd power */ + d__1 = absxi / scale; + ssq += d__1 * d__1; + } + } +/* L10: */ + } + norm = scale * sqrt(ssq); + } + + ret_val = norm; + return ret_val; + +/* End of DNRM2. */ + +} /* dnrm2_ */ + +/* Subroutine */ int drot_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy, doublereal *c__, doublereal *s) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, ix, iy; + static doublereal dtemp; + + +/* + applies a plane rotation. + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --dy; + --dx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments not equal + to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp = *c__ * dx[ix] + *s * dy[iy]; + dy[iy] = *c__ * dy[iy] - *s * dx[ix]; + dx[ix] = dtemp; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp = *c__ * dx[i__] + *s * dy[i__]; + dy[i__] = *c__ * dy[i__] - *s * dx[i__]; + dx[i__] = dtemp; +/* L30: */ + } + return 0; +} /* drot_ */ + +/* Subroutine */ int dscal_(integer *n, doublereal *da, doublereal *dx, + integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + static integer i__, m, mp1, nincx; + + +/* + scales a vector by a constant. + uses unrolled loops for increment equal to one. + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --dx; + + /* Function Body */ + if (*n <= 0 || *incx <= 0) { + return 0; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + nincx = *n * *incx; + i__1 = nincx; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + dx[i__] = *da * dx[i__]; +/* L10: */ + } + return 0; + +/* + code for increment equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 5; + if (m == 0) { + goto L40; + } + i__2 = m; + for (i__ = 1; i__ <= i__2; ++i__) { + dx[i__] = *da * dx[i__]; +/* L30: */ + } + if (*n < 5) { + return 0; + } +L40: + mp1 = m + 1; + i__2 = *n; + for (i__ = mp1; i__ <= i__2; i__ += 5) { + dx[i__] = *da * dx[i__]; + dx[i__ + 1] = *da * dx[i__ + 1]; + dx[i__ + 2] = *da * dx[i__ + 2]; + dx[i__ + 3] = *da * dx[i__ + 3]; + dx[i__ + 4] = *da * dx[i__ + 4]; +/* L50: */ + } + return 0; +} /* dscal_ */ + +/* Subroutine */ int dswap_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, m, ix, iy, mp1; + static doublereal dtemp; + + +/* + interchanges two vectors. + uses unrolled loops for increments equal one. + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --dy; + --dx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments not equal + to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp = dx[ix]; + dx[ix] = dy[iy]; + dy[iy] = dtemp; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* + code for both increments equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 3; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp = dx[i__]; + dx[i__] = dy[i__]; + dy[i__] = dtemp; +/* L30: */ + } + if (*n < 3) { + return 0; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 3) { + dtemp = dx[i__]; + dx[i__] = dy[i__]; + dy[i__] = dtemp; + dtemp = dx[i__ + 1]; + dx[i__ + 1] = dy[i__ + 1]; + dy[i__ + 1] = dtemp; + dtemp = dx[i__ + 2]; + dx[i__ + 2] = dy[i__ + 2]; + dy[i__ + 2] = dtemp; +/* L50: */ + } + return 0; +} /* dswap_ */ + +/* Subroutine */ int dsymv_(char *uplo, integer *n, doublereal *alpha, + doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal + *beta, doublereal *y, integer *incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static doublereal temp1, temp2; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + DSYMV performs the matrix-vector operation + + y := alpha*A*x + beta*y, + + where alpha and beta are scalars, x and y are n element vectors and + A is an n by n symmetric matrix. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array A is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of A + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of A + is to be referenced. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of A is not referenced. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + X - DOUBLE PRECISION array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + BETA - DOUBLE PRECISION. + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then Y need not be set on input. + Unchanged on exit. + + Y - DOUBLE PRECISION array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. On exit, Y is overwritten by the updated + vector y. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*lda < max(1,*n)) { + info = 5; + } else if (*incx == 0) { + info = 7; + } else if (*incy == 0) { + info = 10; + } + if (info != 0) { + xerbla_("DSYMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *alpha == 0. && *beta == 1.) { + return 0; + } + +/* Set up the start points in X and Y. */ + + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through the triangular part + of A. + + First form y := beta*y. +*/ + + if (*beta != 1.) { + if (*incy == 1) { + if (*beta == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.; +/* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; +/* L20: */ + } + } + } else { + iy = ky; + if (*beta == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.; + iy += *incy; +/* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; +/* L40: */ + } + } + } + } + if (*alpha == 0.) { + return 0; + } + if (lsame_(uplo, "U")) { + +/* Form y when A is stored in upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + y[i__] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[i__]; +/* L50: */ + } + y[j] = y[j] + temp1 * a[j + j * a_dim1] + *alpha * temp2; +/* L60: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.; + ix = kx; + iy = ky; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + y[iy] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[ix]; + ix += *incx; + iy += *incy; +/* L70: */ + } + y[jy] = y[jy] + temp1 * a[j + j * a_dim1] + *alpha * temp2; + jx += *incx; + jy += *incy; +/* L80: */ + } + } + } else { + +/* Form y when A is stored in lower triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.; + y[j] += temp1 * a[j + j * a_dim1]; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + y[i__] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + y[j] += *alpha * temp2; +/* L100: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.; + y[jy] += temp1 * a[j + j * a_dim1]; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + iy += *incy; + y[iy] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[ix]; +/* L110: */ + } + y[jy] += *alpha * temp2; + jx += *incx; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of DSYMV . */ + +} /* dsymv_ */ + +/* Subroutine */ int dsyr2_(char *uplo, integer *n, doublereal *alpha, + doublereal *x, integer *incx, doublereal *y, integer *incy, + doublereal *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static doublereal temp1, temp2; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + DSYR2 performs the symmetric rank 2 operation + + A := alpha*x*y' + alpha*y*x' + A, + + where alpha is a scalar, x and y are n element vectors and A is an n + by n symmetric matrix. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array A is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of A + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of A + is to be referenced. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + X - DOUBLE PRECISION array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Y - DOUBLE PRECISION array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. + Unchanged on exit. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of A is not referenced. On exit, the + upper triangular part of the array A is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of A is not referenced. On exit, the + lower triangular part of the array A is overwritten by the + lower triangular part of the updated matrix. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*n)) { + info = 9; + } + if (info != 0) { + xerbla_("DSYR2 ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *alpha == 0.) { + return 0; + } + +/* + Set up the start points in X and Y if the increments are not both + unity. +*/ + + if (*incx != 1 || *incy != 1) { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + jx = kx; + jy = ky; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through the triangular part + of A. +*/ + + if (lsame_(uplo, "U")) { + +/* Form A when A is stored in the upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0. || y[j] != 0.) { + temp1 = *alpha * y[j]; + temp2 = *alpha * x[j]; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * + temp1 + y[i__] * temp2; +/* L10: */ + } + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0. || y[jy] != 0.) { + temp1 = *alpha * y[jy]; + temp2 = *alpha * x[jx]; + ix = kx; + iy = ky; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * + temp1 + y[iy] * temp2; + ix += *incx; + iy += *incy; +/* L30: */ + } + } + jx += *incx; + jy += *incy; +/* L40: */ + } + } + } else { + +/* Form A when A is stored in the lower triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0. || y[j] != 0.) { + temp1 = *alpha * y[j]; + temp2 = *alpha * x[j]; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * + temp1 + y[i__] * temp2; +/* L50: */ + } + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0. || y[jy] != 0.) { + temp1 = *alpha * y[jy]; + temp2 = *alpha * x[jx]; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * + temp1 + y[iy] * temp2; + ix += *incx; + iy += *incy; +/* L70: */ + } + } + jx += *incx; + jy += *incy; +/* L80: */ + } + } + } + + return 0; + +/* End of DSYR2 . */ + +} /* dsyr2_ */ + +/* Subroutine */ int dsyr2k_(char *uplo, char *trans, integer *n, integer *k, + doublereal *alpha, doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *beta, doublereal *c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3; + + /* Local variables */ + static integer i__, j, l, info; + static doublereal temp1, temp2; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + DSYR2K performs one of the symmetric rank 2k operations + + C := alpha*A*B' + alpha*B*A' + beta*C, + + or + + C := alpha*A'*B + alpha*B'*A + beta*C, + + where alpha and beta are scalars, C is an n by n symmetric matrix + and A and B are n by k matrices in the first case and k by n + matrices in the second case. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array C is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of C + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of C + is to be referenced. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' + + beta*C. + + TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A + + beta*C. + + TRANS = 'C' or 'c' C := alpha*A'*B + alpha*B'*A + + beta*C. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry with TRANS = 'N' or 'n', K specifies the number + of columns of the matrices A and B, and on entry with + TRANS = 'T' or 't' or 'C' or 'c', K specifies the number + of rows of the matrices A and B. K must be at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array A must contain the matrix A, otherwise + the leading k by n part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDA must be at least max( 1, n ), otherwise LDA must + be at least max( 1, k ). + Unchanged on exit. + + B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array B must contain the matrix B, otherwise + the leading k by n part of the array B must contain the + matrix B. + Unchanged on exit. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDB must be at least max( 1, n ), otherwise LDB must + be at least max( 1, k ). + Unchanged on exit. + + BETA - DOUBLE PRECISION. + On entry, BETA specifies the scalar beta. + Unchanged on exit. + + C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array C must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of C is not referenced. On exit, the + upper triangular part of the array C is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array C must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of C is not referenced. On exit, the + lower triangular part of the array C is overwritten by the + lower triangular part of the updated matrix. + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, n ). + Unchanged on exit. + + + Level 3 Blas routine. + + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(trans, "N")) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, "U"); + + info = 0; + if (! upper && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! lsame_(trans, "C")) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldb < max(1,nrowa)) { + info = 9; + } else if (*ldc < max(1,*n)) { + info = 12; + } + if (info != 0) { + xerbla_("DSYR2K", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.) { + if (upper) { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + } else { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L70: */ + } +/* L80: */ + } + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(trans, "N")) { + +/* Form C := alpha*A*B' + alpha*B*A' + C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L90: */ + } + } else if (*beta != 1.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L100: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0. || b[j + l * b_dim1] != 0.) { + temp1 = *alpha * b[j + l * b_dim1]; + temp2 = *alpha * a[j + l * a_dim1]; + i__3 = j; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ + i__ + l * a_dim1] * temp1 + b[i__ + l * + b_dim1] * temp2; +/* L110: */ + } + } +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L140: */ + } + } else if (*beta != 1.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L150: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0. || b[j + l * b_dim1] != 0.) { + temp1 = *alpha * b[j + l * b_dim1]; + temp2 = *alpha * a[j + l * a_dim1]; + i__3 = *n; + for (i__ = j; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ + i__ + l * a_dim1] * temp1 + b[i__ + l * + b_dim1] * temp2; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } + } else { + +/* Form C := alpha*A'*B + alpha*B'*A + C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp1 = 0.; + temp2 = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; + temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; +/* L190: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * + temp2; + } else { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + *alpha * temp1 + *alpha * temp2; + } +/* L200: */ + } +/* L210: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + temp1 = 0.; + temp2 = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; + temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; +/* L220: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * + temp2; + } else { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + *alpha * temp1 + *alpha * temp2; + } +/* L230: */ + } +/* L240: */ + } + } + } + + return 0; + +/* End of DSYR2K. */ + +} /* dsyr2k_ */ + +/* Subroutine */ int dsyrk_(char *uplo, char *trans, integer *n, integer *k, + doublereal *alpha, doublereal *a, integer *lda, doublereal *beta, + doublereal *c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, l, info; + static doublereal temp; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + DSYRK performs one of the symmetric rank k operations + + C := alpha*A*A' + beta*C, + + or + + C := alpha*A'*A + beta*C, + + where alpha and beta are scalars, C is an n by n symmetric matrix + and A is an n by k matrix in the first case and a k by n matrix + in the second case. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array C is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of C + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of C + is to be referenced. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' C := alpha*A*A' + beta*C. + + TRANS = 'T' or 't' C := alpha*A'*A + beta*C. + + TRANS = 'C' or 'c' C := alpha*A'*A + beta*C. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry with TRANS = 'N' or 'n', K specifies the number + of columns of the matrix A, and on entry with + TRANS = 'T' or 't' or 'C' or 'c', K specifies the number + of rows of the matrix A. K must be at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array A must contain the matrix A, otherwise + the leading k by n part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDA must be at least max( 1, n ), otherwise LDA must + be at least max( 1, k ). + Unchanged on exit. + + BETA - DOUBLE PRECISION. + On entry, BETA specifies the scalar beta. + Unchanged on exit. + + C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array C must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of C is not referenced. On exit, the + upper triangular part of the array C is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array C must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of C is not referenced. On exit, the + lower triangular part of the array C is overwritten by the + lower triangular part of the updated matrix. + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, n ). + Unchanged on exit. + + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(trans, "N")) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, "U"); + + info = 0; + if (! upper && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! lsame_(trans, "C")) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldc < max(1,*n)) { + info = 10; + } + if (info != 0) { + xerbla_("DSYRK ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.) { + if (upper) { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + } else { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L70: */ + } +/* L80: */ + } + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(trans, "N")) { + +/* Form C := alpha*A*A' + beta*C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L90: */ + } + } else if (*beta != 1.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L100: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0.) { + temp = *alpha * a[j + l * a_dim1]; + i__3 = j; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L110: */ + } + } +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L140: */ + } + } else if (*beta != 1.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L150: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0.) { + temp = *alpha * a[j + l * a_dim1]; + i__3 = *n; + for (i__ = j; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } + } else { + +/* Form C := alpha*A'*A + beta*C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; +/* L190: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L200: */ + } +/* L210: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + temp = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; +/* L220: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L230: */ + } +/* L240: */ + } + } + } + + return 0; + +/* End of DSYRK . */ + +} /* dsyrk_ */ + +/* Subroutine */ int dtrmm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, doublereal *alpha, doublereal *a, integer * + lda, doublereal *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, k, info; + static doublereal temp; + static logical lside; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical nounit; + + +/* + Purpose + ======= + + DTRMM performs one of the matrix-matrix operations + + B := alpha*op( A )*B, or B := alpha*B*op( A ), + + where alpha is a scalar, B is an m by n matrix, A is a unit, or + non-unit, upper or lower triangular matrix and op( A ) is one of + + op( A ) = A or op( A ) = A'. + + Parameters + ========== + + SIDE - CHARACTER*1. + On entry, SIDE specifies whether op( A ) multiplies B from + the left or right as follows: + + SIDE = 'L' or 'l' B := alpha*op( A )*B. + + SIDE = 'R' or 'r' B := alpha*B*op( A ). + + Unchanged on exit. + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix A is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n' op( A ) = A. + + TRANSA = 'T' or 't' op( A ) = A'. + + TRANSA = 'C' or 'c' op( A ) = A'. + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit triangular + as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of B. M must be at + least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of B. N must be + at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. When alpha is + zero then A is not referenced and B need not be set before + entry. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m + when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. + Before entry with UPLO = 'U' or 'u', the leading k by k + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading k by k + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When SIDE = 'L' or 'l' then + LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' + then LDA must be at least max( 1, n ). + Unchanged on exit. + + B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). + Before entry, the leading m by n part of the array B must + contain the matrix B, and on exit is overwritten by the + transformed matrix. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. LDB must be at least + max( 1, m ). + Unchanged on exit. + + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = lsame_(side, "L"); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + nounit = lsame_(diag, "N"); + upper = lsame_(uplo, "U"); + + info = 0; + if (! lside && ! lsame_(side, "R")) { + info = 1; + } else if (! upper && ! lsame_(uplo, "L")) { + info = 2; + } else if (! lsame_(transa, "N") && ! lsame_(transa, + "T") && ! lsame_(transa, "C")) { + info = 3; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + xerbla_("DTRMM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (lsame_(transa, "N")) { + +/* Form B := alpha*A*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + if (b[k + j * b_dim1] != 0.) { + temp = *alpha * b[k + j * b_dim1]; + i__3 = k - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] += temp * a[i__ + k * + a_dim1]; +/* L30: */ + } + if (nounit) { + temp *= a[k + k * a_dim1]; + } + b[k + j * b_dim1] = temp; + } +/* L40: */ + } +/* L50: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (k = *m; k >= 1; --k) { + if (b[k + j * b_dim1] != 0.) { + temp = *alpha * b[k + j * b_dim1]; + b[k + j * b_dim1] = temp; + if (nounit) { + b[k + j * b_dim1] *= a[k + k * a_dim1]; + } + i__2 = *m; + for (i__ = k + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] += temp * a[i__ + k * + a_dim1]; +/* L60: */ + } + } +/* L70: */ + } +/* L80: */ + } + } + } else { + +/* Form B := alpha*A'*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + temp = b[i__ + j * b_dim1]; + if (nounit) { + temp *= a[i__ + i__ * a_dim1]; + } + i__2 = i__ - 1; + for (k = 1; k <= i__2; ++k) { + temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L90: */ + } + b[i__ + j * b_dim1] = *alpha * temp; +/* L100: */ + } +/* L110: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = b[i__ + j * b_dim1]; + if (nounit) { + temp *= a[i__ + i__ * a_dim1]; + } + i__3 = *m; + for (k = i__ + 1; k <= i__3; ++k) { + temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L120: */ + } + b[i__ + j * b_dim1] = *alpha * temp; +/* L130: */ + } +/* L140: */ + } + } + } + } else { + if (lsame_(transa, "N")) { + +/* Form B := alpha*B*A. */ + + if (upper) { + for (j = *n; j >= 1; --j) { + temp = *alpha; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L150: */ + } + i__1 = j - 1; + for (k = 1; k <= i__1; ++k) { + if (a[k + j * a_dim1] != 0.) { + temp = *alpha * a[k + j * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = *alpha; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L190: */ + } + i__2 = *n; + for (k = j + 1; k <= i__2; ++k) { + if (a[k + j * a_dim1] != 0.) { + temp = *alpha * a[k + j * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L200: */ + } + } +/* L210: */ + } +/* L220: */ + } + } + } else { + +/* Form B := alpha*B*A'. */ + + if (upper) { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + i__2 = k - 1; + for (j = 1; j <= i__2; ++j) { + if (a[j + k * a_dim1] != 0.) { + temp = *alpha * a[j + k * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L230: */ + } + } +/* L240: */ + } + temp = *alpha; + if (nounit) { + temp *= a[k + k * a_dim1]; + } + if (temp != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L250: */ + } + } +/* L260: */ + } + } else { + for (k = *n; k >= 1; --k) { + i__1 = *n; + for (j = k + 1; j <= i__1; ++j) { + if (a[j + k * a_dim1] != 0.) { + temp = *alpha * a[j + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L270: */ + } + } +/* L280: */ + } + temp = *alpha; + if (nounit) { + temp *= a[k + k * a_dim1]; + } + if (temp != 1.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L290: */ + } + } +/* L300: */ + } + } + } + } + + return 0; + +/* End of DTRMM . */ + +} /* dtrmm_ */ + +/* Subroutine */ int dtrmv_(char *uplo, char *trans, char *diag, integer *n, + doublereal *a, integer *lda, doublereal *x, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, jx, kx, info; + static doublereal temp; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical nounit; + + +/* + Purpose + ======= + + DTRMV performs one of the matrix-vector operations + + x := A*x, or x := A'*x, + + where x is an n element vector and A is an n by n unit, or non-unit, + upper or lower triangular matrix. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' x := A*x. + + TRANS = 'T' or 't' x := A'*x. + + TRANS = 'C' or 'c' x := A'*x. + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit + triangular as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + X - DOUBLE PRECISION array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. On exit, X is overwritten with the + tranformed vector x. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! lsame_(trans, "C")) { + info = 2; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1,*n)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } + if (info != 0) { + xerbla_("DTRMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + nounit = lsame_(diag, "N"); + +/* + Set up the start point in X if the increment is not unity. This + will be ( N - 1 )*INCX too small for descending loops. +*/ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. +*/ + + if (lsame_(trans, "N")) { + +/* Form x := A*x. */ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.) { + temp = x[j]; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + x[i__] += temp * a[i__ + j * a_dim1]; +/* L10: */ + } + if (nounit) { + x[j] *= a[j + j * a_dim1]; + } + } +/* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + temp = x[jx]; + ix = kx; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + x[ix] += temp * a[i__ + j * a_dim1]; + ix += *incx; +/* L30: */ + } + if (nounit) { + x[jx] *= a[j + j * a_dim1]; + } + } + jx += *incx; +/* L40: */ + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + if (x[j] != 0.) { + temp = x[j]; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + x[i__] += temp * a[i__ + j * a_dim1]; +/* L50: */ + } + if (nounit) { + x[j] *= a[j + j * a_dim1]; + } + } +/* L60: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + if (x[jx] != 0.) { + temp = x[jx]; + ix = kx; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + x[ix] += temp * a[i__ + j * a_dim1]; + ix -= *incx; +/* L70: */ + } + if (nounit) { + x[jx] *= a[j + j * a_dim1]; + } + } + jx -= *incx; +/* L80: */ + } + } + } + } else { + +/* Form x := A'*x. */ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + temp = x[j]; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + for (i__ = j - 1; i__ >= 1; --i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + x[j] = temp; +/* L100: */ + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + temp = x[jx]; + ix = jx; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + temp += a[i__ + j * a_dim1] * x[ix]; +/* L110: */ + } + x[jx] = temp; + jx -= *incx; +/* L120: */ + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[j]; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L130: */ + } + x[j] = temp; +/* L140: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[jx]; + ix = jx; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + temp += a[i__ + j * a_dim1] * x[ix]; +/* L150: */ + } + x[jx] = temp; + jx += *incx; +/* L160: */ + } + } + } + } + + return 0; + +/* End of DTRMV . */ + +} /* dtrmv_ */ + +/* Subroutine */ int dtrsm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, doublereal *alpha, doublereal *a, integer * + lda, doublereal *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, k, info; + static doublereal temp; + static logical lside; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical nounit; + + +/* + Purpose + ======= + + DTRSM solves one of the matrix equations + + op( A )*X = alpha*B, or X*op( A ) = alpha*B, + + where alpha is a scalar, X and B are m by n matrices, A is a unit, or + non-unit, upper or lower triangular matrix and op( A ) is one of + + op( A ) = A or op( A ) = A'. + + The matrix X is overwritten on B. + + Parameters + ========== + + SIDE - CHARACTER*1. + On entry, SIDE specifies whether op( A ) appears on the left + or right of X as follows: + + SIDE = 'L' or 'l' op( A )*X = alpha*B. + + SIDE = 'R' or 'r' X*op( A ) = alpha*B. + + Unchanged on exit. + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix A is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n' op( A ) = A. + + TRANSA = 'T' or 't' op( A ) = A'. + + TRANSA = 'C' or 'c' op( A ) = A'. + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit triangular + as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of B. M must be at + least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of B. N must be + at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. When alpha is + zero then A is not referenced and B need not be set before + entry. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m + when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. + Before entry with UPLO = 'U' or 'u', the leading k by k + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading k by k + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When SIDE = 'L' or 'l' then + LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' + then LDA must be at least max( 1, n ). + Unchanged on exit. + + B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). + Before entry, the leading m by n part of the array B must + contain the right-hand side matrix B, and on exit is + overwritten by the solution matrix X. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. LDB must be at least + max( 1, m ). + Unchanged on exit. + + + Level 3 Blas routine. + + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = lsame_(side, "L"); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + nounit = lsame_(diag, "N"); + upper = lsame_(uplo, "U"); + + info = 0; + if (! lside && ! lsame_(side, "R")) { + info = 1; + } else if (! upper && ! lsame_(uplo, "L")) { + info = 2; + } else if (! lsame_(transa, "N") && ! lsame_(transa, + "T") && ! lsame_(transa, "C")) { + info = 3; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + xerbla_("DTRSM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (lsame_(transa, "N")) { + +/* Form B := alpha*inv( A )*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L30: */ + } + } + for (k = *m; k >= 1; --k) { + if (b[k + j * b_dim1] != 0.) { + if (nounit) { + b[k + j * b_dim1] /= a[k + k * a_dim1]; + } + i__2 = k - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ + i__ + k * a_dim1]; +/* L40: */ + } + } +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L70: */ + } + } + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + if (b[k + j * b_dim1] != 0.) { + if (nounit) { + b[k + j * b_dim1] /= a[k + k * a_dim1]; + } + i__3 = *m; + for (i__ = k + 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ + i__ + k * a_dim1]; +/* L80: */ + } + } +/* L90: */ + } +/* L100: */ + } + } + } else { + +/* Form B := alpha*inv( A' )*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = *alpha * b[i__ + j * b_dim1]; + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L110: */ + } + if (nounit) { + temp /= a[i__ + i__ * a_dim1]; + } + b[i__ + j * b_dim1] = temp; +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + temp = *alpha * b[i__ + j * b_dim1]; + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L140: */ + } + if (nounit) { + temp /= a[i__ + i__ * a_dim1]; + } + b[i__ + j * b_dim1] = temp; +/* L150: */ + } +/* L160: */ + } + } + } + } else { + if (lsame_(transa, "N")) { + +/* Form B := alpha*B*inv( A ). */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L170: */ + } + } + i__2 = j - 1; + for (k = 1; k <= i__2; ++k) { + if (a[k + j * a_dim1] != 0.) { + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ + i__ + k * b_dim1]; +/* L180: */ + } + } +/* L190: */ + } + if (nounit) { + temp = 1. / a[j + j * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L200: */ + } + } +/* L210: */ + } + } else { + for (j = *n; j >= 1; --j) { + if (*alpha != 1.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L220: */ + } + } + i__1 = *n; + for (k = j + 1; k <= i__1; ++k) { + if (a[k + j * a_dim1] != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ + i__ + k * b_dim1]; +/* L230: */ + } + } +/* L240: */ + } + if (nounit) { + temp = 1. / a[j + j * a_dim1]; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L250: */ + } + } +/* L260: */ + } + } + } else { + +/* Form B := alpha*B*inv( A' ). */ + + if (upper) { + for (k = *n; k >= 1; --k) { + if (nounit) { + temp = 1. / a[k + k * a_dim1]; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L270: */ + } + } + i__1 = k - 1; + for (j = 1; j <= i__1; ++j) { + if (a[j + k * a_dim1] != 0.) { + temp = a[j + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= temp * b[i__ + k * + b_dim1]; +/* L280: */ + } + } +/* L290: */ + } + if (*alpha != 1.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] + ; +/* L300: */ + } + } +/* L310: */ + } + } else { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (nounit) { + temp = 1. / a[k + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L320: */ + } + } + i__2 = *n; + for (j = k + 1; j <= i__2; ++j) { + if (a[j + k * a_dim1] != 0.) { + temp = a[j + k * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= temp * b[i__ + k * + b_dim1]; +/* L330: */ + } + } +/* L340: */ + } + if (*alpha != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] + ; +/* L350: */ + } + } +/* L360: */ + } + } + } + } + + return 0; + +/* End of DTRSM . */ + +} /* dtrsm_ */ + +doublereal dzasum_(integer *n, doublecomplex *zx, integer *incx) +{ + /* System generated locals */ + integer i__1; + doublereal ret_val; + + /* Local variables */ + static integer i__, ix; + static doublereal stemp; + extern doublereal dcabs1_(doublecomplex *); + + +/* + takes the sum of the absolute values. + jack dongarra, 3/11/78. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --zx; + + /* Function Body */ + ret_val = 0.; + stemp = 0.; + if (*n <= 0 || *incx <= 0) { + return ret_val; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + ix = 1; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp += dcabs1_(&zx[ix]); + ix += *incx; +/* L10: */ + } + ret_val = stemp; + return ret_val; + +/* code for increment equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp += dcabs1_(&zx[i__]); +/* L30: */ + } + ret_val = stemp; + return ret_val; +} /* dzasum_ */ + +doublereal dznrm2_(integer *n, doublecomplex *x, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + doublereal ret_val, d__1; + + /* Builtin functions */ + double d_imag(doublecomplex *), sqrt(doublereal); + + /* Local variables */ + static integer ix; + static doublereal ssq, temp, norm, scale; + + +/* + DZNRM2 returns the euclidean norm of a vector via the function + name, so that + + DZNRM2 := sqrt( conjg( x' )*x ) + + + -- This version written on 25-October-1982. + Modified on 14-October-1993 to inline the call to ZLASSQ. + Sven Hammarling, Nag Ltd. +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n < 1 || *incx < 1) { + norm = 0.; + } else { + scale = 0.; + ssq = 1.; +/* + The following loop is equivalent to this call to the LAPACK + auxiliary routine: + CALL ZLASSQ( N, X, INCX, SCALE, SSQ ) +*/ + + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + i__3 = ix; + if (x[i__3].r != 0.) { + i__3 = ix; + temp = (d__1 = x[i__3].r, abs(d__1)); + if (scale < temp) { +/* Computing 2nd power */ + d__1 = scale / temp; + ssq = ssq * (d__1 * d__1) + 1.; + scale = temp; + } else { +/* Computing 2nd power */ + d__1 = temp / scale; + ssq += d__1 * d__1; + } + } + if (d_imag(&x[ix]) != 0.) { + temp = (d__1 = d_imag(&x[ix]), abs(d__1)); + if (scale < temp) { +/* Computing 2nd power */ + d__1 = scale / temp; + ssq = ssq * (d__1 * d__1) + 1.; + scale = temp; + } else { +/* Computing 2nd power */ + d__1 = temp / scale; + ssq += d__1 * d__1; + } + } +/* L10: */ + } + norm = scale * sqrt(ssq); + } + + ret_val = norm; + return ret_val; + +/* End of DZNRM2. */ + +} /* dznrm2_ */ + +integer icamax_(integer *n, complex *cx, integer *incx) +{ + /* System generated locals */ + integer ret_val, i__1, i__2; + real r__1, r__2; + + /* Builtin functions */ + double r_imag(complex *); + + /* Local variables */ + static integer i__, ix; + static real smax; + + +/* + finds the index of element having max. absolute value. + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --cx; + + /* Function Body */ + ret_val = 0; + if (*n < 1 || *incx <= 0) { + return ret_val; + } + ret_val = 1; + if (*n == 1) { + return ret_val; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + ix = 1; + smax = (r__1 = cx[1].r, dabs(r__1)) + (r__2 = r_imag(&cx[1]), dabs(r__2)); + ix += *incx; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + i__2 = ix; + if ((r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[ix]), dabs( + r__2)) <= smax) { + goto L5; + } + ret_val = i__; + i__2 = ix; + smax = (r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[ix]), + dabs(r__2)); +L5: + ix += *incx; +/* L10: */ + } + return ret_val; + +/* code for increment equal to 1 */ + +L20: + smax = (r__1 = cx[1].r, dabs(r__1)) + (r__2 = r_imag(&cx[1]), dabs(r__2)); + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + i__2 = i__; + if ((r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[i__]), dabs( + r__2)) <= smax) { + goto L30; + } + ret_val = i__; + i__2 = i__; + smax = (r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[i__]), + dabs(r__2)); +L30: + ; + } + return ret_val; +} /* icamax_ */ + +integer idamax_(integer *n, doublereal *dx, integer *incx) +{ + /* System generated locals */ + integer ret_val, i__1; + doublereal d__1; + + /* Local variables */ + static integer i__, ix; + static doublereal dmax__; + + +/* + finds the index of element having max. absolute value. + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --dx; + + /* Function Body */ + ret_val = 0; + if (*n < 1 || *incx <= 0) { + return ret_val; + } + ret_val = 1; + if (*n == 1) { + return ret_val; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + ix = 1; + dmax__ = abs(dx[1]); + ix += *incx; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if ((d__1 = dx[ix], abs(d__1)) <= dmax__) { + goto L5; + } + ret_val = i__; + dmax__ = (d__1 = dx[ix], abs(d__1)); +L5: + ix += *incx; +/* L10: */ + } + return ret_val; + +/* code for increment equal to 1 */ + +L20: + dmax__ = abs(dx[1]); + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if ((d__1 = dx[i__], abs(d__1)) <= dmax__) { + goto L30; + } + ret_val = i__; + dmax__ = (d__1 = dx[i__], abs(d__1)); +L30: + ; + } + return ret_val; +} /* idamax_ */ + +integer isamax_(integer *n, real *sx, integer *incx) +{ + /* System generated locals */ + integer ret_val, i__1; + real r__1; + + /* Local variables */ + static integer i__, ix; + static real smax; + + +/* + finds the index of element having max. absolute value. + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --sx; + + /* Function Body */ + ret_val = 0; + if (*n < 1 || *incx <= 0) { + return ret_val; + } + ret_val = 1; + if (*n == 1) { + return ret_val; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + ix = 1; + smax = dabs(sx[1]); + ix += *incx; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if ((r__1 = sx[ix], dabs(r__1)) <= smax) { + goto L5; + } + ret_val = i__; + smax = (r__1 = sx[ix], dabs(r__1)); +L5: + ix += *incx; +/* L10: */ + } + return ret_val; + +/* code for increment equal to 1 */ + +L20: + smax = dabs(sx[1]); + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if ((r__1 = sx[i__], dabs(r__1)) <= smax) { + goto L30; + } + ret_val = i__; + smax = (r__1 = sx[i__], dabs(r__1)); +L30: + ; + } + return ret_val; +} /* isamax_ */ + +integer izamax_(integer *n, doublecomplex *zx, integer *incx) +{ + /* System generated locals */ + integer ret_val, i__1; + + /* Local variables */ + static integer i__, ix; + static doublereal smax; + extern doublereal dcabs1_(doublecomplex *); + + +/* + finds the index of element having max. absolute value. + jack dongarra, 1/15/85. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --zx; + + /* Function Body */ + ret_val = 0; + if (*n < 1 || *incx <= 0) { + return ret_val; + } + ret_val = 1; + if (*n == 1) { + return ret_val; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + ix = 1; + smax = dcabs1_(&zx[1]); + ix += *incx; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if (dcabs1_(&zx[ix]) <= smax) { + goto L5; + } + ret_val = i__; + smax = dcabs1_(&zx[ix]); +L5: + ix += *incx; +/* L10: */ + } + return ret_val; + +/* code for increment equal to 1 */ + +L20: + smax = dcabs1_(&zx[1]); + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if (dcabs1_(&zx[i__]) <= smax) { + goto L30; + } + ret_val = i__; + smax = dcabs1_(&zx[i__]); +L30: + ; + } + return ret_val; +} /* izamax_ */ + +logical lsame_(char *ca, char *cb) +{ + /* System generated locals */ + logical ret_val; + + /* Local variables */ + static integer inta, intb, zcode; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + LSAME returns .TRUE. if CA is the same letter as CB regardless of + case. + + Arguments + ========= + + CA (input) CHARACTER*1 + CB (input) CHARACTER*1 + CA and CB specify the single characters to be compared. + + ===================================================================== + + + Test if the characters are equal +*/ + + ret_val = *(unsigned char *)ca == *(unsigned char *)cb; + if (ret_val) { + return ret_val; + } + +/* Now test for equivalence if both characters are alphabetic. */ + + zcode = 'Z'; + +/* + Use 'Z' rather than 'A' so that ASCII can be detected on Prime + machines, on which ICHAR returns a value with bit 8 set. + ICHAR('A') on Prime machines returns 193 which is the same as + ICHAR('A') on an EBCDIC machine. +*/ + + inta = *(unsigned char *)ca; + intb = *(unsigned char *)cb; + + if (zcode == 90 || zcode == 122) { + +/* + ASCII is assumed - ZCODE is the ASCII code of either lower or + upper case 'Z'. +*/ + + if (inta >= 97 && inta <= 122) { + inta += -32; + } + if (intb >= 97 && intb <= 122) { + intb += -32; + } + + } else if (zcode == 233 || zcode == 169) { + +/* + EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or + upper case 'Z'. +*/ + + if (inta >= 129 && inta <= 137 || inta >= 145 && inta <= 153 || inta + >= 162 && inta <= 169) { + inta += 64; + } + if (intb >= 129 && intb <= 137 || intb >= 145 && intb <= 153 || intb + >= 162 && intb <= 169) { + intb += 64; + } + + } else if (zcode == 218 || zcode == 250) { + +/* + ASCII is assumed, on Prime machines - ZCODE is the ASCII code + plus 128 of either lower or upper case 'Z'. +*/ + + if (inta >= 225 && inta <= 250) { + inta += -32; + } + if (intb >= 225 && intb <= 250) { + intb += -32; + } + } + ret_val = inta == intb; + +/* + RETURN + + End of LSAME +*/ + + return ret_val; +} /* lsame_ */ + +/* Subroutine */ int saxpy_(integer *n, real *sa, real *sx, integer *incx, + real *sy, integer *incy) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, m, ix, iy, mp1; + + +/* + constant times a vector plus a vector. + uses unrolled loop for increments equal to one. + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --sy; + --sx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*sa == 0.f) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + sy[iy] += *sa * sx[ix]; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* + code for both increments equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 4; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + sy[i__] += *sa * sx[i__]; +/* L30: */ + } + if (*n < 4) { + return 0; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 4) { + sy[i__] += *sa * sx[i__]; + sy[i__ + 1] += *sa * sx[i__ + 1]; + sy[i__ + 2] += *sa * sx[i__ + 2]; + sy[i__ + 3] += *sa * sx[i__ + 3]; +/* L50: */ + } + return 0; +} /* saxpy_ */ + +doublereal scasum_(integer *n, complex *cx, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + real ret_val, r__1, r__2; + + /* Builtin functions */ + double r_imag(complex *); + + /* Local variables */ + static integer i__, nincx; + static real stemp; + + +/* + takes the sum of the absolute values of a complex vector and + returns a single precision result. + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --cx; + + /* Function Body */ + ret_val = 0.f; + stemp = 0.f; + if (*n <= 0 || *incx <= 0) { + return ret_val; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + nincx = *n * *incx; + i__1 = nincx; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + i__3 = i__; + stemp = stemp + (r__1 = cx[i__3].r, dabs(r__1)) + (r__2 = r_imag(&cx[ + i__]), dabs(r__2)); +/* L10: */ + } + ret_val = stemp; + return ret_val; + +/* code for increment equal to 1 */ + +L20: + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__1 = i__; + stemp = stemp + (r__1 = cx[i__1].r, dabs(r__1)) + (r__2 = r_imag(&cx[ + i__]), dabs(r__2)); +/* L30: */ + } + ret_val = stemp; + return ret_val; +} /* scasum_ */ + +doublereal scnrm2_(integer *n, complex *x, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + real ret_val, r__1; + + /* Builtin functions */ + double r_imag(complex *), sqrt(doublereal); + + /* Local variables */ + static integer ix; + static real ssq, temp, norm, scale; + + +/* + SCNRM2 returns the euclidean norm of a vector via the function + name, so that + + SCNRM2 := sqrt( conjg( x' )*x ) + + + -- This version written on 25-October-1982. + Modified on 14-October-1993 to inline the call to CLASSQ. + Sven Hammarling, Nag Ltd. +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n < 1 || *incx < 1) { + norm = 0.f; + } else { + scale = 0.f; + ssq = 1.f; +/* + The following loop is equivalent to this call to the LAPACK + auxiliary routine: + CALL CLASSQ( N, X, INCX, SCALE, SSQ ) +*/ + + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + i__3 = ix; + if (x[i__3].r != 0.f) { + i__3 = ix; + temp = (r__1 = x[i__3].r, dabs(r__1)); + if (scale < temp) { +/* Computing 2nd power */ + r__1 = scale / temp; + ssq = ssq * (r__1 * r__1) + 1.f; + scale = temp; + } else { +/* Computing 2nd power */ + r__1 = temp / scale; + ssq += r__1 * r__1; + } + } + if (r_imag(&x[ix]) != 0.f) { + temp = (r__1 = r_imag(&x[ix]), dabs(r__1)); + if (scale < temp) { +/* Computing 2nd power */ + r__1 = scale / temp; + ssq = ssq * (r__1 * r__1) + 1.f; + scale = temp; + } else { +/* Computing 2nd power */ + r__1 = temp / scale; + ssq += r__1 * r__1; + } + } +/* L10: */ + } + norm = scale * sqrt(ssq); + } + + ret_val = norm; + return ret_val; + +/* End of SCNRM2. */ + +} /* scnrm2_ */ + +/* Subroutine */ int scopy_(integer *n, real *sx, integer *incx, real *sy, + integer *incy) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, m, ix, iy, mp1; + + +/* + copies a vector, x, to a vector, y. + uses unrolled loops for increments equal to 1. + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --sy; + --sx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + sy[iy] = sx[ix]; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* + code for both increments equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 7; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + sy[i__] = sx[i__]; +/* L30: */ + } + if (*n < 7) { + return 0; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 7) { + sy[i__] = sx[i__]; + sy[i__ + 1] = sx[i__ + 1]; + sy[i__ + 2] = sx[i__ + 2]; + sy[i__ + 3] = sx[i__ + 3]; + sy[i__ + 4] = sx[i__ + 4]; + sy[i__ + 5] = sx[i__ + 5]; + sy[i__ + 6] = sx[i__ + 6]; +/* L50: */ + } + return 0; +} /* scopy_ */ + +doublereal sdot_(integer *n, real *sx, integer *incx, real *sy, integer *incy) +{ + /* System generated locals */ + integer i__1; + real ret_val; + + /* Local variables */ + static integer i__, m, ix, iy, mp1; + static real stemp; + + +/* + forms the dot product of two vectors. + uses unrolled loops for increments equal to one. + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --sy; + --sx; + + /* Function Body */ + stemp = 0.f; + ret_val = 0.f; + if (*n <= 0) { + return ret_val; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp += sx[ix] * sy[iy]; + ix += *incx; + iy += *incy; +/* L10: */ + } + ret_val = stemp; + return ret_val; + +/* + code for both increments equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 5; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp += sx[i__] * sy[i__]; +/* L30: */ + } + if (*n < 5) { + goto L60; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 5) { + stemp = stemp + sx[i__] * sy[i__] + sx[i__ + 1] * sy[i__ + 1] + sx[ + i__ + 2] * sy[i__ + 2] + sx[i__ + 3] * sy[i__ + 3] + sx[i__ + + 4] * sy[i__ + 4]; +/* L50: */ + } +L60: + ret_val = stemp; + return ret_val; +} /* sdot_ */ + +/* Subroutine */ int sgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, real *alpha, real *a, integer *lda, real *b, integer * + ldb, real *beta, real *c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3; + + /* Local variables */ + static integer i__, j, l, info; + static logical nota, notb; + static real temp; + static integer ncola; + extern logical lsame_(char *, char *); + static integer nrowa, nrowb; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SGEMM performs one of the matrix-matrix operations + + C := alpha*op( A )*op( B ) + beta*C, + + where op( X ) is one of + + op( X ) = X or op( X ) = X', + + alpha and beta are scalars, and A, B and C are matrices, with op( A ) + an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. + + Parameters + ========== + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n', op( A ) = A. + + TRANSA = 'T' or 't', op( A ) = A'. + + TRANSA = 'C' or 'c', op( A ) = A'. + + Unchanged on exit. + + TRANSB - CHARACTER*1. + On entry, TRANSB specifies the form of op( B ) to be used in + the matrix multiplication as follows: + + TRANSB = 'N' or 'n', op( B ) = B. + + TRANSB = 'T' or 't', op( B ) = B'. + + TRANSB = 'C' or 'c', op( B ) = B'. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of the matrix + op( A ) and of the matrix C. M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix + op( B ) and the number of columns of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry, K specifies the number of columns of the matrix + op( A ) and the number of rows of the matrix op( B ). K must + be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, ka ), where ka is + k when TRANSA = 'N' or 'n', and is m otherwise. + Before entry with TRANSA = 'N' or 'n', the leading m by k + part of the array A must contain the matrix A, otherwise + the leading k by m part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANSA = 'N' or 'n' then + LDA must be at least max( 1, m ), otherwise LDA must be at + least max( 1, k ). + Unchanged on exit. + + B - REAL array of DIMENSION ( LDB, kb ), where kb is + n when TRANSB = 'N' or 'n', and is k otherwise. + Before entry with TRANSB = 'N' or 'n', the leading k by n + part of the array B must contain the matrix B, otherwise + the leading n by k part of the array B must contain the + matrix B. + Unchanged on exit. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. When TRANSB = 'N' or 'n' then + LDB must be at least max( 1, k ), otherwise LDB must be at + least max( 1, n ). + Unchanged on exit. + + BETA - REAL . + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then C need not be set on input. + Unchanged on exit. + + C - REAL array of DIMENSION ( LDC, n ). + Before entry, the leading m by n part of the array C must + contain the matrix C, except when beta is zero, in which + case C need not be set on entry. + On exit, the array C is overwritten by the m by n matrix + ( alpha*op( A )*op( B ) + beta*C ). + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, m ). + Unchanged on exit. + + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Set NOTA and NOTB as true if A and B respectively are not + transposed and set NROWA, NCOLA and NROWB as the number of rows + and columns of A and the number of rows of B respectively. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + nota = lsame_(transa, "N"); + notb = lsame_(transb, "N"); + if (nota) { + nrowa = *m; + ncola = *k; + } else { + nrowa = *k; + ncola = *m; + } + if (notb) { + nrowb = *k; + } else { + nrowb = *n; + } + +/* Test the input parameters. */ + + info = 0; + if (! nota && ! lsame_(transa, "C") && ! lsame_( + transa, "T")) { + info = 1; + } else if (! notb && ! lsame_(transb, "C") && ! + lsame_(transb, "T")) { + info = 2; + } else if (*m < 0) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*k < 0) { + info = 5; + } else if (*lda < max(1,nrowa)) { + info = 8; + } else if (*ldb < max(1,nrowb)) { + info = 10; + } else if (*ldc < max(1,*m)) { + info = 13; + } + if (info != 0) { + xerbla_("SGEMM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) { + return 0; + } + +/* And if alpha.eq.zero. */ + + if (*alpha == 0.f) { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + return 0; + } + +/* Start the operations. */ + + if (notb) { + if (nota) { + +/* Form C := alpha*A*B + beta*C. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L50: */ + } + } else if (*beta != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L60: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (b[l + j * b_dim1] != 0.f) { + temp = *alpha * b[l + j * b_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L70: */ + } + } +/* L80: */ + } +/* L90: */ + } + } else { + +/* Form C := alpha*A'*B + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * b[l + j * b_dim1]; +/* L100: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L110: */ + } +/* L120: */ + } + } + } else { + if (nota) { + +/* Form C := alpha*A*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L130: */ + } + } else if (*beta != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L140: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (b[j + l * b_dim1] != 0.f) { + temp = *alpha * b[j + l * b_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L150: */ + } + } +/* L160: */ + } +/* L170: */ + } + } else { + +/* Form C := alpha*A'*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * b[j + l * b_dim1]; +/* L180: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L190: */ + } +/* L200: */ + } + } + } + + return 0; + +/* End of SGEMM . */ + +} /* sgemm_ */ + +/* Subroutine */ int sgemv_(char *trans, integer *m, integer *n, real *alpha, + real *a, integer *lda, real *x, integer *incx, real *beta, real *y, + integer *incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static real temp; + static integer lenx, leny; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SGEMV performs one of the matrix-vector operations + + y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, + + where alpha and beta are scalars, x and y are vectors and A is an + m by n matrix. + + Parameters + ========== + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' y := alpha*A*x + beta*y. + + TRANS = 'T' or 't' y := alpha*A'*x + beta*y. + + TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of the matrix A. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, n ). + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, m ). + Unchanged on exit. + + X - REAL array of DIMENSION at least + ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' + and at least + ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. + Before entry, the incremented array X must contain the + vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + BETA - REAL . + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then Y need not be set on input. + Unchanged on exit. + + Y - REAL array of DIMENSION at least + ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' + and at least + ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. + Before entry with BETA non-zero, the incremented array Y + must contain the vector y. On exit, Y is overwritten by the + updated vector y. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C") + ) { + info = 1; + } else if (*m < 0) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*lda < max(1,*m)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + xerbla_("SGEMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || *alpha == 0.f && *beta == 1.f) { + return 0; + } + +/* + Set LENX and LENY, the lengths of the vectors x and y, and set + up the start points in X and Y. +*/ + + if (lsame_(trans, "N")) { + lenx = *n; + leny = *m; + } else { + lenx = *m; + leny = *n; + } + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (lenx - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (leny - 1) * *incy; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. + + First form y := beta*y. +*/ + + if (*beta != 1.f) { + if (*incy == 1) { + if (*beta == 0.f) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.f; +/* L10: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; +/* L20: */ + } + } + } else { + iy = ky; + if (*beta == 0.f) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.f; + iy += *incy; +/* L30: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; +/* L40: */ + } + } + } + } + if (*alpha == 0.f) { + return 0; + } + if (lsame_(trans, "N")) { + +/* Form y := alpha*A*x + y. */ + + jx = kx; + if (*incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.f) { + temp = *alpha * x[jx]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + y[i__] += temp * a[i__ + j * a_dim1]; +/* L50: */ + } + } + jx += *incx; +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.f) { + temp = *alpha * x[jx]; + iy = ky; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + y[iy] += temp * a[i__ + j * a_dim1]; + iy += *incy; +/* L70: */ + } + } + jx += *incx; +/* L80: */ + } + } + } else { + +/* Form y := alpha*A'*x + y. */ + + jy = ky; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = 0.f; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + y[jy] += *alpha * temp; + jy += *incy; +/* L100: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = 0.f; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[ix]; + ix += *incx; +/* L110: */ + } + y[jy] += *alpha * temp; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of SGEMV . */ + +} /* sgemv_ */ + +/* Subroutine */ int sger_(integer *m, integer *n, real *alpha, real *x, + integer *incx, real *y, integer *incy, real *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, jy, kx, info; + static real temp; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SGER performs the rank 1 operation + + A := alpha*x*y' + A, + + where alpha is a scalar, x is an m element vector, y is an n element + vector and A is an m by n matrix. + + Parameters + ========== + + M - INTEGER. + On entry, M specifies the number of rows of the matrix A. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + X - REAL array of dimension at least + ( 1 + ( m - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the m + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Y - REAL array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. + Unchanged on exit. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, n ). + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. On exit, A is + overwritten by the updated matrix. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, m ). + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (*m < 0) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_("SGER ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || *alpha == 0.f) { + return 0; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. +*/ + + if (*incy > 0) { + jy = 1; + } else { + jy = 1 - (*n - 1) * *incy; + } + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (y[jy] != 0.f) { + temp = *alpha * y[jy]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[i__] * temp; +/* L10: */ + } + } + jy += *incy; +/* L20: */ + } + } else { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*m - 1) * *incx; + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (y[jy] != 0.f) { + temp = *alpha * y[jy]; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[ix] * temp; + ix += *incx; +/* L30: */ + } + } + jy += *incy; +/* L40: */ + } + } + + return 0; + +/* End of SGER . */ + +} /* sger_ */ + +doublereal snrm2_(integer *n, real *x, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + real ret_val, r__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer ix; + static real ssq, norm, scale, absxi; + + +/* + SNRM2 returns the euclidean norm of a vector via the function + name, so that + + SNRM2 := sqrt( x'*x ) + + + -- This version written on 25-October-1982. + Modified on 14-October-1993 to inline the call to SLASSQ. + Sven Hammarling, Nag Ltd. +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n < 1 || *incx < 1) { + norm = 0.f; + } else if (*n == 1) { + norm = dabs(x[1]); + } else { + scale = 0.f; + ssq = 1.f; +/* + The following loop is equivalent to this call to the LAPACK + auxiliary routine: + CALL SLASSQ( N, X, INCX, SCALE, SSQ ) +*/ + + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + if (x[ix] != 0.f) { + absxi = (r__1 = x[ix], dabs(r__1)); + if (scale < absxi) { +/* Computing 2nd power */ + r__1 = scale / absxi; + ssq = ssq * (r__1 * r__1) + 1.f; + scale = absxi; + } else { +/* Computing 2nd power */ + r__1 = absxi / scale; + ssq += r__1 * r__1; + } + } +/* L10: */ + } + norm = scale * sqrt(ssq); + } + + ret_val = norm; + return ret_val; + +/* End of SNRM2. */ + +} /* snrm2_ */ + +/* Subroutine */ int srot_(integer *n, real *sx, integer *incx, real *sy, + integer *incy, real *c__, real *s) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, ix, iy; + static real stemp; + + +/* + applies a plane rotation. + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --sy; + --sx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments not equal + to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp = *c__ * sx[ix] + *s * sy[iy]; + sy[iy] = *c__ * sy[iy] - *s * sx[ix]; + sx[ix] = stemp; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp = *c__ * sx[i__] + *s * sy[i__]; + sy[i__] = *c__ * sy[i__] - *s * sx[i__]; + sx[i__] = stemp; +/* L30: */ + } + return 0; +} /* srot_ */ + +/* Subroutine */ int sscal_(integer *n, real *sa, real *sx, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + static integer i__, m, mp1, nincx; + + +/* + scales a vector by a constant. + uses unrolled loops for increment equal to 1. + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --sx; + + /* Function Body */ + if (*n <= 0 || *incx <= 0) { + return 0; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + nincx = *n * *incx; + i__1 = nincx; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + sx[i__] = *sa * sx[i__]; +/* L10: */ + } + return 0; + +/* + code for increment equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 5; + if (m == 0) { + goto L40; + } + i__2 = m; + for (i__ = 1; i__ <= i__2; ++i__) { + sx[i__] = *sa * sx[i__]; +/* L30: */ + } + if (*n < 5) { + return 0; + } +L40: + mp1 = m + 1; + i__2 = *n; + for (i__ = mp1; i__ <= i__2; i__ += 5) { + sx[i__] = *sa * sx[i__]; + sx[i__ + 1] = *sa * sx[i__ + 1]; + sx[i__ + 2] = *sa * sx[i__ + 2]; + sx[i__ + 3] = *sa * sx[i__ + 3]; + sx[i__ + 4] = *sa * sx[i__ + 4]; +/* L50: */ + } + return 0; +} /* sscal_ */ + +/* Subroutine */ int sswap_(integer *n, real *sx, integer *incx, real *sy, + integer *incy) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, m, ix, iy, mp1; + static real stemp; + + +/* + interchanges two vectors. + uses unrolled loops for increments equal to 1. + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --sy; + --sx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments not equal + to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp = sx[ix]; + sx[ix] = sy[iy]; + sy[iy] = stemp; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* + code for both increments equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 3; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp = sx[i__]; + sx[i__] = sy[i__]; + sy[i__] = stemp; +/* L30: */ + } + if (*n < 3) { + return 0; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 3) { + stemp = sx[i__]; + sx[i__] = sy[i__]; + sy[i__] = stemp; + stemp = sx[i__ + 1]; + sx[i__ + 1] = sy[i__ + 1]; + sy[i__ + 1] = stemp; + stemp = sx[i__ + 2]; + sx[i__ + 2] = sy[i__ + 2]; + sy[i__ + 2] = stemp; +/* L50: */ + } + return 0; +} /* sswap_ */ + +/* Subroutine */ int ssymv_(char *uplo, integer *n, real *alpha, real *a, + integer *lda, real *x, integer *incx, real *beta, real *y, integer * + incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static real temp1, temp2; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SSYMV performs the matrix-vector operation + + y := alpha*A*x + beta*y, + + where alpha and beta are scalars, x and y are n element vectors and + A is an n by n symmetric matrix. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array A is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of A + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of A + is to be referenced. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of A is not referenced. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + X - REAL array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + BETA - REAL . + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then Y need not be set on input. + Unchanged on exit. + + Y - REAL array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. On exit, Y is overwritten by the updated + vector y. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*lda < max(1,*n)) { + info = 5; + } else if (*incx == 0) { + info = 7; + } else if (*incy == 0) { + info = 10; + } + if (info != 0) { + xerbla_("SSYMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *alpha == 0.f && *beta == 1.f) { + return 0; + } + +/* Set up the start points in X and Y. */ + + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through the triangular part + of A. + + First form y := beta*y. +*/ + + if (*beta != 1.f) { + if (*incy == 1) { + if (*beta == 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.f; +/* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; +/* L20: */ + } + } + } else { + iy = ky; + if (*beta == 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.f; + iy += *incy; +/* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; +/* L40: */ + } + } + } + } + if (*alpha == 0.f) { + return 0; + } + if (lsame_(uplo, "U")) { + +/* Form y when A is stored in upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.f; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + y[i__] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[i__]; +/* L50: */ + } + y[j] = y[j] + temp1 * a[j + j * a_dim1] + *alpha * temp2; +/* L60: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.f; + ix = kx; + iy = ky; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + y[iy] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[ix]; + ix += *incx; + iy += *incy; +/* L70: */ + } + y[jy] = y[jy] + temp1 * a[j + j * a_dim1] + *alpha * temp2; + jx += *incx; + jy += *incy; +/* L80: */ + } + } + } else { + +/* Form y when A is stored in lower triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.f; + y[j] += temp1 * a[j + j * a_dim1]; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + y[i__] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + y[j] += *alpha * temp2; +/* L100: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.f; + y[jy] += temp1 * a[j + j * a_dim1]; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + iy += *incy; + y[iy] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[ix]; +/* L110: */ + } + y[jy] += *alpha * temp2; + jx += *incx; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of SSYMV . */ + +} /* ssymv_ */ + +/* Subroutine */ int ssyr2_(char *uplo, integer *n, real *alpha, real *x, + integer *incx, real *y, integer *incy, real *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static real temp1, temp2; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SSYR2 performs the symmetric rank 2 operation + + A := alpha*x*y' + alpha*y*x' + A, + + where alpha is a scalar, x and y are n element vectors and A is an n + by n symmetric matrix. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array A is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of A + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of A + is to be referenced. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + X - REAL array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Y - REAL array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. + Unchanged on exit. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of A is not referenced. On exit, the + upper triangular part of the array A is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of A is not referenced. On exit, the + lower triangular part of the array A is overwritten by the + lower triangular part of the updated matrix. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*n)) { + info = 9; + } + if (info != 0) { + xerbla_("SSYR2 ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *alpha == 0.f) { + return 0; + } + +/* + Set up the start points in X and Y if the increments are not both + unity. +*/ + + if (*incx != 1 || *incy != 1) { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + jx = kx; + jy = ky; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through the triangular part + of A. +*/ + + if (lsame_(uplo, "U")) { + +/* Form A when A is stored in the upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.f || y[j] != 0.f) { + temp1 = *alpha * y[j]; + temp2 = *alpha * x[j]; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * + temp1 + y[i__] * temp2; +/* L10: */ + } + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.f || y[jy] != 0.f) { + temp1 = *alpha * y[jy]; + temp2 = *alpha * x[jx]; + ix = kx; + iy = ky; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * + temp1 + y[iy] * temp2; + ix += *incx; + iy += *incy; +/* L30: */ + } + } + jx += *incx; + jy += *incy; +/* L40: */ + } + } + } else { + +/* Form A when A is stored in the lower triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.f || y[j] != 0.f) { + temp1 = *alpha * y[j]; + temp2 = *alpha * x[j]; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * + temp1 + y[i__] * temp2; +/* L50: */ + } + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.f || y[jy] != 0.f) { + temp1 = *alpha * y[jy]; + temp2 = *alpha * x[jx]; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * + temp1 + y[iy] * temp2; + ix += *incx; + iy += *incy; +/* L70: */ + } + } + jx += *incx; + jy += *incy; +/* L80: */ + } + } + } + + return 0; + +/* End of SSYR2 . */ + +} /* ssyr2_ */ + +/* Subroutine */ int ssyr2k_(char *uplo, char *trans, integer *n, integer *k, + real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta, + real *c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3; + + /* Local variables */ + static integer i__, j, l, info; + static real temp1, temp2; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SSYR2K performs one of the symmetric rank 2k operations + + C := alpha*A*B' + alpha*B*A' + beta*C, + + or + + C := alpha*A'*B + alpha*B'*A + beta*C, + + where alpha and beta are scalars, C is an n by n symmetric matrix + and A and B are n by k matrices in the first case and k by n + matrices in the second case. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array C is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of C + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of C + is to be referenced. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' + + beta*C. + + TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A + + beta*C. + + TRANS = 'C' or 'c' C := alpha*A'*B + alpha*B'*A + + beta*C. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry with TRANS = 'N' or 'n', K specifies the number + of columns of the matrices A and B, and on entry with + TRANS = 'T' or 't' or 'C' or 'c', K specifies the number + of rows of the matrices A and B. K must be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, ka ), where ka is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array A must contain the matrix A, otherwise + the leading k by n part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDA must be at least max( 1, n ), otherwise LDA must + be at least max( 1, k ). + Unchanged on exit. + + B - REAL array of DIMENSION ( LDB, kb ), where kb is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array B must contain the matrix B, otherwise + the leading k by n part of the array B must contain the + matrix B. + Unchanged on exit. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDB must be at least max( 1, n ), otherwise LDB must + be at least max( 1, k ). + Unchanged on exit. + + BETA - REAL . + On entry, BETA specifies the scalar beta. + Unchanged on exit. + + C - REAL array of DIMENSION ( LDC, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array C must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of C is not referenced. On exit, the + upper triangular part of the array C is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array C must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of C is not referenced. On exit, the + lower triangular part of the array C is overwritten by the + lower triangular part of the updated matrix. + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, n ). + Unchanged on exit. + + + Level 3 Blas routine. + + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(trans, "N")) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, "U"); + + info = 0; + if (! upper && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! lsame_(trans, "C")) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldb < max(1,nrowa)) { + info = 9; + } else if (*ldc < max(1,*n)) { + info = 12; + } + if (info != 0) { + xerbla_("SSYR2K", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.f) { + if (upper) { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + } else { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L70: */ + } +/* L80: */ + } + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(trans, "N")) { + +/* Form C := alpha*A*B' + alpha*B*A' + C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L90: */ + } + } else if (*beta != 1.f) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L100: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0.f || b[j + l * b_dim1] != 0.f) + { + temp1 = *alpha * b[j + l * b_dim1]; + temp2 = *alpha * a[j + l * a_dim1]; + i__3 = j; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ + i__ + l * a_dim1] * temp1 + b[i__ + l * + b_dim1] * temp2; +/* L110: */ + } + } +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L140: */ + } + } else if (*beta != 1.f) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L150: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0.f || b[j + l * b_dim1] != 0.f) + { + temp1 = *alpha * b[j + l * b_dim1]; + temp2 = *alpha * a[j + l * a_dim1]; + i__3 = *n; + for (i__ = j; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ + i__ + l * a_dim1] * temp1 + b[i__ + l * + b_dim1] * temp2; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } + } else { + +/* Form C := alpha*A'*B + alpha*B'*A + C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp1 = 0.f; + temp2 = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; + temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; +/* L190: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * + temp2; + } else { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + *alpha * temp1 + *alpha * temp2; + } +/* L200: */ + } +/* L210: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + temp1 = 0.f; + temp2 = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; + temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; +/* L220: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * + temp2; + } else { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + *alpha * temp1 + *alpha * temp2; + } +/* L230: */ + } +/* L240: */ + } + } + } + + return 0; + +/* End of SSYR2K. */ + +} /* ssyr2k_ */ + +/* Subroutine */ int ssyrk_(char *uplo, char *trans, integer *n, integer *k, + real *alpha, real *a, integer *lda, real *beta, real *c__, integer * + ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, l, info; + static real temp; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SSYRK performs one of the symmetric rank k operations + + C := alpha*A*A' + beta*C, + + or + + C := alpha*A'*A + beta*C, + + where alpha and beta are scalars, C is an n by n symmetric matrix + and A is an n by k matrix in the first case and a k by n matrix + in the second case. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array C is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of C + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of C + is to be referenced. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' C := alpha*A*A' + beta*C. + + TRANS = 'T' or 't' C := alpha*A'*A + beta*C. + + TRANS = 'C' or 'c' C := alpha*A'*A + beta*C. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry with TRANS = 'N' or 'n', K specifies the number + of columns of the matrix A, and on entry with + TRANS = 'T' or 't' or 'C' or 'c', K specifies the number + of rows of the matrix A. K must be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, ka ), where ka is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array A must contain the matrix A, otherwise + the leading k by n part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDA must be at least max( 1, n ), otherwise LDA must + be at least max( 1, k ). + Unchanged on exit. + + BETA - REAL . + On entry, BETA specifies the scalar beta. + Unchanged on exit. + + C - REAL array of DIMENSION ( LDC, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array C must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of C is not referenced. On exit, the + upper triangular part of the array C is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array C must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of C is not referenced. On exit, the + lower triangular part of the array C is overwritten by the + lower triangular part of the updated matrix. + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, n ). + Unchanged on exit. + + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(trans, "N")) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, "U"); + + info = 0; + if (! upper && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! lsame_(trans, "C")) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldc < max(1,*n)) { + info = 10; + } + if (info != 0) { + xerbla_("SSYRK ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.f) { + if (upper) { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + } else { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L70: */ + } +/* L80: */ + } + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(trans, "N")) { + +/* Form C := alpha*A*A' + beta*C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L90: */ + } + } else if (*beta != 1.f) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L100: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0.f) { + temp = *alpha * a[j + l * a_dim1]; + i__3 = j; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L110: */ + } + } +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L140: */ + } + } else if (*beta != 1.f) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L150: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0.f) { + temp = *alpha * a[j + l * a_dim1]; + i__3 = *n; + for (i__ = j; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } + } else { + +/* Form C := alpha*A'*A + beta*C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; +/* L190: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L200: */ + } +/* L210: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + temp = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; +/* L220: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L230: */ + } +/* L240: */ + } + } + } + + return 0; + +/* End of SSYRK . */ + +} /* ssyrk_ */ + +/* Subroutine */ int strmm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, + integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, k, info; + static real temp; + static logical lside; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical nounit; + + +/* + Purpose + ======= + + STRMM performs one of the matrix-matrix operations + + B := alpha*op( A )*B, or B := alpha*B*op( A ), + + where alpha is a scalar, B is an m by n matrix, A is a unit, or + non-unit, upper or lower triangular matrix and op( A ) is one of + + op( A ) = A or op( A ) = A'. + + Parameters + ========== + + SIDE - CHARACTER*1. + On entry, SIDE specifies whether op( A ) multiplies B from + the left or right as follows: + + SIDE = 'L' or 'l' B := alpha*op( A )*B. + + SIDE = 'R' or 'r' B := alpha*B*op( A ). + + Unchanged on exit. + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix A is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n' op( A ) = A. + + TRANSA = 'T' or 't' op( A ) = A'. + + TRANSA = 'C' or 'c' op( A ) = A'. + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit triangular + as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of B. M must be at + least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of B. N must be + at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. When alpha is + zero then A is not referenced and B need not be set before + entry. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, k ), where k is m + when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. + Before entry with UPLO = 'U' or 'u', the leading k by k + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading k by k + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When SIDE = 'L' or 'l' then + LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' + then LDA must be at least max( 1, n ). + Unchanged on exit. + + B - REAL array of DIMENSION ( LDB, n ). + Before entry, the leading m by n part of the array B must + contain the matrix B, and on exit is overwritten by the + transformed matrix. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. LDB must be at least + max( 1, m ). + Unchanged on exit. + + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = lsame_(side, "L"); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + nounit = lsame_(diag, "N"); + upper = lsame_(uplo, "U"); + + info = 0; + if (! lside && ! lsame_(side, "R")) { + info = 1; + } else if (! upper && ! lsame_(uplo, "L")) { + info = 2; + } else if (! lsame_(transa, "N") && ! lsame_(transa, + "T") && ! lsame_(transa, "C")) { + info = 3; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + xerbla_("STRMM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (lsame_(transa, "N")) { + +/* Form B := alpha*A*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + if (b[k + j * b_dim1] != 0.f) { + temp = *alpha * b[k + j * b_dim1]; + i__3 = k - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] += temp * a[i__ + k * + a_dim1]; +/* L30: */ + } + if (nounit) { + temp *= a[k + k * a_dim1]; + } + b[k + j * b_dim1] = temp; + } +/* L40: */ + } +/* L50: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (k = *m; k >= 1; --k) { + if (b[k + j * b_dim1] != 0.f) { + temp = *alpha * b[k + j * b_dim1]; + b[k + j * b_dim1] = temp; + if (nounit) { + b[k + j * b_dim1] *= a[k + k * a_dim1]; + } + i__2 = *m; + for (i__ = k + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] += temp * a[i__ + k * + a_dim1]; +/* L60: */ + } + } +/* L70: */ + } +/* L80: */ + } + } + } else { + +/* Form B := alpha*A'*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + temp = b[i__ + j * b_dim1]; + if (nounit) { + temp *= a[i__ + i__ * a_dim1]; + } + i__2 = i__ - 1; + for (k = 1; k <= i__2; ++k) { + temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L90: */ + } + b[i__ + j * b_dim1] = *alpha * temp; +/* L100: */ + } +/* L110: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = b[i__ + j * b_dim1]; + if (nounit) { + temp *= a[i__ + i__ * a_dim1]; + } + i__3 = *m; + for (k = i__ + 1; k <= i__3; ++k) { + temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L120: */ + } + b[i__ + j * b_dim1] = *alpha * temp; +/* L130: */ + } +/* L140: */ + } + } + } + } else { + if (lsame_(transa, "N")) { + +/* Form B := alpha*B*A. */ + + if (upper) { + for (j = *n; j >= 1; --j) { + temp = *alpha; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L150: */ + } + i__1 = j - 1; + for (k = 1; k <= i__1; ++k) { + if (a[k + j * a_dim1] != 0.f) { + temp = *alpha * a[k + j * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = *alpha; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L190: */ + } + i__2 = *n; + for (k = j + 1; k <= i__2; ++k) { + if (a[k + j * a_dim1] != 0.f) { + temp = *alpha * a[k + j * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L200: */ + } + } +/* L210: */ + } +/* L220: */ + } + } + } else { + +/* Form B := alpha*B*A'. */ + + if (upper) { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + i__2 = k - 1; + for (j = 1; j <= i__2; ++j) { + if (a[j + k * a_dim1] != 0.f) { + temp = *alpha * a[j + k * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L230: */ + } + } +/* L240: */ + } + temp = *alpha; + if (nounit) { + temp *= a[k + k * a_dim1]; + } + if (temp != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L250: */ + } + } +/* L260: */ + } + } else { + for (k = *n; k >= 1; --k) { + i__1 = *n; + for (j = k + 1; j <= i__1; ++j) { + if (a[j + k * a_dim1] != 0.f) { + temp = *alpha * a[j + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L270: */ + } + } +/* L280: */ + } + temp = *alpha; + if (nounit) { + temp *= a[k + k * a_dim1]; + } + if (temp != 1.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L290: */ + } + } +/* L300: */ + } + } + } + } + + return 0; + +/* End of STRMM . */ + +} /* strmm_ */ + +/* Subroutine */ int strmv_(char *uplo, char *trans, char *diag, integer *n, + real *a, integer *lda, real *x, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, jx, kx, info; + static real temp; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical nounit; + + +/* + Purpose + ======= + + STRMV performs one of the matrix-vector operations + + x := A*x, or x := A'*x, + + where x is an n element vector and A is an n by n unit, or non-unit, + upper or lower triangular matrix. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' x := A*x. + + TRANS = 'T' or 't' x := A'*x. + + TRANS = 'C' or 'c' x := A'*x. + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit + triangular as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + X - REAL array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. On exit, X is overwritten with the + tranformed vector x. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! lsame_(trans, "C")) { + info = 2; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1,*n)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } + if (info != 0) { + xerbla_("STRMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + nounit = lsame_(diag, "N"); + +/* + Set up the start point in X if the increment is not unity. This + will be ( N - 1 )*INCX too small for descending loops. +*/ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. +*/ + + if (lsame_(trans, "N")) { + +/* Form x := A*x. */ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.f) { + temp = x[j]; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + x[i__] += temp * a[i__ + j * a_dim1]; +/* L10: */ + } + if (nounit) { + x[j] *= a[j + j * a_dim1]; + } + } +/* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.f) { + temp = x[jx]; + ix = kx; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + x[ix] += temp * a[i__ + j * a_dim1]; + ix += *incx; +/* L30: */ + } + if (nounit) { + x[jx] *= a[j + j * a_dim1]; + } + } + jx += *incx; +/* L40: */ + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + if (x[j] != 0.f) { + temp = x[j]; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + x[i__] += temp * a[i__ + j * a_dim1]; +/* L50: */ + } + if (nounit) { + x[j] *= a[j + j * a_dim1]; + } + } +/* L60: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + if (x[jx] != 0.f) { + temp = x[jx]; + ix = kx; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + x[ix] += temp * a[i__ + j * a_dim1]; + ix -= *incx; +/* L70: */ + } + if (nounit) { + x[jx] *= a[j + j * a_dim1]; + } + } + jx -= *incx; +/* L80: */ + } + } + } + } else { + +/* Form x := A'*x. */ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + temp = x[j]; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + for (i__ = j - 1; i__ >= 1; --i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + x[j] = temp; +/* L100: */ + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + temp = x[jx]; + ix = jx; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + temp += a[i__ + j * a_dim1] * x[ix]; +/* L110: */ + } + x[jx] = temp; + jx -= *incx; +/* L120: */ + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[j]; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L130: */ + } + x[j] = temp; +/* L140: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[jx]; + ix = jx; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + temp += a[i__ + j * a_dim1] * x[ix]; +/* L150: */ + } + x[jx] = temp; + jx += *incx; +/* L160: */ + } + } + } + } + + return 0; + +/* End of STRMV . */ + +} /* strmv_ */ + +/* Subroutine */ int strsm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, + integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, k, info; + static real temp; + static logical lside; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical nounit; + + +/* + Purpose + ======= + + STRSM solves one of the matrix equations + + op( A )*X = alpha*B, or X*op( A ) = alpha*B, + + where alpha is a scalar, X and B are m by n matrices, A is a unit, or + non-unit, upper or lower triangular matrix and op( A ) is one of + + op( A ) = A or op( A ) = A'. + + The matrix X is overwritten on B. + + Parameters + ========== + + SIDE - CHARACTER*1. + On entry, SIDE specifies whether op( A ) appears on the left + or right of X as follows: + + SIDE = 'L' or 'l' op( A )*X = alpha*B. + + SIDE = 'R' or 'r' X*op( A ) = alpha*B. + + Unchanged on exit. + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix A is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n' op( A ) = A. + + TRANSA = 'T' or 't' op( A ) = A'. + + TRANSA = 'C' or 'c' op( A ) = A'. + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit triangular + as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of B. M must be at + least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of B. N must be + at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. When alpha is + zero then A is not referenced and B need not be set before + entry. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, k ), where k is m + when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. + Before entry with UPLO = 'U' or 'u', the leading k by k + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading k by k + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When SIDE = 'L' or 'l' then + LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' + then LDA must be at least max( 1, n ). + Unchanged on exit. + + B - REAL array of DIMENSION ( LDB, n ). + Before entry, the leading m by n part of the array B must + contain the right-hand side matrix B, and on exit is + overwritten by the solution matrix X. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. LDB must be at least + max( 1, m ). + Unchanged on exit. + + + Level 3 Blas routine. + + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = lsame_(side, "L"); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + nounit = lsame_(diag, "N"); + upper = lsame_(uplo, "U"); + + info = 0; + if (! lside && ! lsame_(side, "R")) { + info = 1; + } else if (! upper && ! lsame_(uplo, "L")) { + info = 2; + } else if (! lsame_(transa, "N") && ! lsame_(transa, + "T") && ! lsame_(transa, "C")) { + info = 3; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + xerbla_("STRSM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (lsame_(transa, "N")) { + +/* Form B := alpha*inv( A )*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L30: */ + } + } + for (k = *m; k >= 1; --k) { + if (b[k + j * b_dim1] != 0.f) { + if (nounit) { + b[k + j * b_dim1] /= a[k + k * a_dim1]; + } + i__2 = k - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ + i__ + k * a_dim1]; +/* L40: */ + } + } +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L70: */ + } + } + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + if (b[k + j * b_dim1] != 0.f) { + if (nounit) { + b[k + j * b_dim1] /= a[k + k * a_dim1]; + } + i__3 = *m; + for (i__ = k + 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ + i__ + k * a_dim1]; +/* L80: */ + } + } +/* L90: */ + } +/* L100: */ + } + } + } else { + +/* Form B := alpha*inv( A' )*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = *alpha * b[i__ + j * b_dim1]; + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L110: */ + } + if (nounit) { + temp /= a[i__ + i__ * a_dim1]; + } + b[i__ + j * b_dim1] = temp; +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + temp = *alpha * b[i__ + j * b_dim1]; + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L140: */ + } + if (nounit) { + temp /= a[i__ + i__ * a_dim1]; + } + b[i__ + j * b_dim1] = temp; +/* L150: */ + } +/* L160: */ + } + } + } + } else { + if (lsame_(transa, "N")) { + +/* Form B := alpha*B*inv( A ). */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L170: */ + } + } + i__2 = j - 1; + for (k = 1; k <= i__2; ++k) { + if (a[k + j * a_dim1] != 0.f) { + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ + i__ + k * b_dim1]; +/* L180: */ + } + } +/* L190: */ + } + if (nounit) { + temp = 1.f / a[j + j * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L200: */ + } + } +/* L210: */ + } + } else { + for (j = *n; j >= 1; --j) { + if (*alpha != 1.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L220: */ + } + } + i__1 = *n; + for (k = j + 1; k <= i__1; ++k) { + if (a[k + j * a_dim1] != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ + i__ + k * b_dim1]; +/* L230: */ + } + } +/* L240: */ + } + if (nounit) { + temp = 1.f / a[j + j * a_dim1]; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L250: */ + } + } +/* L260: */ + } + } + } else { + +/* Form B := alpha*B*inv( A' ). */ + + if (upper) { + for (k = *n; k >= 1; --k) { + if (nounit) { + temp = 1.f / a[k + k * a_dim1]; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L270: */ + } + } + i__1 = k - 1; + for (j = 1; j <= i__1; ++j) { + if (a[j + k * a_dim1] != 0.f) { + temp = a[j + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= temp * b[i__ + k * + b_dim1]; +/* L280: */ + } + } +/* L290: */ + } + if (*alpha != 1.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] + ; +/* L300: */ + } + } +/* L310: */ + } + } else { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (nounit) { + temp = 1.f / a[k + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L320: */ + } + } + i__2 = *n; + for (j = k + 1; j <= i__2; ++j) { + if (a[j + k * a_dim1] != 0.f) { + temp = a[j + k * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= temp * b[i__ + k * + b_dim1]; +/* L330: */ + } + } +/* L340: */ + } + if (*alpha != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] + ; +/* L350: */ + } + } +/* L360: */ + } + } + } + } + + return 0; + +/* End of STRSM . */ + +} /* strsm_ */ + +/* Subroutine */ int zaxpy_(integer *n, doublecomplex *za, doublecomplex *zx, + integer *incx, doublecomplex *zy, integer *incy) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4; + doublecomplex z__1, z__2; + + /* Local variables */ + static integer i__, ix, iy; + extern doublereal dcabs1_(doublecomplex *); + + +/* + constant times a vector plus a vector. + jack dongarra, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + /* Parameter adjustments */ + --zy; + --zx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (dcabs1_(za) == 0.) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = iy; + i__4 = ix; + z__2.r = za->r * zx[i__4].r - za->i * zx[i__4].i, z__2.i = za->r * zx[ + i__4].i + za->i * zx[i__4].r; + z__1.r = zy[i__3].r + z__2.r, z__1.i = zy[i__3].i + z__2.i; + zy[i__2].r = z__1.r, zy[i__2].i = z__1.i; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + i__4 = i__; + z__2.r = za->r * zx[i__4].r - za->i * zx[i__4].i, z__2.i = za->r * zx[ + i__4].i + za->i * zx[i__4].r; + z__1.r = zy[i__3].r + z__2.r, z__1.i = zy[i__3].i + z__2.i; + zy[i__2].r = z__1.r, zy[i__2].i = z__1.i; +/* L30: */ + } + return 0; +} /* zaxpy_ */ + +/* Subroutine */ int zcopy_(integer *n, doublecomplex *zx, integer *incx, + doublecomplex *zy, integer *incy) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + + /* Local variables */ + static integer i__, ix, iy; + + +/* + copies a vector, x, to a vector, y. + jack dongarra, linpack, 4/11/78. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --zy; + --zx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = ix; + zy[i__2].r = zx[i__3].r, zy[i__2].i = zx[i__3].i; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + zy[i__2].r = zx[i__3].r, zy[i__2].i = zx[i__3].i; +/* L30: */ + } + return 0; +} /* zcopy_ */ + +/* Double Complex */ VOID zdotc_(doublecomplex * ret_val, integer *n, + doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy) +{ + /* System generated locals */ + integer i__1, i__2; + doublecomplex z__1, z__2, z__3; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__, ix, iy; + static doublecomplex ztemp; + + +/* + forms the dot product of a vector. + jack dongarra, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + /* Parameter adjustments */ + --zy; + --zx; + + /* Function Body */ + ztemp.r = 0., ztemp.i = 0.; + ret_val->r = 0., ret_val->i = 0.; + if (*n <= 0) { + return ; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d_cnjg(&z__3, &zx[ix]); + i__2 = iy; + z__2.r = z__3.r * zy[i__2].r - z__3.i * zy[i__2].i, z__2.i = z__3.r * + zy[i__2].i + z__3.i * zy[i__2].r; + z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i; + ztemp.r = z__1.r, ztemp.i = z__1.i; + ix += *incx; + iy += *incy; +/* L10: */ + } + ret_val->r = ztemp.r, ret_val->i = ztemp.i; + return ; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d_cnjg(&z__3, &zx[i__]); + i__2 = i__; + z__2.r = z__3.r * zy[i__2].r - z__3.i * zy[i__2].i, z__2.i = z__3.r * + zy[i__2].i + z__3.i * zy[i__2].r; + z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i; + ztemp.r = z__1.r, ztemp.i = z__1.i; +/* L30: */ + } + ret_val->r = ztemp.r, ret_val->i = ztemp.i; + return ; +} /* zdotc_ */ + +/* Double Complex */ VOID zdotu_(doublecomplex * ret_val, integer *n, + doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + doublecomplex z__1, z__2; + + /* Local variables */ + static integer i__, ix, iy; + static doublecomplex ztemp; + + +/* + forms the dot product of two vectors. + jack dongarra, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + /* Parameter adjustments */ + --zy; + --zx; + + /* Function Body */ + ztemp.r = 0., ztemp.i = 0.; + ret_val->r = 0., ret_val->i = 0.; + if (*n <= 0) { + return ; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ix; + i__3 = iy; + z__2.r = zx[i__2].r * zy[i__3].r - zx[i__2].i * zy[i__3].i, z__2.i = + zx[i__2].r * zy[i__3].i + zx[i__2].i * zy[i__3].r; + z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i; + ztemp.r = z__1.r, ztemp.i = z__1.i; + ix += *incx; + iy += *incy; +/* L10: */ + } + ret_val->r = ztemp.r, ret_val->i = ztemp.i; + return ; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + z__2.r = zx[i__2].r * zy[i__3].r - zx[i__2].i * zy[i__3].i, z__2.i = + zx[i__2].r * zy[i__3].i + zx[i__2].i * zy[i__3].r; + z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i; + ztemp.r = z__1.r, ztemp.i = z__1.i; +/* L30: */ + } + ret_val->r = ztemp.r, ret_val->i = ztemp.i; + return ; +} /* zdotu_ */ + +/* Subroutine */ int zdscal_(integer *n, doublereal *da, doublecomplex *zx, + integer *incx) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + doublecomplex z__1, z__2; + + /* Local variables */ + static integer i__, ix; + + +/* + scales a vector by a constant. + jack dongarra, 3/11/78. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --zx; + + /* Function Body */ + if (*n <= 0 || *incx <= 0) { + return 0; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + ix = 1; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ix; + z__2.r = *da, z__2.i = 0.; + i__3 = ix; + z__1.r = z__2.r * zx[i__3].r - z__2.i * zx[i__3].i, z__1.i = z__2.r * + zx[i__3].i + z__2.i * zx[i__3].r; + zx[i__2].r = z__1.r, zx[i__2].i = z__1.i; + ix += *incx; +/* L10: */ + } + return 0; + +/* code for increment equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + z__2.r = *da, z__2.i = 0.; + i__3 = i__; + z__1.r = z__2.r * zx[i__3].r - z__2.i * zx[i__3].i, z__1.i = z__2.r * + zx[i__3].i + z__2.i * zx[i__3].r; + zx[i__2].r = z__1.r, zx[i__2].i = z__1.i; +/* L30: */ + } + return 0; +} /* zdscal_ */ + +/* Subroutine */ int zgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, doublecomplex *alpha, doublecomplex *a, integer *lda, + doublecomplex *b, integer *ldb, doublecomplex *beta, doublecomplex * + c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3, i__4, i__5, i__6; + doublecomplex z__1, z__2, z__3, z__4; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__, j, l, info; + static logical nota, notb; + static doublecomplex temp; + static logical conja, conjb; + static integer ncola; + extern logical lsame_(char *, char *); + static integer nrowa, nrowb; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + ZGEMM performs one of the matrix-matrix operations + + C := alpha*op( A )*op( B ) + beta*C, + + where op( X ) is one of + + op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ), + + alpha and beta are scalars, and A, B and C are matrices, with op( A ) + an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. + + Parameters + ========== + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n', op( A ) = A. + + TRANSA = 'T' or 't', op( A ) = A'. + + TRANSA = 'C' or 'c', op( A ) = conjg( A' ). + + Unchanged on exit. + + TRANSB - CHARACTER*1. + On entry, TRANSB specifies the form of op( B ) to be used in + the matrix multiplication as follows: + + TRANSB = 'N' or 'n', op( B ) = B. + + TRANSB = 'T' or 't', op( B ) = B'. + + TRANSB = 'C' or 'c', op( B ) = conjg( B' ). + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of the matrix + op( A ) and of the matrix C. M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix + op( B ) and the number of columns of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry, K specifies the number of columns of the matrix + op( A ) and the number of rows of the matrix op( B ). K must + be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX*16 . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is + k when TRANSA = 'N' or 'n', and is m otherwise. + Before entry with TRANSA = 'N' or 'n', the leading m by k + part of the array A must contain the matrix A, otherwise + the leading k by m part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANSA = 'N' or 'n' then + LDA must be at least max( 1, m ), otherwise LDA must be at + least max( 1, k ). + Unchanged on exit. + + B - COMPLEX*16 array of DIMENSION ( LDB, kb ), where kb is + n when TRANSB = 'N' or 'n', and is k otherwise. + Before entry with TRANSB = 'N' or 'n', the leading k by n + part of the array B must contain the matrix B, otherwise + the leading n by k part of the array B must contain the + matrix B. + Unchanged on exit. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. When TRANSB = 'N' or 'n' then + LDB must be at least max( 1, k ), otherwise LDB must be at + least max( 1, n ). + Unchanged on exit. + + BETA - COMPLEX*16 . + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then C need not be set on input. + Unchanged on exit. + + C - COMPLEX*16 array of DIMENSION ( LDC, n ). + Before entry, the leading m by n part of the array C must + contain the matrix C, except when beta is zero, in which + case C need not be set on entry. + On exit, the array C is overwritten by the m by n matrix + ( alpha*op( A )*op( B ) + beta*C ). + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, m ). + Unchanged on exit. + + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Set NOTA and NOTB as true if A and B respectively are not + conjugated or transposed, set CONJA and CONJB as true if A and + B respectively are to be transposed but not conjugated and set + NROWA, NCOLA and NROWB as the number of rows and columns of A + and the number of rows of B respectively. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + nota = lsame_(transa, "N"); + notb = lsame_(transb, "N"); + conja = lsame_(transa, "C"); + conjb = lsame_(transb, "C"); + if (nota) { + nrowa = *m; + ncola = *k; + } else { + nrowa = *k; + ncola = *m; + } + if (notb) { + nrowb = *k; + } else { + nrowb = *n; + } + +/* Test the input parameters. */ + + info = 0; + if (! nota && ! conja && ! lsame_(transa, "T")) { + info = 1; + } else if (! notb && ! conjb && ! lsame_(transb, "T")) { + info = 2; + } else if (*m < 0) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*k < 0) { + info = 5; + } else if (*lda < max(1,nrowa)) { + info = 8; + } else if (*ldb < max(1,nrowb)) { + info = 10; + } else if (*ldc < max(1,*m)) { + info = 13; + } + if (info != 0) { + xerbla_("ZGEMM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || (alpha->r == 0. && alpha->i == 0. || *k == 0) && + (beta->r == 1. && beta->i == 0.)) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (alpha->r == 0. && alpha->i == 0.) { + if (beta->r == 0. && beta->i == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, + z__1.i = beta->r * c__[i__4].i + beta->i * c__[ + i__4].r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L30: */ + } +/* L40: */ + } + } + return 0; + } + +/* Start the operations. */ + + if (notb) { + if (nota) { + +/* Form C := alpha*A*B + beta*C. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (beta->r == 0. && beta->i == 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L50: */ + } + } else if (beta->r != 1. || beta->i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, z__1.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L60: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = l + j * b_dim1; + if (b[i__3].r != 0. || b[i__3].i != 0.) { + i__3 = l + j * b_dim1; + z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + z__1.i = alpha->r * b[i__3].i + alpha->i * b[ + i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + z__2.i = temp.r * a[i__6].i + temp.i * a[ + i__6].r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5] + .i + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; +/* L70: */ + } + } +/* L80: */ + } +/* L90: */ + } + } else if (conja) { + +/* Form C := alpha*conjg( A' )*B + beta*C. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + d_cnjg(&z__3, &a[l + i__ * a_dim1]); + i__4 = l + j * b_dim1; + z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, + z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4] + .r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L100: */ + } + if (beta->r == 0. && beta->i == 0.) { + i__3 = i__ + j * c_dim1; + z__1.r = alpha->r * temp.r - alpha->i * temp.i, + z__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = alpha->r * temp.r - alpha->i * temp.i, + z__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, z__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } +/* L110: */ + } +/* L120: */ + } + } else { + +/* Form C := alpha*A'*B + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + i__4 = l + i__ * a_dim1; + i__5 = l + j * b_dim1; + z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5] + .i, z__2.i = a[i__4].r * b[i__5].i + a[i__4] + .i * b[i__5].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L130: */ + } + if (beta->r == 0. && beta->i == 0.) { + i__3 = i__ + j * c_dim1; + z__1.r = alpha->r * temp.r - alpha->i * temp.i, + z__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = alpha->r * temp.r - alpha->i * temp.i, + z__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, z__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } +/* L140: */ + } +/* L150: */ + } + } + } else if (nota) { + if (conjb) { + +/* Form C := alpha*A*conjg( B' ) + beta*C. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (beta->r == 0. && beta->i == 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L160: */ + } + } else if (beta->r != 1. || beta->i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, z__1.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L170: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * b_dim1; + if (b[i__3].r != 0. || b[i__3].i != 0.) { + d_cnjg(&z__2, &b[j + l * b_dim1]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, + z__1.i = alpha->r * z__2.i + alpha->i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + z__2.i = temp.r * a[i__6].i + temp.i * a[ + i__6].r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5] + .i + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; +/* L180: */ + } + } +/* L190: */ + } +/* L200: */ + } + } else { + +/* Form C := alpha*A*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (beta->r == 0. && beta->i == 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L210: */ + } + } else if (beta->r != 1. || beta->i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, z__1.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L220: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * b_dim1; + if (b[i__3].r != 0. || b[i__3].i != 0.) { + i__3 = j + l * b_dim1; + z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + z__1.i = alpha->r * b[i__3].i + alpha->i * b[ + i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + z__2.i = temp.r * a[i__6].i + temp.i * a[ + i__6].r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5] + .i + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; +/* L230: */ + } + } +/* L240: */ + } +/* L250: */ + } + } + } else if (conja) { + if (conjb) { + +/* Form C := alpha*conjg( A' )*conjg( B' ) + beta*C. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + d_cnjg(&z__3, &a[l + i__ * a_dim1]); + d_cnjg(&z__4, &b[j + l * b_dim1]); + z__2.r = z__3.r * z__4.r - z__3.i * z__4.i, z__2.i = + z__3.r * z__4.i + z__3.i * z__4.r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L260: */ + } + if (beta->r == 0. && beta->i == 0.) { + i__3 = i__ + j * c_dim1; + z__1.r = alpha->r * temp.r - alpha->i * temp.i, + z__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = alpha->r * temp.r - alpha->i * temp.i, + z__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, z__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } +/* L270: */ + } +/* L280: */ + } + } else { + +/* Form C := alpha*conjg( A' )*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + d_cnjg(&z__3, &a[l + i__ * a_dim1]); + i__4 = j + l * b_dim1; + z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, + z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4] + .r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L290: */ + } + if (beta->r == 0. && beta->i == 0.) { + i__3 = i__ + j * c_dim1; + z__1.r = alpha->r * temp.r - alpha->i * temp.i, + z__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = alpha->r * temp.r - alpha->i * temp.i, + z__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, z__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } +/* L300: */ + } +/* L310: */ + } + } + } else { + if (conjb) { + +/* Form C := alpha*A'*conjg( B' ) + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + i__4 = l + i__ * a_dim1; + d_cnjg(&z__3, &b[j + l * b_dim1]); + z__2.r = a[i__4].r * z__3.r - a[i__4].i * z__3.i, + z__2.i = a[i__4].r * z__3.i + a[i__4].i * + z__3.r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L320: */ + } + if (beta->r == 0. && beta->i == 0.) { + i__3 = i__ + j * c_dim1; + z__1.r = alpha->r * temp.r - alpha->i * temp.i, + z__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = alpha->r * temp.r - alpha->i * temp.i, + z__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, z__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } +/* L330: */ + } +/* L340: */ + } + } else { + +/* Form C := alpha*A'*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + i__4 = l + i__ * a_dim1; + i__5 = j + l * b_dim1; + z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5] + .i, z__2.i = a[i__4].r * b[i__5].i + a[i__4] + .i * b[i__5].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L350: */ + } + if (beta->r == 0. && beta->i == 0.) { + i__3 = i__ + j * c_dim1; + z__1.r = alpha->r * temp.r - alpha->i * temp.i, + z__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = alpha->r * temp.r - alpha->i * temp.i, + z__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, z__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } +/* L360: */ + } +/* L370: */ + } + } + } + + return 0; + +/* End of ZGEMM . */ + +} /* zgemm_ */ + +/* Subroutine */ int zgemv_(char *trans, integer *m, integer *n, + doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * + x, integer *incx, doublecomplex *beta, doublecomplex *y, integer * + incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2, z__3; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static doublecomplex temp; + static integer lenx, leny; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical noconj; + + +/* + Purpose + ======= + + ZGEMV performs one of the matrix-vector operations + + y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, or + + y := alpha*conjg( A' )*x + beta*y, + + where alpha and beta are scalars, x and y are vectors and A is an + m by n matrix. + + Parameters + ========== + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' y := alpha*A*x + beta*y. + + TRANS = 'T' or 't' y := alpha*A'*x + beta*y. + + TRANS = 'C' or 'c' y := alpha*conjg( A' )*x + beta*y. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of the matrix A. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX*16 . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - COMPLEX*16 array of DIMENSION ( LDA, n ). + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, m ). + Unchanged on exit. + + X - COMPLEX*16 array of DIMENSION at least + ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' + and at least + ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. + Before entry, the incremented array X must contain the + vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + BETA - COMPLEX*16 . + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then Y need not be set on input. + Unchanged on exit. + + Y - COMPLEX*16 array of DIMENSION at least + ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' + and at least + ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. + Before entry with BETA non-zero, the incremented array Y + must contain the vector y. On exit, Y is overwritten by the + updated vector y. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C") + ) { + info = 1; + } else if (*m < 0) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*lda < max(1,*m)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + xerbla_("ZGEMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == + 1. && beta->i == 0.)) { + return 0; + } + + noconj = lsame_(trans, "T"); + +/* + Set LENX and LENY, the lengths of the vectors x and y, and set + up the start points in X and Y. +*/ + + if (lsame_(trans, "N")) { + lenx = *n; + leny = *m; + } else { + lenx = *m; + leny = *n; + } + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (lenx - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (leny - 1) * *incy; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. + + First form y := beta*y. +*/ + + if (beta->r != 1. || beta->i != 0.) { + if (*incy == 1) { + if (beta->r == 0. && beta->i == 0.) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + y[i__2].r = 0., y[i__2].i = 0.; +/* L10: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + z__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; +/* L20: */ + } + } + } else { + iy = ky; + if (beta->r == 0. && beta->i == 0.) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + y[i__2].r = 0., y[i__2].i = 0.; + iy += *incy; +/* L30: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = iy; + z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + z__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + iy += *incy; +/* L40: */ + } + } + } + } + if (alpha->r == 0. && alpha->i == 0.) { + return 0; + } + if (lsame_(trans, "N")) { + +/* Form y := alpha*A*x + y. */ + + jx = kx; + if (*incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__2.i = temp.r * a[i__5].i + temp.i * a[i__5] + .r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; +/* L50: */ + } + } + jx += *incx; +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + temp.r = z__1.r, temp.i = z__1.i; + iy = ky; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = iy; + i__4 = iy; + i__5 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__2.i = temp.r * a[i__5].i + temp.i * a[i__5] + .r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + iy += *incy; +/* L70: */ + } + } + jx += *incx; +/* L80: */ + } + } + } else { + +/* Form y := alpha*A'*x + y or y := alpha*conjg( A' )*x + y. */ + + jy = ky; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp.r = 0., temp.i = 0.; + if (noconj) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4] + .i, z__2.i = a[i__3].r * x[i__4].i + a[i__3] + .i * x[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L90: */ + } + } else { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = i__; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, + z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3] + .r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L100: */ + } + } + i__2 = jy; + i__3 = jy; + z__2.r = alpha->r * temp.r - alpha->i * temp.i, z__2.i = + alpha->r * temp.i + alpha->i * temp.r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + jy += *incy; +/* L110: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp.r = 0., temp.i = 0.; + ix = kx; + if (noconj) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = ix; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4] + .i, z__2.i = a[i__3].r * x[i__4].i + a[i__3] + .i * x[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix += *incx; +/* L120: */ + } + } else { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = ix; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, + z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3] + .r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix += *incx; +/* L130: */ + } + } + i__2 = jy; + i__3 = jy; + z__2.r = alpha->r * temp.r - alpha->i * temp.i, z__2.i = + alpha->r * temp.i + alpha->i * temp.r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + jy += *incy; +/* L140: */ + } + } + } + + return 0; + +/* End of ZGEMV . */ + +} /* zgemv_ */ + +/* Subroutine */ int zgerc_(integer *m, integer *n, doublecomplex *alpha, + doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, + doublecomplex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__, j, ix, jy, kx, info; + static doublecomplex temp; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + ZGERC performs the rank 1 operation + + A := alpha*x*conjg( y' ) + A, + + where alpha is a scalar, x is an m element vector, y is an n element + vector and A is an m by n matrix. + + Parameters + ========== + + M - INTEGER. + On entry, M specifies the number of rows of the matrix A. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX*16 . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + X - COMPLEX*16 array of dimension at least + ( 1 + ( m - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the m + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Y - COMPLEX*16 array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. + Unchanged on exit. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + A - COMPLEX*16 array of DIMENSION ( LDA, n ). + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. On exit, A is + overwritten by the updated matrix. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, m ). + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (*m < 0) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_("ZGERC ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || alpha->r == 0. && alpha->i == 0.) { + return 0; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. +*/ + + if (*incy > 0) { + jy = 1; + } else { + jy = 1 - (*n - 1) * *incy; + } + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jy; + if (y[i__2].r != 0. || y[i__2].i != 0.) { + d_cnjg(&z__2, &y[jy]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = + alpha->r * z__2.i + alpha->i * z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, z__2.i = + x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L10: */ + } + } + jy += *incy; +/* L20: */ + } + } else { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*m - 1) * *incx; + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jy; + if (y[i__2].r != 0. || y[i__2].i != 0.) { + d_cnjg(&z__2, &y[jy]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = + alpha->r * z__2.i + alpha->i * z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, z__2.i = + x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + ix += *incx; +/* L30: */ + } + } + jy += *incy; +/* L40: */ + } + } + + return 0; + +/* End of ZGERC . */ + +} /* zgerc_ */ + +/* Subroutine */ int zgeru_(integer *m, integer *n, doublecomplex *alpha, + doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, + doublecomplex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2; + + /* Local variables */ + static integer i__, j, ix, jy, kx, info; + static doublecomplex temp; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + ZGERU performs the rank 1 operation + + A := alpha*x*y' + A, + + where alpha is a scalar, x is an m element vector, y is an n element + vector and A is an m by n matrix. + + Parameters + ========== + + M - INTEGER. + On entry, M specifies the number of rows of the matrix A. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX*16 . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + X - COMPLEX*16 array of dimension at least + ( 1 + ( m - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the m + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Y - COMPLEX*16 array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. + Unchanged on exit. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + A - COMPLEX*16 array of DIMENSION ( LDA, n ). + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. On exit, A is + overwritten by the updated matrix. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, m ). + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (*m < 0) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_("ZGERU ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || alpha->r == 0. && alpha->i == 0.) { + return 0; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. +*/ + + if (*incy > 0) { + jy = 1; + } else { + jy = 1 - (*n - 1) * *incy; + } + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jy; + if (y[i__2].r != 0. || y[i__2].i != 0.) { + i__2 = jy; + z__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, z__1.i = + alpha->r * y[i__2].i + alpha->i * y[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, z__2.i = + x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L10: */ + } + } + jy += *incy; +/* L20: */ + } + } else { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*m - 1) * *incx; + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jy; + if (y[i__2].r != 0. || y[i__2].i != 0.) { + i__2 = jy; + z__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, z__1.i = + alpha->r * y[i__2].i + alpha->i * y[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, z__2.i = + x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + ix += *incx; +/* L30: */ + } + } + jy += *incy; +/* L40: */ + } + } + + return 0; + +/* End of ZGERU . */ + +} /* zgeru_ */ + +/* Subroutine */ int zhemv_(char *uplo, integer *n, doublecomplex *alpha, + doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, + doublecomplex *beta, doublecomplex *y, integer *incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1; + doublecomplex z__1, z__2, z__3, z__4; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static doublecomplex temp1, temp2; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + ZHEMV performs the matrix-vector operation + + y := alpha*A*x + beta*y, + + where alpha and beta are scalars, x and y are n element vectors and + A is an n by n hermitian matrix. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array A is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of A + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of A + is to be referenced. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX*16 . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - COMPLEX*16 array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the hermitian matrix and the strictly + lower triangular part of A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the hermitian matrix and the strictly + upper triangular part of A is not referenced. + Note that the imaginary parts of the diagonal elements need + not be set and are assumed to be zero. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + X - COMPLEX*16 array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + BETA - COMPLEX*16 . + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then Y need not be set on input. + Unchanged on exit. + + Y - COMPLEX*16 array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. On exit, Y is overwritten by the updated + vector y. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*lda < max(1,*n)) { + info = 5; + } else if (*incx == 0) { + info = 7; + } else if (*incy == 0) { + info = 10; + } + if (info != 0) { + xerbla_("ZHEMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && + beta->i == 0.)) { + return 0; + } + +/* Set up the start points in X and Y. */ + + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through the triangular part + of A. + + First form y := beta*y. +*/ + + if (beta->r != 1. || beta->i != 0.) { + if (*incy == 1) { + if (beta->r == 0. && beta->i == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + y[i__2].r = 0., y[i__2].i = 0.; +/* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + z__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; +/* L20: */ + } + } + } else { + iy = ky; + if (beta->r == 0. && beta->i == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + y[i__2].r = 0., y[i__2].i = 0.; + iy += *incy; +/* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = iy; + z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + z__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + iy += *incy; +/* L40: */ + } + } + } + } + if (alpha->r == 0. && alpha->i == 0.) { + return 0; + } + if (lsame_(uplo, "U")) { + +/* Form y when A is stored in upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = i__; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = + z__3.r * x[i__3].i + z__3.i * x[i__3].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; +/* L50: */ + } + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + d__1 = a[i__4].r; + z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i; + z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i; + z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = + alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; +/* L60: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + ix = kx; + iy = ky; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = iy; + i__4 = iy; + i__5 = i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = ix; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = + z__3.r * x[i__3].i + z__3.i * x[i__3].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + ix += *incx; + iy += *incy; +/* L70: */ + } + i__2 = jy; + i__3 = jy; + i__4 = j + j * a_dim1; + d__1 = a[i__4].r; + z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i; + z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i; + z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = + alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + jx += *incx; + jy += *incy; +/* L80: */ + } + } + } else { + +/* Form y when A is stored in lower triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + d__1 = a[i__4].r; + z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = i__; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = + z__3.r * x[i__3].i + z__3.i * x[i__3].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; +/* L90: */ + } + i__2 = j; + i__3 = j; + z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = + alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; +/* L100: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__2 = jy; + i__3 = jy; + i__4 = j + j * a_dim1; + d__1 = a[i__4].r; + z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + iy += *incy; + i__3 = iy; + i__4 = iy; + i__5 = i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = ix; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = + z__3.r * x[i__3].i + z__3.i * x[i__3].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; +/* L110: */ + } + i__2 = jy; + i__3 = jy; + z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = + alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + jx += *incx; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of ZHEMV . */ + +} /* zhemv_ */ + +/* Subroutine */ int zher2_(char *uplo, integer *n, doublecomplex *alpha, + doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, + doublecomplex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + doublereal d__1; + doublecomplex z__1, z__2, z__3, z__4; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static doublecomplex temp1, temp2; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + ZHER2 performs the hermitian rank 2 operation + + A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, + + where alpha is a scalar, x and y are n element vectors and A is an n + by n hermitian matrix. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array A is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of A + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of A + is to be referenced. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX*16 . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + X - COMPLEX*16 array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Y - COMPLEX*16 array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. + Unchanged on exit. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + A - COMPLEX*16 array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the hermitian matrix and the strictly + lower triangular part of A is not referenced. On exit, the + upper triangular part of the array A is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the hermitian matrix and the strictly + upper triangular part of A is not referenced. On exit, the + lower triangular part of the array A is overwritten by the + lower triangular part of the updated matrix. + Note that the imaginary parts of the diagonal elements need + not be set, they are assumed to be zero, and on exit they + are set to zero. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*n)) { + info = 9; + } + if (info != 0) { + xerbla_("ZHER2 ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || alpha->r == 0. && alpha->i == 0.) { + return 0; + } + +/* + Set up the start points in X and Y if the increments are not both + unity. +*/ + + if (*incx != 1 || *incy != 1) { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + jx = kx; + jy = ky; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through the triangular part + of A. +*/ + + if (lsame_(uplo, "U")) { + +/* Form A when A is stored in the upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + i__3 = j; + if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || + y[i__3].i != 0.)) { + d_cnjg(&z__2, &y[j]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = + alpha->r * z__2.i + alpha->i * z__2.r; + temp1.r = z__1.r, temp1.i = z__1.i; + i__2 = j; + z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + d_cnjg(&z__1, &z__2); + temp2.r = z__1.r, temp2.i = z__1.i; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, + z__3.i = x[i__5].r * temp1.i + x[i__5].i * + temp1.r; + z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i + + z__3.i; + i__6 = i__; + z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, + z__4.i = y[i__6].r * temp2.i + y[i__6].i * + temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L10: */ + } + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = j; + z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, + z__2.i = x[i__4].r * temp1.i + x[i__4].i * + temp1.r; + i__5 = j; + z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, + z__3.i = y[i__5].r * temp2.i + y[i__5].i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + d__1 = a[i__3].r + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + i__3 = jy; + if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || + y[i__3].i != 0.)) { + d_cnjg(&z__2, &y[jy]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = + alpha->r * z__2.i + alpha->i * z__2.r; + temp1.r = z__1.r, temp1.i = z__1.i; + i__2 = jx; + z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + d_cnjg(&z__1, &z__2); + temp2.r = z__1.r, temp2.i = z__1.i; + ix = kx; + iy = ky; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, + z__3.i = x[i__5].r * temp1.i + x[i__5].i * + temp1.r; + z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i + + z__3.i; + i__6 = iy; + z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, + z__4.i = y[i__6].r * temp2.i + y[i__6].i * + temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + ix += *incx; + iy += *incy; +/* L30: */ + } + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = jx; + z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, + z__2.i = x[i__4].r * temp1.i + x[i__4].i * + temp1.r; + i__5 = jy; + z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, + z__3.i = y[i__5].r * temp2.i + y[i__5].i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + d__1 = a[i__3].r + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } + jx += *incx; + jy += *incy; +/* L40: */ + } + } + } else { + +/* Form A when A is stored in the lower triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + i__3 = j; + if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || + y[i__3].i != 0.)) { + d_cnjg(&z__2, &y[j]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = + alpha->r * z__2.i + alpha->i * z__2.r; + temp1.r = z__1.r, temp1.i = z__1.i; + i__2 = j; + z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + d_cnjg(&z__1, &z__2); + temp2.r = z__1.r, temp2.i = z__1.i; + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = j; + z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, + z__2.i = x[i__4].r * temp1.i + x[i__4].i * + temp1.r; + i__5 = j; + z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, + z__3.i = y[i__5].r * temp2.i + y[i__5].i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + d__1 = a[i__3].r + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, + z__3.i = x[i__5].r * temp1.i + x[i__5].i * + temp1.r; + z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i + + z__3.i; + i__6 = i__; + z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, + z__4.i = y[i__6].r * temp2.i + y[i__6].i * + temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L50: */ + } + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + i__3 = jy; + if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || + y[i__3].i != 0.)) { + d_cnjg(&z__2, &y[jy]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = + alpha->r * z__2.i + alpha->i * z__2.r; + temp1.r = z__1.r, temp1.i = z__1.i; + i__2 = jx; + z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + d_cnjg(&z__1, &z__2); + temp2.r = z__1.r, temp2.i = z__1.i; + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = jx; + z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, + z__2.i = x[i__4].r * temp1.i + x[i__4].i * + temp1.r; + i__5 = jy; + z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, + z__3.i = y[i__5].r * temp2.i + y[i__5].i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + d__1 = a[i__3].r + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + iy += *incy; + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, + z__3.i = x[i__5].r * temp1.i + x[i__5].i * + temp1.r; + z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i + + z__3.i; + i__6 = iy; + z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, + z__4.i = y[i__6].r * temp2.i + y[i__6].i * + temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L70: */ + } + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } + jx += *incx; + jy += *incy; +/* L80: */ + } + } + } + + return 0; + +/* End of ZHER2 . */ + +} /* zher2_ */ + +/* Subroutine */ int zher2k_(char *uplo, char *trans, integer *n, integer *k, + doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * + b, integer *ldb, doublereal *beta, doublecomplex *c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3, i__4, i__5, i__6, i__7; + doublereal d__1; + doublecomplex z__1, z__2, z__3, z__4, z__5, z__6; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__, j, l, info; + static doublecomplex temp1, temp2; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + ZHER2K performs one of the hermitian rank 2k operations + + C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + beta*C, + + or + + C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + beta*C, + + where alpha and beta are scalars with beta real, C is an n by n + hermitian matrix and A and B are n by k matrices in the first case + and k by n matrices in the second case. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array C is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of C + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of C + is to be referenced. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' C := alpha*A*conjg( B' ) + + conjg( alpha )*B*conjg( A' ) + + beta*C. + + TRANS = 'C' or 'c' C := alpha*conjg( A' )*B + + conjg( alpha )*conjg( B' )*A + + beta*C. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry with TRANS = 'N' or 'n', K specifies the number + of columns of the matrices A and B, and on entry with + TRANS = 'C' or 'c', K specifies the number of rows of the + matrices A and B. K must be at least zero. + Unchanged on exit. + + ALPHA - COMPLEX*16 . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array A must contain the matrix A, otherwise + the leading k by n part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDA must be at least max( 1, n ), otherwise LDA must + be at least max( 1, k ). + Unchanged on exit. + + B - COMPLEX*16 array of DIMENSION ( LDB, kb ), where kb is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array B must contain the matrix B, otherwise + the leading k by n part of the array B must contain the + matrix B. + Unchanged on exit. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDB must be at least max( 1, n ), otherwise LDB must + be at least max( 1, k ). + Unchanged on exit. + + BETA - DOUBLE PRECISION . + On entry, BETA specifies the scalar beta. + Unchanged on exit. + + C - COMPLEX*16 array of DIMENSION ( LDC, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array C must contain the upper + triangular part of the hermitian matrix and the strictly + lower triangular part of C is not referenced. On exit, the + upper triangular part of the array C is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array C must contain the lower + triangular part of the hermitian matrix and the strictly + upper triangular part of C is not referenced. On exit, the + lower triangular part of the array C is overwritten by the + lower triangular part of the updated matrix. + Note that the imaginary parts of the diagonal elements need + not be set, they are assumed to be zero, and on exit they + are set to zero. + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, n ). + Unchanged on exit. + + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1. + Ed Anderson, Cray Research Inc. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(trans, "N")) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, "U"); + + info = 0; + if (! upper && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "C")) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldb < max(1,nrowa)) { + info = 9; + } else if (*ldc < max(1,*n)) { + info = 12; + } + if (info != 0) { + xerbla_("ZHER2K", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (alpha->r == 0. && alpha->i == 0. || *k == 0) && *beta == + 1.) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (alpha->r == 0. && alpha->i == 0.) { + if (upper) { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L30: */ + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; +/* L40: */ + } + } + } else { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L70: */ + } +/* L80: */ + } + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(trans, "N")) { + +/* + Form C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + + C. +*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L90: */ + } + } else if (*beta != 1.) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L100: */ + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * a_dim1; + i__4 = j + l * b_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0. || (b[i__4].r != + 0. || b[i__4].i != 0.)) { + d_cnjg(&z__2, &b[j + l * b_dim1]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, + z__1.i = alpha->r * z__2.i + alpha->i * + z__2.r; + temp1.r = z__1.r, temp1.i = z__1.i; + i__3 = j + l * a_dim1; + z__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, + z__2.i = alpha->r * a[i__3].i + alpha->i * a[ + i__3].r; + d_cnjg(&z__1, &z__2); + temp2.r = z__1.r, temp2.i = z__1.i; + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + z__3.r = a[i__6].r * temp1.r - a[i__6].i * + temp1.i, z__3.i = a[i__6].r * temp1.i + a[ + i__6].i * temp1.r; + z__2.r = c__[i__5].r + z__3.r, z__2.i = c__[i__5] + .i + z__3.i; + i__7 = i__ + l * b_dim1; + z__4.r = b[i__7].r * temp2.r - b[i__7].i * + temp2.i, z__4.i = b[i__7].r * temp2.i + b[ + i__7].i * temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + + z__4.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; +/* L110: */ + } + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = j + l * a_dim1; + z__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i, + z__2.i = a[i__5].r * temp1.i + a[i__5].i * + temp1.r; + i__6 = j + l * b_dim1; + z__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i, + z__3.i = b[i__6].r * temp2.i + b[i__6].i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + d__1 = c__[i__4].r + z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + } +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L140: */ + } + } else if (*beta != 1.) { + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L150: */ + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * a_dim1; + i__4 = j + l * b_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0. || (b[i__4].r != + 0. || b[i__4].i != 0.)) { + d_cnjg(&z__2, &b[j + l * b_dim1]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, + z__1.i = alpha->r * z__2.i + alpha->i * + z__2.r; + temp1.r = z__1.r, temp1.i = z__1.i; + i__3 = j + l * a_dim1; + z__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, + z__2.i = alpha->r * a[i__3].i + alpha->i * a[ + i__3].r; + d_cnjg(&z__1, &z__2); + temp2.r = z__1.r, temp2.i = z__1.i; + i__3 = *n; + for (i__ = j + 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + z__3.r = a[i__6].r * temp1.r - a[i__6].i * + temp1.i, z__3.i = a[i__6].r * temp1.i + a[ + i__6].i * temp1.r; + z__2.r = c__[i__5].r + z__3.r, z__2.i = c__[i__5] + .i + z__3.i; + i__7 = i__ + l * b_dim1; + z__4.r = b[i__7].r * temp2.r - b[i__7].i * + temp2.i, z__4.i = b[i__7].r * temp2.i + b[ + i__7].i * temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + + z__4.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; +/* L160: */ + } + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = j + l * a_dim1; + z__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i, + z__2.i = a[i__5].r * temp1.i + a[i__5].i * + temp1.r; + i__6 = j + l * b_dim1; + z__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i, + z__3.i = b[i__6].r * temp2.i + b[i__6].i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + d__1 = c__[i__4].r + z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + } +/* L170: */ + } +/* L180: */ + } + } + } else { + +/* + Form C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + + C. +*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp1.r = 0., temp1.i = 0.; + temp2.r = 0., temp2.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + d_cnjg(&z__3, &a[l + i__ * a_dim1]); + i__4 = l + j * b_dim1; + z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, + z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4] + .r; + z__1.r = temp1.r + z__2.r, z__1.i = temp1.i + z__2.i; + temp1.r = z__1.r, temp1.i = z__1.i; + d_cnjg(&z__3, &b[l + i__ * b_dim1]); + i__4 = l + j * a_dim1; + z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, + z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4] + .r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; +/* L190: */ + } + if (i__ == j) { + if (*beta == 0.) { + i__3 = j + j * c_dim1; + z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + z__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + d_cnjg(&z__4, alpha); + z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, + z__3.i = z__4.r * temp2.i + z__4.i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + d__1 = z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + } else { + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + z__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + d_cnjg(&z__4, alpha); + z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, + z__3.i = z__4.r * temp2.i + z__4.i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + d__1 = *beta * c__[i__4].r + z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + } + } else { + if (*beta == 0.) { + i__3 = i__ + j * c_dim1; + z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + z__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + d_cnjg(&z__4, alpha); + z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, + z__3.i = z__4.r * temp2.i + z__4.i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__3.r = *beta * c__[i__4].r, z__3.i = *beta * + c__[i__4].i; + z__4.r = alpha->r * temp1.r - alpha->i * temp1.i, + z__4.i = alpha->r * temp1.i + alpha->i * + temp1.r; + z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + + z__4.i; + d_cnjg(&z__6, alpha); + z__5.r = z__6.r * temp2.r - z__6.i * temp2.i, + z__5.i = z__6.r * temp2.i + z__6.i * + temp2.r; + z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + + z__5.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } +/* L200: */ + } +/* L210: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + temp1.r = 0., temp1.i = 0.; + temp2.r = 0., temp2.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + d_cnjg(&z__3, &a[l + i__ * a_dim1]); + i__4 = l + j * b_dim1; + z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, + z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4] + .r; + z__1.r = temp1.r + z__2.r, z__1.i = temp1.i + z__2.i; + temp1.r = z__1.r, temp1.i = z__1.i; + d_cnjg(&z__3, &b[l + i__ * b_dim1]); + i__4 = l + j * a_dim1; + z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, + z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4] + .r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; +/* L220: */ + } + if (i__ == j) { + if (*beta == 0.) { + i__3 = j + j * c_dim1; + z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + z__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + d_cnjg(&z__4, alpha); + z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, + z__3.i = z__4.r * temp2.i + z__4.i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + d__1 = z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + } else { + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + z__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + d_cnjg(&z__4, alpha); + z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, + z__3.i = z__4.r * temp2.i + z__4.i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + d__1 = *beta * c__[i__4].r + z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + } + } else { + if (*beta == 0.) { + i__3 = i__ + j * c_dim1; + z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + z__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + d_cnjg(&z__4, alpha); + z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, + z__3.i = z__4.r * temp2.i + z__4.i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__3.r = *beta * c__[i__4].r, z__3.i = *beta * + c__[i__4].i; + z__4.r = alpha->r * temp1.r - alpha->i * temp1.i, + z__4.i = alpha->r * temp1.i + alpha->i * + temp1.r; + z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + + z__4.i; + d_cnjg(&z__6, alpha); + z__5.r = z__6.r * temp2.r - z__6.i * temp2.i, + z__5.i = z__6.r * temp2.i + z__6.i * + temp2.r; + z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + + z__5.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } +/* L230: */ + } +/* L240: */ + } + } + } + + return 0; + +/* End of ZHER2K. */ + +} /* zher2k_ */ + +/* Subroutine */ int zherk_(char *uplo, char *trans, integer *n, integer *k, + doublereal *alpha, doublecomplex *a, integer *lda, doublereal *beta, + doublecomplex *c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, + i__6; + doublereal d__1; + doublecomplex z__1, z__2, z__3; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__, j, l, info; + static doublecomplex temp; + extern logical lsame_(char *, char *); + static integer nrowa; + static doublereal rtemp; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + ZHERK performs one of the hermitian rank k operations + + C := alpha*A*conjg( A' ) + beta*C, + + or + + C := alpha*conjg( A' )*A + beta*C, + + where alpha and beta are real scalars, C is an n by n hermitian + matrix and A is an n by k matrix in the first case and a k by n + matrix in the second case. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array C is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of C + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of C + is to be referenced. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' C := alpha*A*conjg( A' ) + beta*C. + + TRANS = 'C' or 'c' C := alpha*conjg( A' )*A + beta*C. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry with TRANS = 'N' or 'n', K specifies the number + of columns of the matrix A, and on entry with + TRANS = 'C' or 'c', K specifies the number of rows of the + matrix A. K must be at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array A must contain the matrix A, otherwise + the leading k by n part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDA must be at least max( 1, n ), otherwise LDA must + be at least max( 1, k ). + Unchanged on exit. + + BETA - DOUBLE PRECISION. + On entry, BETA specifies the scalar beta. + Unchanged on exit. + + C - COMPLEX*16 array of DIMENSION ( LDC, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array C must contain the upper + triangular part of the hermitian matrix and the strictly + lower triangular part of C is not referenced. On exit, the + upper triangular part of the array C is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array C must contain the lower + triangular part of the hermitian matrix and the strictly + upper triangular part of C is not referenced. On exit, the + lower triangular part of the array C is overwritten by the + lower triangular part of the updated matrix. + Note that the imaginary parts of the diagonal elements need + not be set, they are assumed to be zero, and on exit they + are set to zero. + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, n ). + Unchanged on exit. + + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1. + Ed Anderson, Cray Research Inc. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(trans, "N")) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, "U"); + + info = 0; + if (! upper && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "C")) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldc < max(1,*n)) { + info = 10; + } + if (info != 0) { + xerbla_("ZHERK ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.) { + if (upper) { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L30: */ + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; +/* L40: */ + } + } + } else { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L70: */ + } +/* L80: */ + } + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(trans, "N")) { + +/* Form C := alpha*A*conjg( A' ) + beta*C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L90: */ + } + } else if (*beta != 1.) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L100: */ + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * a_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0.) { + d_cnjg(&z__2, &a[j + l * a_dim1]); + z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + z__2.i = temp.r * a[i__6].i + temp.i * a[ + i__6].r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5] + .i + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; +/* L110: */ + } + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = i__ + l * a_dim1; + z__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__1.i = temp.r * a[i__5].i + temp.i * a[i__5] + .r; + d__1 = c__[i__4].r + z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + } +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L140: */ + } + } else if (*beta != 1.) { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L150: */ + } + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * a_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0.) { + d_cnjg(&z__2, &a[j + l * a_dim1]); + z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = j + l * a_dim1; + z__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__1.i = temp.r * a[i__5].i + temp.i * a[i__5] + .r; + d__1 = c__[i__4].r + z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + i__3 = *n; + for (i__ = j + 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + z__2.i = temp.r * a[i__6].i + temp.i * a[ + i__6].r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5] + .i + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } + } else { + +/* Form C := alpha*conjg( A' )*A + beta*C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + d_cnjg(&z__3, &a[l + i__ * a_dim1]); + i__4 = l + j * a_dim1; + z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, + z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4] + .r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L190: */ + } + if (*beta == 0.) { + i__3 = i__ + j * c_dim1; + z__1.r = *alpha * temp.r, z__1.i = *alpha * temp.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = *alpha * temp.r, z__2.i = *alpha * temp.i; + i__4 = i__ + j * c_dim1; + z__3.r = *beta * c__[i__4].r, z__3.i = *beta * c__[ + i__4].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } +/* L200: */ + } + rtemp = 0.; + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + d_cnjg(&z__3, &a[l + j * a_dim1]); + i__3 = l + j * a_dim1; + z__2.r = z__3.r * a[i__3].r - z__3.i * a[i__3].i, z__2.i = + z__3.r * a[i__3].i + z__3.i * a[i__3].r; + z__1.r = rtemp + z__2.r, z__1.i = z__2.i; + rtemp = z__1.r; +/* L210: */ + } + if (*beta == 0.) { + i__2 = j + j * c_dim1; + d__1 = *alpha * rtemp; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *alpha * rtemp + *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } +/* L220: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + rtemp = 0.; + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + d_cnjg(&z__3, &a[l + j * a_dim1]); + i__3 = l + j * a_dim1; + z__2.r = z__3.r * a[i__3].r - z__3.i * a[i__3].i, z__2.i = + z__3.r * a[i__3].i + z__3.i * a[i__3].r; + z__1.r = rtemp + z__2.r, z__1.i = z__2.i; + rtemp = z__1.r; +/* L230: */ + } + if (*beta == 0.) { + i__2 = j + j * c_dim1; + d__1 = *alpha * rtemp; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *alpha * rtemp + *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + d_cnjg(&z__3, &a[l + i__ * a_dim1]); + i__4 = l + j * a_dim1; + z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, + z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4] + .r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L240: */ + } + if (*beta == 0.) { + i__3 = i__ + j * c_dim1; + z__1.r = *alpha * temp.r, z__1.i = *alpha * temp.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = *alpha * temp.r, z__2.i = *alpha * temp.i; + i__4 = i__ + j * c_dim1; + z__3.r = *beta * c__[i__4].r, z__3.i = *beta * c__[ + i__4].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } +/* L250: */ + } +/* L260: */ + } + } + } + + return 0; + +/* End of ZHERK . */ + +} /* zherk_ */ + +/* Subroutine */ int zscal_(integer *n, doublecomplex *za, doublecomplex *zx, + integer *incx) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + doublecomplex z__1; + + /* Local variables */ + static integer i__, ix; + + +/* + scales a vector by a constant. + jack dongarra, 3/11/78. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --zx; + + /* Function Body */ + if (*n <= 0 || *incx <= 0) { + return 0; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + ix = 1; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ix; + i__3 = ix; + z__1.r = za->r * zx[i__3].r - za->i * zx[i__3].i, z__1.i = za->r * zx[ + i__3].i + za->i * zx[i__3].r; + zx[i__2].r = z__1.r, zx[i__2].i = z__1.i; + ix += *incx; +/* L10: */ + } + return 0; + +/* code for increment equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + z__1.r = za->r * zx[i__3].r - za->i * zx[i__3].i, z__1.i = za->r * zx[ + i__3].i + za->i * zx[i__3].r; + zx[i__2].r = z__1.r, zx[i__2].i = z__1.i; +/* L30: */ + } + return 0; +} /* zscal_ */ + +/* Subroutine */ int zswap_(integer *n, doublecomplex *zx, integer *incx, + doublecomplex *zy, integer *incy) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + + /* Local variables */ + static integer i__, ix, iy; + static doublecomplex ztemp; + + +/* + interchanges two vectors. + jack dongarra, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --zy; + --zx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments not equal + to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ix; + ztemp.r = zx[i__2].r, ztemp.i = zx[i__2].i; + i__2 = ix; + i__3 = iy; + zx[i__2].r = zy[i__3].r, zx[i__2].i = zy[i__3].i; + i__2 = iy; + zy[i__2].r = ztemp.r, zy[i__2].i = ztemp.i; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + ztemp.r = zx[i__2].r, ztemp.i = zx[i__2].i; + i__2 = i__; + i__3 = i__; + zx[i__2].r = zy[i__3].r, zx[i__2].i = zy[i__3].i; + i__2 = i__; + zy[i__2].r = ztemp.r, zy[i__2].i = ztemp.i; +/* L30: */ + } + return 0; +} /* zswap_ */ + +/* Subroutine */ int ztrmm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, + integer *lda, doublecomplex *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, + i__6; + doublecomplex z__1, z__2, z__3; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__, j, k, info; + static doublecomplex temp; + static logical lside; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical noconj, nounit; + + +/* + Purpose + ======= + + ZTRMM performs one of the matrix-matrix operations + + B := alpha*op( A )*B, or B := alpha*B*op( A ) + + where alpha is a scalar, B is an m by n matrix, A is a unit, or + non-unit, upper or lower triangular matrix and op( A ) is one of + + op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). + + Parameters + ========== + + SIDE - CHARACTER*1. + On entry, SIDE specifies whether op( A ) multiplies B from + the left or right as follows: + + SIDE = 'L' or 'l' B := alpha*op( A )*B. + + SIDE = 'R' or 'r' B := alpha*B*op( A ). + + Unchanged on exit. + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix A is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n' op( A ) = A. + + TRANSA = 'T' or 't' op( A ) = A'. + + TRANSA = 'C' or 'c' op( A ) = conjg( A' ). + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit triangular + as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of B. M must be at + least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of B. N must be + at least zero. + Unchanged on exit. + + ALPHA - COMPLEX*16 . + On entry, ALPHA specifies the scalar alpha. When alpha is + zero then A is not referenced and B need not be set before + entry. + Unchanged on exit. + + A - COMPLEX*16 array of DIMENSION ( LDA, k ), where k is m + when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. + Before entry with UPLO = 'U' or 'u', the leading k by k + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading k by k + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When SIDE = 'L' or 'l' then + LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' + then LDA must be at least max( 1, n ). + Unchanged on exit. + + B - COMPLEX*16 array of DIMENSION ( LDB, n ). + Before entry, the leading m by n part of the array B must + contain the matrix B, and on exit is overwritten by the + transformed matrix. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. LDB must be at least + max( 1, m ). + Unchanged on exit. + + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = lsame_(side, "L"); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + noconj = lsame_(transa, "T"); + nounit = lsame_(diag, "N"); + upper = lsame_(uplo, "U"); + + info = 0; + if (! lside && ! lsame_(side, "R")) { + info = 1; + } else if (! upper && ! lsame_(uplo, "L")) { + info = 2; + } else if (! lsame_(transa, "N") && ! lsame_(transa, + "T") && ! lsame_(transa, "C")) { + info = 3; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + xerbla_("ZTRMM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (alpha->r == 0. && alpha->i == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + b[i__3].r = 0., b[i__3].i = 0.; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (lsame_(transa, "N")) { + +/* Form B := alpha*A*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + i__3 = k + j * b_dim1; + if (b[i__3].r != 0. || b[i__3].i != 0.) { + i__3 = k + j * b_dim1; + z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3] + .i, z__1.i = alpha->r * b[i__3].i + + alpha->i * b[i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + i__3 = k - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = i__ + k * a_dim1; + z__2.r = temp.r * a[i__6].r - temp.i * a[i__6] + .i, z__2.i = temp.r * a[i__6].i + + temp.i * a[i__6].r; + z__1.r = b[i__5].r + z__2.r, z__1.i = b[i__5] + .i + z__2.i; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; +/* L30: */ + } + if (nounit) { + i__3 = k + k * a_dim1; + z__1.r = temp.r * a[i__3].r - temp.i * a[i__3] + .i, z__1.i = temp.r * a[i__3].i + + temp.i * a[i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__3 = k + j * b_dim1; + b[i__3].r = temp.r, b[i__3].i = temp.i; + } +/* L40: */ + } +/* L50: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (k = *m; k >= 1; --k) { + i__2 = k + j * b_dim1; + if (b[i__2].r != 0. || b[i__2].i != 0.) { + i__2 = k + j * b_dim1; + z__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2] + .i, z__1.i = alpha->r * b[i__2].i + + alpha->i * b[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = k + j * b_dim1; + b[i__2].r = temp.r, b[i__2].i = temp.i; + if (nounit) { + i__2 = k + j * b_dim1; + i__3 = k + j * b_dim1; + i__4 = k + k * a_dim1; + z__1.r = b[i__3].r * a[i__4].r - b[i__3].i * + a[i__4].i, z__1.i = b[i__3].r * a[ + i__4].i + b[i__3].i * a[i__4].r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } + i__2 = *m; + for (i__ = k + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ + k * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5] + .i, z__2.i = temp.r * a[i__5].i + + temp.i * a[i__5].r; + z__1.r = b[i__4].r + z__2.r, z__1.i = b[i__4] + .i + z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L60: */ + } + } +/* L70: */ + } +/* L80: */ + } + } + } else { + +/* Form B := alpha*A'*B or B := alpha*conjg( A' )*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + i__2 = i__ + j * b_dim1; + temp.r = b[i__2].r, temp.i = b[i__2].i; + if (noconj) { + if (nounit) { + i__2 = i__ + i__ * a_dim1; + z__1.r = temp.r * a[i__2].r - temp.i * a[i__2] + .i, z__1.i = temp.r * a[i__2].i + + temp.i * a[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = i__ - 1; + for (k = 1; k <= i__2; ++k) { + i__3 = k + i__ * a_dim1; + i__4 = k + j * b_dim1; + z__2.r = a[i__3].r * b[i__4].r - a[i__3].i * + b[i__4].i, z__2.i = a[i__3].r * b[ + i__4].i + a[i__3].i * b[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L90: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &a[i__ + i__ * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = i__ - 1; + for (k = 1; k <= i__2; ++k) { + d_cnjg(&z__3, &a[k + i__ * a_dim1]); + i__3 = k + j * b_dim1; + z__2.r = z__3.r * b[i__3].r - z__3.i * b[i__3] + .i, z__2.i = z__3.r * b[i__3].i + + z__3.i * b[i__3].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L100: */ + } + } + i__2 = i__ + j * b_dim1; + z__1.r = alpha->r * temp.r - alpha->i * temp.i, + z__1.i = alpha->r * temp.i + alpha->i * + temp.r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; +/* L110: */ + } +/* L120: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + temp.r = b[i__3].r, temp.i = b[i__3].i; + if (noconj) { + if (nounit) { + i__3 = i__ + i__ * a_dim1; + z__1.r = temp.r * a[i__3].r - temp.i * a[i__3] + .i, z__1.i = temp.r * a[i__3].i + + temp.i * a[i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__3 = *m; + for (k = i__ + 1; k <= i__3; ++k) { + i__4 = k + i__ * a_dim1; + i__5 = k + j * b_dim1; + z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * + b[i__5].i, z__2.i = a[i__4].r * b[ + i__5].i + a[i__4].i * b[i__5].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L130: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &a[i__ + i__ * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__3 = *m; + for (k = i__ + 1; k <= i__3; ++k) { + d_cnjg(&z__3, &a[k + i__ * a_dim1]); + i__4 = k + j * b_dim1; + z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4] + .i, z__2.i = z__3.r * b[i__4].i + + z__3.i * b[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L140: */ + } + } + i__3 = i__ + j * b_dim1; + z__1.r = alpha->r * temp.r - alpha->i * temp.i, + z__1.i = alpha->r * temp.i + alpha->i * + temp.r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L150: */ + } +/* L160: */ + } + } + } + } else { + if (lsame_(transa, "N")) { + +/* Form B := alpha*B*A. */ + + if (upper) { + for (j = *n; j >= 1; --j) { + temp.r = alpha->r, temp.i = alpha->i; + if (nounit) { + i__1 = j + j * a_dim1; + z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, + z__1.i = temp.r * a[i__1].i + temp.i * a[i__1] + .r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + z__1.i = temp.r * b[i__3].i + temp.i * b[i__3] + .r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; +/* L170: */ + } + i__1 = j - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k + j * a_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0.) { + i__2 = k + j * a_dim1; + z__1.r = alpha->r * a[i__2].r - alpha->i * a[i__2] + .i, z__1.i = alpha->r * a[i__2].i + + alpha->i * a[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ + k * b_dim1; + z__2.r = temp.r * b[i__5].r - temp.i * b[i__5] + .i, z__2.i = temp.r * b[i__5].i + + temp.i * b[i__5].r; + z__1.r = b[i__4].r + z__2.r, z__1.i = b[i__4] + .i + z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L180: */ + } + } +/* L190: */ + } +/* L200: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp.r = alpha->r, temp.i = alpha->i; + if (nounit) { + i__2 = j + j * a_dim1; + z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + z__1.i = temp.r * a[i__2].i + temp.i * a[i__2] + .r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + z__1.i = temp.r * b[i__4].i + temp.i * b[i__4] + .r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L210: */ + } + i__2 = *n; + for (k = j + 1; k <= i__2; ++k) { + i__3 = k + j * a_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0.) { + i__3 = k + j * a_dim1; + z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3] + .i, z__1.i = alpha->r * a[i__3].i + + alpha->i * a[i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = i__ + k * b_dim1; + z__2.r = temp.r * b[i__6].r - temp.i * b[i__6] + .i, z__2.i = temp.r * b[i__6].i + + temp.i * b[i__6].r; + z__1.r = b[i__5].r + z__2.r, z__1.i = b[i__5] + .i + z__2.i; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; +/* L220: */ + } + } +/* L230: */ + } +/* L240: */ + } + } + } else { + +/* Form B := alpha*B*A' or B := alpha*B*conjg( A' ). */ + + if (upper) { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + i__2 = k - 1; + for (j = 1; j <= i__2; ++j) { + i__3 = j + k * a_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0.) { + if (noconj) { + i__3 = j + k * a_dim1; + z__1.r = alpha->r * a[i__3].r - alpha->i * a[ + i__3].i, z__1.i = alpha->r * a[i__3] + .i + alpha->i * a[i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + } else { + d_cnjg(&z__2, &a[j + k * a_dim1]); + z__1.r = alpha->r * z__2.r - alpha->i * + z__2.i, z__1.i = alpha->r * z__2.i + + alpha->i * z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = i__ + k * b_dim1; + z__2.r = temp.r * b[i__6].r - temp.i * b[i__6] + .i, z__2.i = temp.r * b[i__6].i + + temp.i * b[i__6].r; + z__1.r = b[i__5].r + z__2.r, z__1.i = b[i__5] + .i + z__2.i; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; +/* L250: */ + } + } +/* L260: */ + } + temp.r = alpha->r, temp.i = alpha->i; + if (nounit) { + if (noconj) { + i__2 = k + k * a_dim1; + z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + z__1.i = temp.r * a[i__2].i + temp.i * a[ + i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + } else { + d_cnjg(&z__2, &a[k + k * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + } + if (temp.r != 1. || temp.i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + k * b_dim1; + i__4 = i__ + k * b_dim1; + z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + z__1.i = temp.r * b[i__4].i + temp.i * b[ + i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L270: */ + } + } +/* L280: */ + } + } else { + for (k = *n; k >= 1; --k) { + i__1 = *n; + for (j = k + 1; j <= i__1; ++j) { + i__2 = j + k * a_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0.) { + if (noconj) { + i__2 = j + k * a_dim1; + z__1.r = alpha->r * a[i__2].r - alpha->i * a[ + i__2].i, z__1.i = alpha->r * a[i__2] + .i + alpha->i * a[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + } else { + d_cnjg(&z__2, &a[j + k * a_dim1]); + z__1.r = alpha->r * z__2.r - alpha->i * + z__2.i, z__1.i = alpha->r * z__2.i + + alpha->i * z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ + k * b_dim1; + z__2.r = temp.r * b[i__5].r - temp.i * b[i__5] + .i, z__2.i = temp.r * b[i__5].i + + temp.i * b[i__5].r; + z__1.r = b[i__4].r + z__2.r, z__1.i = b[i__4] + .i + z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L290: */ + } + } +/* L300: */ + } + temp.r = alpha->r, temp.i = alpha->i; + if (nounit) { + if (noconj) { + i__1 = k + k * a_dim1; + z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, + z__1.i = temp.r * a[i__1].i + temp.i * a[ + i__1].r; + temp.r = z__1.r, temp.i = z__1.i; + } else { + d_cnjg(&z__2, &a[k + k * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + } + if (temp.r != 1. || temp.i != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + k * b_dim1; + i__3 = i__ + k * b_dim1; + z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + z__1.i = temp.r * b[i__3].i + temp.i * b[ + i__3].r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; +/* L310: */ + } + } +/* L320: */ + } + } + } + } + + return 0; + +/* End of ZTRMM . */ + +} /* ztrmm_ */ + +/* Subroutine */ int ztrmv_(char *uplo, char *trans, char *diag, integer *n, + doublecomplex *a, integer *lda, doublecomplex *x, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2, z__3; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__, j, ix, jx, kx, info; + static doublecomplex temp; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical noconj, nounit; + + +/* + Purpose + ======= + + ZTRMV performs one of the matrix-vector operations + + x := A*x, or x := A'*x, or x := conjg( A' )*x, + + where x is an n element vector and A is an n by n unit, or non-unit, + upper or lower triangular matrix. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' x := A*x. + + TRANS = 'T' or 't' x := A'*x. + + TRANS = 'C' or 'c' x := conjg( A' )*x. + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit + triangular as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + A - COMPLEX*16 array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + X - COMPLEX*16 array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. On exit, X is overwritten with the + tranformed vector x. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! lsame_(trans, "C")) { + info = 2; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1,*n)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } + if (info != 0) { + xerbla_("ZTRMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + noconj = lsame_(trans, "T"); + nounit = lsame_(diag, "N"); + +/* + Set up the start point in X if the increment is not unity. This + will be ( N - 1 )*INCX too small for descending loops. +*/ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. +*/ + + if (lsame_(trans, "N")) { + +/* Form x := A*x. */ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__2.i = temp.r * a[i__5].i + temp.i * a[ + i__5].r; + z__1.r = x[i__4].r + z__2.r, z__1.i = x[i__4].i + + z__2.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; +/* L10: */ + } + if (nounit) { + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + z__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[ + i__4].i, z__1.i = x[i__3].r * a[i__4].i + + x[i__3].i * a[i__4].r; + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + } + } +/* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + ix = kx; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = ix; + i__4 = ix; + i__5 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__2.i = temp.r * a[i__5].i + temp.i * a[ + i__5].r; + z__1.r = x[i__4].r + z__2.r, z__1.i = x[i__4].i + + z__2.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + ix += *incx; +/* L30: */ + } + if (nounit) { + i__2 = jx; + i__3 = jx; + i__4 = j + j * a_dim1; + z__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[ + i__4].i, z__1.i = x[i__3].r * a[i__4].i + + x[i__3].i * a[i__4].r; + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + } + } + jx += *incx; +/* L40: */ + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + if (x[i__1].r != 0. || x[i__1].i != 0.) { + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + i__2 = i__; + i__3 = i__; + i__4 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, + z__2.i = temp.r * a[i__4].i + temp.i * a[ + i__4].r; + z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + + z__2.i; + x[i__2].r = z__1.r, x[i__2].i = z__1.i; +/* L50: */ + } + if (nounit) { + i__1 = j; + i__2 = j; + i__3 = j + j * a_dim1; + z__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[ + i__3].i, z__1.i = x[i__2].r * a[i__3].i + + x[i__2].i * a[i__3].r; + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + } + } +/* L60: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + i__1 = jx; + if (x[i__1].r != 0. || x[i__1].i != 0.) { + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + ix = kx; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + i__2 = ix; + i__3 = ix; + i__4 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, + z__2.i = temp.r * a[i__4].i + temp.i * a[ + i__4].r; + z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + + z__2.i; + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + ix -= *incx; +/* L70: */ + } + if (nounit) { + i__1 = jx; + i__2 = jx; + i__3 = j + j * a_dim1; + z__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[ + i__3].i, z__1.i = x[i__2].r * a[i__3].i + + x[i__2].i * a[i__3].r; + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + } + } + jx -= *incx; +/* L80: */ + } + } + } + } else { + +/* Form x := A'*x or x := conjg( A' )*x. */ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + if (noconj) { + if (nounit) { + i__1 = j + j * a_dim1; + z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, + z__1.i = temp.r * a[i__1].i + temp.i * a[ + i__1].r; + temp.r = z__1.r, temp.i = z__1.i; + } + for (i__ = j - 1; i__ >= 1; --i__) { + i__1 = i__ + j * a_dim1; + i__2 = i__; + z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[ + i__2].i, z__2.i = a[i__1].r * x[i__2].i + + a[i__1].i * x[i__2].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L90: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &a[j + j * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + for (i__ = j - 1; i__ >= 1; --i__) { + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__1 = i__; + z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, + z__2.i = z__3.r * x[i__1].i + z__3.i * x[ + i__1].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L100: */ + } + } + i__1 = j; + x[i__1].r = temp.r, x[i__1].i = temp.i; +/* L110: */ + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + ix = jx; + if (noconj) { + if (nounit) { + i__1 = j + j * a_dim1; + z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, + z__1.i = temp.r * a[i__1].i + temp.i * a[ + i__1].r; + temp.r = z__1.r, temp.i = z__1.i; + } + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + i__1 = i__ + j * a_dim1; + i__2 = ix; + z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[ + i__2].i, z__2.i = a[i__1].r * x[i__2].i + + a[i__1].i * x[i__2].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L120: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &a[j + j * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__1 = ix; + z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, + z__2.i = z__3.r * x[i__1].i + z__3.i * x[ + i__1].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L130: */ + } + } + i__1 = jx; + x[i__1].r = temp.r, x[i__1].i = temp.i; + jx -= *incx; +/* L140: */ + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + if (noconj) { + if (nounit) { + i__2 = j + j * a_dim1; + z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + z__1.i = temp.r * a[i__2].i + temp.i * a[ + i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ + i__4].i, z__2.i = a[i__3].r * x[i__4].i + + a[i__3].i * x[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L150: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &a[j + j * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = i__; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, + z__2.i = z__3.r * x[i__3].i + z__3.i * x[ + i__3].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L160: */ + } + } + i__2 = j; + x[i__2].r = temp.r, x[i__2].i = temp.i; +/* L170: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + ix = jx; + if (noconj) { + if (nounit) { + i__2 = j + j * a_dim1; + z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + z__1.i = temp.r * a[i__2].i + temp.i * a[ + i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + i__3 = i__ + j * a_dim1; + i__4 = ix; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ + i__4].i, z__2.i = a[i__3].r * x[i__4].i + + a[i__3].i * x[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L180: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &a[j + j * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = ix; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, + z__2.i = z__3.r * x[i__3].i + z__3.i * x[ + i__3].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L190: */ + } + } + i__2 = jx; + x[i__2].r = temp.r, x[i__2].i = temp.i; + jx += *incx; +/* L200: */ + } + } + } + } + + return 0; + +/* End of ZTRMV . */ + +} /* ztrmv_ */ + +/* Subroutine */ int ztrsm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, + integer *lda, doublecomplex *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, + i__6, i__7; + doublecomplex z__1, z__2, z__3; + + /* Builtin functions */ + void z_div(doublecomplex *, doublecomplex *, doublecomplex *), d_cnjg( + doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__, j, k, info; + static doublecomplex temp; + static logical lside; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical noconj, nounit; + + +/* + Purpose + ======= + + ZTRSM solves one of the matrix equations + + op( A )*X = alpha*B, or X*op( A ) = alpha*B, + + where alpha is a scalar, X and B are m by n matrices, A is a unit, or + non-unit, upper or lower triangular matrix and op( A ) is one of + + op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). + + The matrix X is overwritten on B. + + Parameters + ========== + + SIDE - CHARACTER*1. + On entry, SIDE specifies whether op( A ) appears on the left + or right of X as follows: + + SIDE = 'L' or 'l' op( A )*X = alpha*B. + + SIDE = 'R' or 'r' X*op( A ) = alpha*B. + + Unchanged on exit. + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix A is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n' op( A ) = A. + + TRANSA = 'T' or 't' op( A ) = A'. + + TRANSA = 'C' or 'c' op( A ) = conjg( A' ). + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit triangular + as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of B. M must be at + least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of B. N must be + at least zero. + Unchanged on exit. + + ALPHA - COMPLEX*16 . + On entry, ALPHA specifies the scalar alpha. When alpha is + zero then A is not referenced and B need not be set before + entry. + Unchanged on exit. + + A - COMPLEX*16 array of DIMENSION ( LDA, k ), where k is m + when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. + Before entry with UPLO = 'U' or 'u', the leading k by k + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading k by k + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When SIDE = 'L' or 'l' then + LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' + then LDA must be at least max( 1, n ). + Unchanged on exit. + + B - COMPLEX*16 array of DIMENSION ( LDB, n ). + Before entry, the leading m by n part of the array B must + contain the right-hand side matrix B, and on exit is + overwritten by the solution matrix X. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. LDB must be at least + max( 1, m ). + Unchanged on exit. + + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = lsame_(side, "L"); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + noconj = lsame_(transa, "T"); + nounit = lsame_(diag, "N"); + upper = lsame_(uplo, "U"); + + info = 0; + if (! lside && ! lsame_(side, "R")) { + info = 1; + } else if (! upper && ! lsame_(uplo, "L")) { + info = 2; + } else if (! lsame_(transa, "N") && ! lsame_(transa, + "T") && ! lsame_(transa, "C")) { + info = 3; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + xerbla_("ZTRSM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (alpha->r == 0. && alpha->i == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + b[i__3].r = 0., b[i__3].i = 0.; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (lsame_(transa, "N")) { + +/* Form B := alpha*inv( A )*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (alpha->r != 1. || alpha->i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] + .i, z__1.i = alpha->r * b[i__4].i + + alpha->i * b[i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L30: */ + } + } + for (k = *m; k >= 1; --k) { + i__2 = k + j * b_dim1; + if (b[i__2].r != 0. || b[i__2].i != 0.) { + if (nounit) { + i__2 = k + j * b_dim1; + z_div(&z__1, &b[k + j * b_dim1], &a[k + k * + a_dim1]); + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } + i__2 = k - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = k + j * b_dim1; + i__6 = i__ + k * a_dim1; + z__2.r = b[i__5].r * a[i__6].r - b[i__5].i * + a[i__6].i, z__2.i = b[i__5].r * a[ + i__6].i + b[i__5].i * a[i__6].r; + z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4] + .i - z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L40: */ + } + } +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (alpha->r != 1. || alpha->i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] + .i, z__1.i = alpha->r * b[i__4].i + + alpha->i * b[i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L70: */ + } + } + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + i__3 = k + j * b_dim1; + if (b[i__3].r != 0. || b[i__3].i != 0.) { + if (nounit) { + i__3 = k + j * b_dim1; + z_div(&z__1, &b[k + j * b_dim1], &a[k + k * + a_dim1]); + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + i__3 = *m; + for (i__ = k + 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = k + j * b_dim1; + i__7 = i__ + k * a_dim1; + z__2.r = b[i__6].r * a[i__7].r - b[i__6].i * + a[i__7].i, z__2.i = b[i__6].r * a[ + i__7].i + b[i__6].i * a[i__7].r; + z__1.r = b[i__5].r - z__2.r, z__1.i = b[i__5] + .i - z__2.i; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; +/* L80: */ + } + } +/* L90: */ + } +/* L100: */ + } + } + } else { + +/* + Form B := alpha*inv( A' )*B + or B := alpha*inv( conjg( A' ) )*B. +*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + z__1.i = alpha->r * b[i__3].i + alpha->i * b[ + i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + if (noconj) { + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + i__4 = k + i__ * a_dim1; + i__5 = k + j * b_dim1; + z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * + b[i__5].i, z__2.i = a[i__4].r * b[ + i__5].i + a[i__4].i * b[i__5].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L110: */ + } + if (nounit) { + z_div(&z__1, &temp, &a[i__ + i__ * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + d_cnjg(&z__3, &a[k + i__ * a_dim1]); + i__4 = k + j * b_dim1; + z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4] + .i, z__2.i = z__3.r * b[i__4].i + + z__3.i * b[i__4].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L120: */ + } + if (nounit) { + d_cnjg(&z__2, &a[i__ + i__ * a_dim1]); + z_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__3 = i__ + j * b_dim1; + b[i__3].r = temp.r, b[i__3].i = temp.i; +/* L130: */ + } +/* L140: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + i__2 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i, + z__1.i = alpha->r * b[i__2].i + alpha->i * b[ + i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + if (noconj) { + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + i__3 = k + i__ * a_dim1; + i__4 = k + j * b_dim1; + z__2.r = a[i__3].r * b[i__4].r - a[i__3].i * + b[i__4].i, z__2.i = a[i__3].r * b[ + i__4].i + a[i__3].i * b[i__4].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L150: */ + } + if (nounit) { + z_div(&z__1, &temp, &a[i__ + i__ * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + d_cnjg(&z__3, &a[k + i__ * a_dim1]); + i__3 = k + j * b_dim1; + z__2.r = z__3.r * b[i__3].r - z__3.i * b[i__3] + .i, z__2.i = z__3.r * b[i__3].i + + z__3.i * b[i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L160: */ + } + if (nounit) { + d_cnjg(&z__2, &a[i__ + i__ * a_dim1]); + z_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__2 = i__ + j * b_dim1; + b[i__2].r = temp.r, b[i__2].i = temp.i; +/* L170: */ + } +/* L180: */ + } + } + } + } else { + if (lsame_(transa, "N")) { + +/* Form B := alpha*B*inv( A ). */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (alpha->r != 1. || alpha->i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] + .i, z__1.i = alpha->r * b[i__4].i + + alpha->i * b[i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L190: */ + } + } + i__2 = j - 1; + for (k = 1; k <= i__2; ++k) { + i__3 = k + j * a_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0.) { + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = k + j * a_dim1; + i__7 = i__ + k * b_dim1; + z__2.r = a[i__6].r * b[i__7].r - a[i__6].i * + b[i__7].i, z__2.i = a[i__6].r * b[ + i__7].i + a[i__6].i * b[i__7].r; + z__1.r = b[i__5].r - z__2.r, z__1.i = b[i__5] + .i - z__2.i; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; +/* L200: */ + } + } +/* L210: */ + } + if (nounit) { + z_div(&z__1, &c_b1071, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + z__1.i = temp.r * b[i__4].i + temp.i * b[ + i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L220: */ + } + } +/* L230: */ + } + } else { + for (j = *n; j >= 1; --j) { + if (alpha->r != 1. || alpha->i != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3] + .i, z__1.i = alpha->r * b[i__3].i + + alpha->i * b[i__3].r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; +/* L240: */ + } + } + i__1 = *n; + for (k = j + 1; k <= i__1; ++k) { + i__2 = k + j * a_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = k + j * a_dim1; + i__6 = i__ + k * b_dim1; + z__2.r = a[i__5].r * b[i__6].r - a[i__5].i * + b[i__6].i, z__2.i = a[i__5].r * b[ + i__6].i + a[i__5].i * b[i__6].r; + z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4] + .i - z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L250: */ + } + } +/* L260: */ + } + if (nounit) { + z_div(&z__1, &c_b1071, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + z__1.i = temp.r * b[i__3].i + temp.i * b[ + i__3].r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; +/* L270: */ + } + } +/* L280: */ + } + } + } else { + +/* + Form B := alpha*B*inv( A' ) + or B := alpha*B*inv( conjg( A' ) ). +*/ + + if (upper) { + for (k = *n; k >= 1; --k) { + if (nounit) { + if (noconj) { + z_div(&z__1, &c_b1071, &a[k + k * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } else { + d_cnjg(&z__2, &a[k + k * a_dim1]); + z_div(&z__1, &c_b1071, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + k * b_dim1; + i__3 = i__ + k * b_dim1; + z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + z__1.i = temp.r * b[i__3].i + temp.i * b[ + i__3].r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; +/* L290: */ + } + } + i__1 = k - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = j + k * a_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0.) { + if (noconj) { + i__2 = j + k * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + } else { + d_cnjg(&z__1, &a[j + k * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ + k * b_dim1; + z__2.r = temp.r * b[i__5].r - temp.i * b[i__5] + .i, z__2.i = temp.r * b[i__5].i + + temp.i * b[i__5].r; + z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4] + .i - z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L300: */ + } + } +/* L310: */ + } + if (alpha->r != 1. || alpha->i != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + k * b_dim1; + i__3 = i__ + k * b_dim1; + z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3] + .i, z__1.i = alpha->r * b[i__3].i + + alpha->i * b[i__3].r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; +/* L320: */ + } + } +/* L330: */ + } + } else { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (nounit) { + if (noconj) { + z_div(&z__1, &c_b1071, &a[k + k * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } else { + d_cnjg(&z__2, &a[k + k * a_dim1]); + z_div(&z__1, &c_b1071, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + k * b_dim1; + i__4 = i__ + k * b_dim1; + z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + z__1.i = temp.r * b[i__4].i + temp.i * b[ + i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L340: */ + } + } + i__2 = *n; + for (j = k + 1; j <= i__2; ++j) { + i__3 = j + k * a_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0.) { + if (noconj) { + i__3 = j + k * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + } else { + d_cnjg(&z__1, &a[j + k * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = i__ + k * b_dim1; + z__2.r = temp.r * b[i__6].r - temp.i * b[i__6] + .i, z__2.i = temp.r * b[i__6].i + + temp.i * b[i__6].r; + z__1.r = b[i__5].r - z__2.r, z__1.i = b[i__5] + .i - z__2.i; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; +/* L350: */ + } + } +/* L360: */ + } + if (alpha->r != 1. || alpha->i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + k * b_dim1; + i__4 = i__ + k * b_dim1; + z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] + .i, z__1.i = alpha->r * b[i__4].i + + alpha->i * b[i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L370: */ + } + } +/* L380: */ + } + } + } + } + + return 0; + +/* End of ZTRSM . */ + +} /* ztrsm_ */ + +/* Subroutine */ int ztrsv_(char *uplo, char *trans, char *diag, integer *n, + doublecomplex *a, integer *lda, doublecomplex *x, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2, z__3; + + /* Builtin functions */ + void z_div(doublecomplex *, doublecomplex *, doublecomplex *), d_cnjg( + doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__, j, ix, jx, kx, info; + static doublecomplex temp; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical noconj, nounit; + + +/* + Purpose + ======= + + ZTRSV solves one of the systems of equations + + A*x = b, or A'*x = b, or conjg( A' )*x = b, + + where b and x are n element vectors and A is an n by n unit, or + non-unit, upper or lower triangular matrix. + + No test for singularity or near-singularity is included in this + routine. Such tests must be performed before calling this routine. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the equations to be solved as + follows: + + TRANS = 'N' or 'n' A*x = b. + + TRANS = 'T' or 't' A'*x = b. + + TRANS = 'C' or 'c' conjg( A' )*x = b. + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit + triangular as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + A - COMPLEX*16 array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + X - COMPLEX*16 array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element right-hand side vector b. On exit, X is overwritten + with the solution vector x. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! lsame_(trans, "C")) { + info = 2; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1,*n)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } + if (info != 0) { + xerbla_("ZTRSV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + noconj = lsame_(trans, "T"); + nounit = lsame_(diag, "N"); + +/* + Set up the start point in X if the increment is not unity. This + will be ( N - 1 )*INCX too small for descending loops. +*/ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. +*/ + + if (lsame_(trans, "N")) { + +/* Form x := inv( A )*x. */ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + if (x[i__1].r != 0. || x[i__1].i != 0.) { + if (nounit) { + i__1 = j; + z_div(&z__1, &x[j], &a[j + j * a_dim1]); + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + } + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + for (i__ = j - 1; i__ >= 1; --i__) { + i__1 = i__; + i__2 = i__; + i__3 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, + z__2.i = temp.r * a[i__3].i + temp.i * a[ + i__3].r; + z__1.r = x[i__2].r - z__2.r, z__1.i = x[i__2].i - + z__2.i; + x[i__1].r = z__1.r, x[i__1].i = z__1.i; +/* L10: */ + } + } +/* L20: */ + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + i__1 = jx; + if (x[i__1].r != 0. || x[i__1].i != 0.) { + if (nounit) { + i__1 = jx; + z_div(&z__1, &x[jx], &a[j + j * a_dim1]); + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + } + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + ix = jx; + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + i__1 = ix; + i__2 = ix; + i__3 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, + z__2.i = temp.r * a[i__3].i + temp.i * a[ + i__3].r; + z__1.r = x[i__2].r - z__2.r, z__1.i = x[i__2].i - + z__2.i; + x[i__1].r = z__1.r, x[i__1].i = z__1.i; +/* L30: */ + } + } + jx -= *incx; +/* L40: */ + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + if (nounit) { + i__2 = j; + z_div(&z__1, &x[j], &a[j + j * a_dim1]); + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + } + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__2.i = temp.r * a[i__5].i + temp.i * a[ + i__5].r; + z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i - + z__2.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; +/* L50: */ + } + } +/* L60: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + if (nounit) { + i__2 = jx; + z_div(&z__1, &x[jx], &a[j + j * a_dim1]); + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + } + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + ix = jx; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + i__3 = ix; + i__4 = ix; + i__5 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__2.i = temp.r * a[i__5].i + temp.i * a[ + i__5].r; + z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i - + z__2.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; +/* L70: */ + } + } + jx += *incx; +/* L80: */ + } + } + } + } else { + +/* Form x := inv( A' )*x or x := inv( conjg( A' ) )*x. */ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + if (noconj) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ + i__4].i, z__2.i = a[i__3].r * x[i__4].i + + a[i__3].i * x[i__4].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L90: */ + } + if (nounit) { + z_div(&z__1, &temp, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = i__; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, + z__2.i = z__3.r * x[i__3].i + z__3.i * x[ + i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L100: */ + } + if (nounit) { + d_cnjg(&z__2, &a[j + j * a_dim1]); + z_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__2 = j; + x[i__2].r = temp.r, x[i__2].i = temp.i; +/* L110: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + ix = kx; + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + if (noconj) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = ix; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ + i__4].i, z__2.i = a[i__3].r * x[i__4].i + + a[i__3].i * x[i__4].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix += *incx; +/* L120: */ + } + if (nounit) { + z_div(&z__1, &temp, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = ix; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, + z__2.i = z__3.r * x[i__3].i + z__3.i * x[ + i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix += *incx; +/* L130: */ + } + if (nounit) { + d_cnjg(&z__2, &a[j + j * a_dim1]); + z_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__2 = jx; + x[i__2].r = temp.r, x[i__2].i = temp.i; + jx += *incx; +/* L140: */ + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + if (noconj) { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + i__2 = i__ + j * a_dim1; + i__3 = i__; + z__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[ + i__3].i, z__2.i = a[i__2].r * x[i__3].i + + a[i__2].i * x[i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L150: */ + } + if (nounit) { + z_div(&z__1, &temp, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__2 = i__; + z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, + z__2.i = z__3.r * x[i__2].i + z__3.i * x[ + i__2].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L160: */ + } + if (nounit) { + d_cnjg(&z__2, &a[j + j * a_dim1]); + z_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__1 = j; + x[i__1].r = temp.r, x[i__1].i = temp.i; +/* L170: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + ix = kx; + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + if (noconj) { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + i__2 = i__ + j * a_dim1; + i__3 = ix; + z__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[ + i__3].i, z__2.i = a[i__2].r * x[i__3].i + + a[i__2].i * x[i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix -= *incx; +/* L180: */ + } + if (nounit) { + z_div(&z__1, &temp, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__2 = ix; + z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, + z__2.i = z__3.r * x[i__2].i + z__3.i * x[ + i__2].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix -= *incx; +/* L190: */ + } + if (nounit) { + d_cnjg(&z__2, &a[j + j * a_dim1]); + z_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__1 = jx; + x[i__1].r = temp.r, x[i__1].i = temp.i; + jx -= *incx; +/* L200: */ + } + } + } + } + + return 0; + +/* End of ZTRSV . */ + +} /* ztrsv_ */ + diff --git a/numpy/linalg/lapack_lite/f2c_c_lapack.c b/numpy/linalg/lapack_lite/f2c_c_lapack.c new file mode 100644 index 000000000..e2c757728 --- /dev/null +++ b/numpy/linalg/lapack_lite/f2c_c_lapack.c @@ -0,0 +1,24264 @@ +/* +NOTE: This is generated code. Look in Misc/lapack_lite for information on + remaking this file. +*/ +#include "f2c.h" + +#ifdef HAVE_CONFIG +#include "config.h" +#else +extern doublereal dlamch_(char *); +#define EPSILON dlamch_("Epsilon") +#define SAFEMINIMUM dlamch_("Safe minimum") +#define PRECISION dlamch_("Precision") +#define BASE dlamch_("Base") +#endif + +extern doublereal dlapy2_(doublereal *x, doublereal *y); + +/* +f2c knows the exact rules for precedence, and so omits parentheses where not +strictly necessary. Since this is generated code, we don't really care if +it's readable, and we know what is written is correct. So don't warn about +them. +*/ +#if defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wparentheses" +#endif + + +/* Table of constant values */ + +static integer c__1 = 1; +static complex c_b55 = {0.f,0.f}; +static complex c_b56 = {1.f,0.f}; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; +static integer c__0 = 0; +static integer c__8 = 8; +static integer c__4 = 4; +static integer c__65 = 65; +static real c_b871 = 1.f; +static integer c__15 = 15; +static logical c_false = FALSE_; +static real c_b1101 = 0.f; +static integer c__9 = 9; +static real c_b1150 = -1.f; +static real c_b1794 = .5f; + +/* Subroutine */ int cgebak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, real *scale, integer *m, complex *v, integer *ldv, + integer *info) +{ + /* System generated locals */ + integer v_dim1, v_offset, i__1; + + /* Local variables */ + static integer i__, k; + static real s; + static integer ii; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int cswap_(integer *, complex *, integer *, + complex *, integer *); + static logical leftv; + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *), xerbla_(char *, integer *); + static logical rightv; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CGEBAK forms the right or left eigenvectors of a complex general + matrix by backward transformation on the computed eigenvectors of the + balanced matrix output by CGEBAL. + + Arguments + ========= + + JOB (input) CHARACTER*1 + Specifies the type of backward transformation required: + = 'N', do nothing, return immediately; + = 'P', do backward transformation for permutation only; + = 'S', do backward transformation for scaling only; + = 'B', do backward transformations for both permutation and + scaling. + JOB must be the same as the argument JOB supplied to CGEBAL. + + SIDE (input) CHARACTER*1 + = 'R': V contains right eigenvectors; + = 'L': V contains left eigenvectors. + + N (input) INTEGER + The number of rows of the matrix V. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + The integers ILO and IHI determined by CGEBAL. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + SCALE (input) REAL array, dimension (N) + Details of the permutation and scaling factors, as returned + by CGEBAL. + + M (input) INTEGER + The number of columns of the matrix V. M >= 0. + + V (input/output) COMPLEX array, dimension (LDV,M) + On entry, the matrix of right or left eigenvectors to be + transformed, as returned by CHSEIN or CTREVC. + On exit, V is overwritten by the transformed eigenvectors. + + LDV (input) INTEGER + The leading dimension of the array V. LDV >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + ===================================================================== + + + Decode and Test the input parameters +*/ + + /* Parameter adjustments */ + --scale; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + + /* Function Body */ + rightv = lsame_(side, "R"); + leftv = lsame_(side, "L"); + + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (! rightv && ! leftv) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -4; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -5; + } else if (*m < 0) { + *info = -7; + } else if (*ldv < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGEBAK", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*m == 0) { + return 0; + } + if (lsame_(job, "N")) { + return 0; + } + + if (*ilo == *ihi) { + goto L30; + } + +/* Backward balance */ + + if (lsame_(job, "S") || lsame_(job, "B")) { + + if (rightv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = scale[i__]; + csscal_(m, &s, &v[i__ + v_dim1], ldv); +/* L10: */ + } + } + + if (leftv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = 1.f / scale[i__]; + csscal_(m, &s, &v[i__ + v_dim1], ldv); +/* L20: */ + } + } + + } + +/* + Backward permutation + + For I = ILO-1 step -1 until 1, + IHI+1 step 1 until N do -- +*/ + +L30: + if (lsame_(job, "P") || lsame_(job, "B")) { + if (rightv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L40; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = scale[i__]; + if (k == i__) { + goto L40; + } + cswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L40: + ; + } + } + + if (leftv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L50; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = scale[i__]; + if (k == i__) { + goto L50; + } + cswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L50: + ; + } + } + } + + return 0; + +/* End of CGEBAK */ + +} /* cgebak_ */ + +/* Subroutine */ int cgebal_(char *job, integer *n, complex *a, integer *lda, + integer *ilo, integer *ihi, real *scale, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + real r__1, r__2; + + /* Builtin functions */ + double r_imag(complex *), c_abs(complex *); + + /* Local variables */ + static real c__, f, g; + static integer i__, j, k, l, m; + static real r__, s, ca, ra; + static integer ica, ira, iexc; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int cswap_(integer *, complex *, integer *, + complex *, integer *); + static real sfmin1, sfmin2, sfmax1, sfmax2; + extern integer icamax_(integer *, complex *, integer *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *), xerbla_(char *, integer *); + static logical noconv; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CGEBAL balances a general complex matrix A. This involves, first, + permuting A by a similarity transformation to isolate eigenvalues + in the first 1 to ILO-1 and last IHI+1 to N elements on the + diagonal; and second, applying a diagonal similarity transformation + to rows and columns ILO to IHI to make the rows and columns as + close in norm as possible. Both steps are optional. + + Balancing may reduce the 1-norm of the matrix, and improve the + accuracy of the computed eigenvalues and/or eigenvectors. + + Arguments + ========= + + JOB (input) CHARACTER*1 + Specifies the operations to be performed on A: + = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 + for i = 1,...,N; + = 'P': permute only; + = 'S': scale only; + = 'B': both permute and scale. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the input matrix A. + On exit, A is overwritten by the balanced matrix. + If JOB = 'N', A is not referenced. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + ILO (output) INTEGER + IHI (output) INTEGER + ILO and IHI are set to integers such that on exit + A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. + If JOB = 'N' or 'S', ILO = 1 and IHI = N. + + SCALE (output) REAL array, dimension (N) + Details of the permutations and scaling factors applied to + A. If P(j) is the index of the row and column interchanged + with row and column j and D(j) is the scaling factor + applied to row and column j, then + SCALE(j) = P(j) for j = 1,...,ILO-1 + = D(j) for j = ILO,...,IHI + = P(j) for j = IHI+1,...,N. + The order in which the interchanges are made is N to IHI+1, + then 1 to ILO-1. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The permutations consist of row and column interchanges which put + the matrix in the form + + ( T1 X Y ) + P A P = ( 0 B Z ) + ( 0 0 T2 ) + + where T1 and T2 are upper triangular matrices whose eigenvalues lie + along the diagonal. The column indices ILO and IHI mark the starting + and ending columns of the submatrix B. Balancing consists of applying + a diagonal similarity transformation inv(D) * B * D to make the + 1-norms of each row of B and its corresponding column nearly equal. + The output matrix is + + ( T1 X*D Y ) + ( 0 inv(D)*B*D inv(D)*Z ). + ( 0 0 T2 ) + + Information about the permutations P and the diagonal matrix D is + returned in the vector SCALE. + + This subroutine is based on the EISPACK routine CBAL. + + Modified by Tzu-Yi Chen, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --scale; + + /* Function Body */ + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGEBAL", &i__1); + return 0; + } + + k = 1; + l = *n; + + if (*n == 0) { + goto L210; + } + + if (lsame_(job, "N")) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + scale[i__] = 1.f; +/* L10: */ + } + goto L210; + } + + if (lsame_(job, "S")) { + goto L120; + } + +/* Permutation to isolate eigenvalues if possible */ + + goto L50; + +/* Row and column exchange. */ + +L20: + scale[m] = (real) j; + if (j == m) { + goto L30; + } + + cswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); + i__1 = *n - k + 1; + cswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); + +L30: + switch (iexc) { + case 1: goto L40; + case 2: goto L80; + } + +/* Search for rows isolating an eigenvalue and push them down. */ + +L40: + if (l == 1) { + goto L210; + } + --l; + +L50: + for (j = l; j >= 1; --j) { + + i__1 = l; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ == j) { + goto L60; + } + i__2 = j + i__ * a_dim1; + if (a[i__2].r != 0.f || r_imag(&a[j + i__ * a_dim1]) != 0.f) { + goto L70; + } +L60: + ; + } + + m = l; + iexc = 1; + goto L20; +L70: + ; + } + + goto L90; + +/* Search for columns isolating an eigenvalue and push them left. */ + +L80: + ++k; + +L90: + i__1 = l; + for (j = k; j <= i__1; ++j) { + + i__2 = l; + for (i__ = k; i__ <= i__2; ++i__) { + if (i__ == j) { + goto L100; + } + i__3 = i__ + j * a_dim1; + if (a[i__3].r != 0.f || r_imag(&a[i__ + j * a_dim1]) != 0.f) { + goto L110; + } +L100: + ; + } + + m = k; + iexc = 2; + goto L20; +L110: + ; + } + +L120: + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + scale[i__] = 1.f; +/* L130: */ + } + + if (lsame_(job, "P")) { + goto L210; + } + +/* + Balance the submatrix in rows K to L. + + Iterative loop for norm reduction +*/ + + sfmin1 = slamch_("S") / slamch_("P"); + sfmax1 = 1.f / sfmin1; + sfmin2 = sfmin1 * 8.f; + sfmax2 = 1.f / sfmin2; +L140: + noconv = FALSE_; + + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + c__ = 0.f; + r__ = 0.f; + + i__2 = l; + for (j = k; j <= i__2; ++j) { + if (j == i__) { + goto L150; + } + i__3 = j + i__ * a_dim1; + c__ += (r__1 = a[i__3].r, dabs(r__1)) + (r__2 = r_imag(&a[j + i__ + * a_dim1]), dabs(r__2)); + i__3 = i__ + j * a_dim1; + r__ += (r__1 = a[i__3].r, dabs(r__1)) + (r__2 = r_imag(&a[i__ + j + * a_dim1]), dabs(r__2)); +L150: + ; + } + ica = icamax_(&l, &a[i__ * a_dim1 + 1], &c__1); + ca = c_abs(&a[ica + i__ * a_dim1]); + i__2 = *n - k + 1; + ira = icamax_(&i__2, &a[i__ + k * a_dim1], lda); + ra = c_abs(&a[i__ + (ira + k - 1) * a_dim1]); + +/* Guard against zero C or R due to underflow. */ + + if (c__ == 0.f || r__ == 0.f) { + goto L200; + } + g = r__ / 8.f; + f = 1.f; + s = c__ + r__; +L160: +/* Computing MAX */ + r__1 = max(f,c__); +/* Computing MIN */ + r__2 = min(r__,g); + if (c__ >= g || dmax(r__1,ca) >= sfmax2 || dmin(r__2,ra) <= sfmin2) { + goto L170; + } + f *= 8.f; + c__ *= 8.f; + ca *= 8.f; + r__ /= 8.f; + g /= 8.f; + ra /= 8.f; + goto L160; + +L170: + g = c__ / 8.f; +L180: +/* Computing MIN */ + r__1 = min(f,c__), r__1 = min(r__1,g); + if (g < r__ || dmax(r__,ra) >= sfmax2 || dmin(r__1,ca) <= sfmin2) { + goto L190; + } + f /= 8.f; + c__ /= 8.f; + g /= 8.f; + ca /= 8.f; + r__ *= 8.f; + ra *= 8.f; + goto L180; + +/* Now balance. */ + +L190: + if (c__ + r__ >= s * .95f) { + goto L200; + } + if (f < 1.f && scale[i__] < 1.f) { + if (f * scale[i__] <= sfmin1) { + goto L200; + } + } + if (f > 1.f && scale[i__] > 1.f) { + if (scale[i__] >= sfmax1 / f) { + goto L200; + } + } + g = 1.f / f; + scale[i__] *= f; + noconv = TRUE_; + + i__2 = *n - k + 1; + csscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); + csscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); + +L200: + ; + } + + if (noconv) { + goto L140; + } + +L210: + *ilo = k; + *ihi = l; + + return 0; + +/* End of CGEBAL */ + +} /* cgebal_ */ + +/* Subroutine */ int cgebd2_(integer *m, integer *n, complex *a, integer *lda, + real *d__, real *e, complex *tauq, complex *taup, complex *work, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + complex q__1; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__; + static complex alpha; + extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * + , integer *, complex *, complex *, integer *, complex *), + clarfg_(integer *, complex *, complex *, integer *, complex *), + clacgv_(integer *, complex *, integer *), xerbla_(char *, integer + *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CGEBD2 reduces a complex general m by n matrix A to upper or lower + real bidiagonal form B by a unitary transformation: Q' * A * P = B. + + If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. M >= 0. + + N (input) INTEGER + The number of columns in the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the m by n general matrix to be reduced. + On exit, + if m >= n, the diagonal and the first superdiagonal are + overwritten with the upper bidiagonal matrix B; the + elements below the diagonal, with the array TAUQ, represent + the unitary matrix Q as a product of elementary + reflectors, and the elements above the first superdiagonal, + with the array TAUP, represent the unitary matrix P as + a product of elementary reflectors; + if m < n, the diagonal and the first subdiagonal are + overwritten with the lower bidiagonal matrix B; the + elements below the first subdiagonal, with the array TAUQ, + represent the unitary matrix Q as a product of + elementary reflectors, and the elements above the diagonal, + with the array TAUP, represent the unitary matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) REAL array, dimension (min(M,N)) + The diagonal elements of the bidiagonal matrix B: + D(i) = A(i,i). + + E (output) REAL array, dimension (min(M,N)-1) + The off-diagonal elements of the bidiagonal matrix B: + if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; + if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. + + TAUQ (output) COMPLEX array dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the unitary matrix Q. See Further Details. + + TAUP (output) COMPLEX array, dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the unitary matrix P. See Further Details. + + WORK (workspace) COMPLEX array, dimension (max(M,N)) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + If m >= n, + + Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are complex scalars, and v and u are complex + vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in + A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in + A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, + + Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are complex scalars, v and u are complex vectors; + v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); + u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). + + The contents of A on exit are illustrated by the following examples: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) + ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) + ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) + ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) + ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) + ( v1 v2 v3 v4 v5 ) + + where d and e denote diagonal and off-diagonal elements of B, vi + denotes an element of the vector defining H(i), and ui an element of + the vector defining G(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info < 0) { + i__1 = -(*info); + xerbla_("CGEBD2", &i__1); + return 0; + } + + if (*m >= *n) { + +/* Reduce to upper bidiagonal form */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + clarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, & + tauq[i__]); + i__2 = i__; + d__[i__2] = alpha.r; + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Apply H(i)' to A(i:m,i+1:n) from the left */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__; + r_cnjg(&q__1, &tauq[i__]); + clarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &q__1, + &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + i__2 = i__ + i__ * a_dim1; + i__3 = i__; + a[i__2].r = d__[i__3], a[i__2].i = 0.f; + + if (i__ < *n) { + +/* + Generate elementary reflector G(i) to annihilate + A(i,i+2:n) +*/ + + i__2 = *n - i__; + clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ + (i__ + 1) * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & + taup[i__]); + i__2 = i__; + e[i__2] = alpha.r; + i__2 = i__ + (i__ + 1) * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Apply G(i) to A(i+1:m,i+1:n) from the right */ + + i__2 = *m - i__; + i__3 = *n - i__; + clarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], + lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &work[1]); + i__2 = *n - i__; + clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ + (i__ + 1) * a_dim1; + i__3 = i__; + a[i__2].r = e[i__3], a[i__2].i = 0.f; + } else { + i__2 = i__; + taup[i__2].r = 0.f, taup[i__2].i = 0.f; + } +/* L10: */ + } + } else { + +/* Reduce to lower bidiagonal form */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector G(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; + clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & + taup[i__]); + i__2 = i__; + d__[i__2] = alpha.r; + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Apply G(i) to A(i+1:m,i:n) from the right */ + + i__2 = *m - i__; + i__3 = *n - i__ + 1; +/* Computing MIN */ + i__4 = i__ + 1; + clarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[ + i__], &a[min(i__4,*m) + i__ * a_dim1], lda, &work[1]); + i__2 = *n - i__ + 1; + clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + i__3 = i__; + a[i__2].r = d__[i__3], a[i__2].i = 0.f; + + if (i__ < *m) { + +/* + Generate elementary reflector H(i) to annihilate + A(i+2:m,i) +*/ + + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + clarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, + &tauq[i__]); + i__2 = i__; + e[i__2] = alpha.r; + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Apply H(i)' to A(i+1:m,i+1:n) from the left */ + + i__2 = *m - i__; + i__3 = *n - i__; + r_cnjg(&q__1, &tauq[i__]); + clarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], & + c__1, &q__1, &a[i__ + 1 + (i__ + 1) * a_dim1], lda, & + work[1]); + i__2 = i__ + 1 + i__ * a_dim1; + i__3 = i__; + a[i__2].r = e[i__3], a[i__2].i = 0.f; + } else { + i__2 = i__; + tauq[i__2].r = 0.f, tauq[i__2].i = 0.f; + } +/* L20: */ + } + } + return 0; + +/* End of CGEBD2 */ + +} /* cgebd2_ */ + +/* Subroutine */ int cgebrd_(integer *m, integer *n, complex *a, integer *lda, + real *d__, real *e, complex *tauq, complex *taup, complex *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + real r__1; + complex q__1; + + /* Local variables */ + static integer i__, j, nb, nx; + static real ws; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *); + static integer nbmin, iinfo, minmn; + extern /* Subroutine */ int cgebd2_(integer *, integer *, complex *, + integer *, real *, real *, complex *, complex *, complex *, + integer *), clabrd_(integer *, integer *, integer *, complex *, + integer *, real *, real *, complex *, complex *, complex *, + integer *, complex *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwrkx, ldwrky, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CGEBRD reduces a general complex M-by-N matrix A to upper or lower + bidiagonal form B by a unitary transformation: Q**H * A * P = B. + + If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. M >= 0. + + N (input) INTEGER + The number of columns in the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the M-by-N general matrix to be reduced. + On exit, + if m >= n, the diagonal and the first superdiagonal are + overwritten with the upper bidiagonal matrix B; the + elements below the diagonal, with the array TAUQ, represent + the unitary matrix Q as a product of elementary + reflectors, and the elements above the first superdiagonal, + with the array TAUP, represent the unitary matrix P as + a product of elementary reflectors; + if m < n, the diagonal and the first subdiagonal are + overwritten with the lower bidiagonal matrix B; the + elements below the first subdiagonal, with the array TAUQ, + represent the unitary matrix Q as a product of + elementary reflectors, and the elements above the diagonal, + with the array TAUP, represent the unitary matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) REAL array, dimension (min(M,N)) + The diagonal elements of the bidiagonal matrix B: + D(i) = A(i,i). + + E (output) REAL array, dimension (min(M,N)-1) + The off-diagonal elements of the bidiagonal matrix B: + if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; + if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. + + TAUQ (output) COMPLEX array dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the unitary matrix Q. See Further Details. + + TAUP (output) COMPLEX array, dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the unitary matrix P. See Further Details. + + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The length of the array WORK. LWORK >= max(1,M,N). + For optimum performance LWORK >= (M+N)*NB, where NB + is the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + If m >= n, + + Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are complex scalars, and v and u are complex + vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in + A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in + A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, + + Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are complex scalars, and v and u are complex + vectors; v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in + A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in + A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + The contents of A on exit are illustrated by the following examples: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) + ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) + ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) + ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) + ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) + ( v1 v2 v3 v4 v5 ) + + where d and e denote diagonal and off-diagonal elements of B, vi + denotes an element of the vector defining H(i), and ui an element of + the vector defining G(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; +/* Computing MAX */ + i__1 = 1, i__2 = ilaenv_(&c__1, "CGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = max(i__1,i__2); + lwkopt = (*m + *n) * nb; + r__1 = (real) lwkopt; + work[1].r = r__1, work[1].i = 0.f; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = max(1,*m); + if (*lwork < max(i__1,*n) && ! lquery) { + *info = -10; + } + } + if (*info < 0) { + i__1 = -(*info); + xerbla_("CGEBRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + minmn = min(*m,*n); + if (minmn == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + ws = (real) max(*m,*n); + ldwrkx = *m; + ldwrky = *n; + + if (nb > 1 && nb < minmn) { + +/* + Set the crossover point NX. + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "CGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + +/* Determine when to switch from blocked to unblocked code. */ + + if (nx < minmn) { + ws = (real) ((*m + *n) * nb); + if ((real) (*lwork) < ws) { + +/* + Not enough work space for the optimal NB, consider using + a smaller block size. +*/ + + nbmin = ilaenv_(&c__2, "CGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + if (*lwork >= (*m + *n) * nbmin) { + nb = *lwork / (*m + *n); + } else { + nb = 1; + nx = minmn; + } + } + } + } else { + nx = minmn; + } + + i__1 = minmn - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + +/* + Reduce rows and columns i:i+ib-1 to bidiagonal form and return + the matrices X and Y which are needed to update the unreduced + part of the matrix +*/ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ + 1; + clabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[ + i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx + * nb + 1], &ldwrky); + +/* + Update the trailing submatrix A(i+ib:m,i+ib:n), using + an update of the form A := A - V*Y' - X*U' +*/ + + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "Conjugate transpose", &i__3, &i__4, &nb, & + q__1, &a[i__ + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + + nb + 1], &ldwrky, &c_b56, &a[i__ + nb + (i__ + nb) * a_dim1], + lda); + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &q__1, & + work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, & + c_b56, &a[i__ + nb + (i__ + nb) * a_dim1], lda); + +/* Copy diagonal and off-diagonal elements of B back into A */ + + if (*m >= *n) { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + i__4 = j + j * a_dim1; + i__5 = j; + a[i__4].r = d__[i__5], a[i__4].i = 0.f; + i__4 = j + (j + 1) * a_dim1; + i__5 = j; + a[i__4].r = e[i__5], a[i__4].i = 0.f; +/* L10: */ + } + } else { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + i__4 = j + j * a_dim1; + i__5 = j; + a[i__4].r = d__[i__5], a[i__4].i = 0.f; + i__4 = j + 1 + j * a_dim1; + i__5 = j; + a[i__4].r = e[i__5], a[i__4].i = 0.f; +/* L20: */ + } + } +/* L30: */ + } + +/* Use unblocked code to reduce the remainder of the matrix */ + + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + cgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], & + tauq[i__], &taup[i__], &work[1], &iinfo); + work[1].r = ws, work[1].i = 0.f; + return 0; + +/* End of CGEBRD */ + +} /* cgebrd_ */ + +/* Subroutine */ int cgeev_(char *jobvl, char *jobvr, integer *n, complex *a, + integer *lda, complex *w, complex *vl, integer *ldvl, complex *vr, + integer *ldvr, complex *work, integer *lwork, real *rwork, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3, i__4; + real r__1, r__2; + complex q__1, q__2; + + /* Builtin functions */ + double sqrt(doublereal), r_imag(complex *); + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, k, ihi; + static real scl; + static integer ilo; + static real dum[1], eps; + static complex tmp; + static integer ibal; + static char side[1]; + static integer maxb; + static real anrm; + static integer ierr, itau, iwrk, nout; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *); + extern logical lsame_(char *, char *); + extern doublereal scnrm2_(integer *, complex *, integer *); + extern /* Subroutine */ int cgebak_(char *, char *, integer *, integer *, + integer *, real *, integer *, complex *, integer *, integer *), cgebal_(char *, integer *, complex *, integer *, + integer *, integer *, real *, integer *), slabad_(real *, + real *); + static logical scalea; + extern doublereal clange_(char *, integer *, integer *, complex *, + integer *, real *); + static real cscale; + extern /* Subroutine */ int cgehrd_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *, integer *), + clascl_(char *, integer *, integer *, real *, real *, integer *, + integer *, complex *, integer *, integer *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *), clacpy_(char *, integer *, integer *, complex *, integer *, + complex *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical select[1]; + static real bignum; + extern integer isamax_(integer *, real *, integer *); + extern /* Subroutine */ int chseqr_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *), ctrevc_(char *, + char *, logical *, integer *, complex *, integer *, complex *, + integer *, complex *, integer *, integer *, integer *, complex *, + real *, integer *), cunghr_(integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + integer *); + static integer minwrk, maxwrk; + static logical wantvl; + static real smlnum; + static integer hswork, irwork; + static logical lquery, wantvr; + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CGEEV computes for an N-by-N complex nonsymmetric matrix A, the + eigenvalues and, optionally, the left and/or right eigenvectors. + + The right eigenvector v(j) of A satisfies + A * v(j) = lambda(j) * v(j) + where lambda(j) is its eigenvalue. + The left eigenvector u(j) of A satisfies + u(j)**H * A = lambda(j) * u(j)**H + where u(j)**H denotes the conjugate transpose of u(j). + + The computed eigenvectors are normalized to have Euclidean norm + equal to 1 and largest component real. + + Arguments + ========= + + JOBVL (input) CHARACTER*1 + = 'N': left eigenvectors of A are not computed; + = 'V': left eigenvectors of are computed. + + JOBVR (input) CHARACTER*1 + = 'N': right eigenvectors of A are not computed; + = 'V': right eigenvectors of A are computed. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the N-by-N matrix A. + On exit, A has been overwritten. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + W (output) COMPLEX array, dimension (N) + W contains the computed eigenvalues. + + VL (output) COMPLEX array, dimension (LDVL,N) + If JOBVL = 'V', the left eigenvectors u(j) are stored one + after another in the columns of VL, in the same order + as their eigenvalues. + If JOBVL = 'N', VL is not referenced. + u(j) = VL(:,j), the j-th column of VL. + + LDVL (input) INTEGER + The leading dimension of the array VL. LDVL >= 1; if + JOBVL = 'V', LDVL >= N. + + VR (output) COMPLEX array, dimension (LDVR,N) + If JOBVR = 'V', the right eigenvectors v(j) are stored one + after another in the columns of VR, in the same order + as their eigenvalues. + If JOBVR = 'N', VR is not referenced. + v(j) = VR(:,j), the j-th column of VR. + + LDVR (input) INTEGER + The leading dimension of the array VR. LDVR >= 1; if + JOBVR = 'V', LDVR >= N. + + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,2*N). + For good performance, LWORK must generally be larger. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + RWORK (workspace) REAL array, dimension (2*N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = i, the QR algorithm failed to compute all the + eigenvalues, and no eigenvectors have been computed; + elements and i+1:N of W contain eigenvalues which have + converged. + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --w; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + --rwork; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + wantvl = lsame_(jobvl, "V"); + wantvr = lsame_(jobvr, "V"); + if (! wantvl && ! lsame_(jobvl, "N")) { + *info = -1; + } else if (! wantvr && ! lsame_(jobvr, "N")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldvl < 1 || wantvl && *ldvl < *n) { + *info = -8; + } else if (*ldvr < 1 || wantvr && *ldvr < *n) { + *info = -10; + } + +/* + Compute workspace + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + CWorkspace refers to complex workspace, and RWorkspace to real + workspace. NB refers to the optimal block size for the + immediately following subroutine, as returned by ILAENV. + HSWORK refers to the workspace preferred by CHSEQR, as + calculated below. HSWORK is computed assuming ILO=1 and IHI=N, + the worst case.) +*/ + + minwrk = 1; + if (*info == 0 && (*lwork >= 1 || lquery)) { + maxwrk = *n + *n * ilaenv_(&c__1, "CGEHRD", " ", n, &c__1, n, &c__0, ( + ftnlen)6, (ftnlen)1); + if (! wantvl && ! wantvr) { +/* Computing MAX */ + i__1 = 1, i__2 = *n << 1; + minwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = ilaenv_(&c__8, "CHSEQR", "EN", n, &c__1, n, &c_n1, (ftnlen) + 6, (ftnlen)2); + maxb = max(i__1,2); +/* + Computing MIN + Computing MAX +*/ + i__3 = 2, i__4 = ilaenv_(&c__4, "CHSEQR", "EN", n, &c__1, n, & + c_n1, (ftnlen)6, (ftnlen)2); + i__1 = min(maxb,*n), i__2 = max(i__3,i__4); + k = min(i__1,i__2); +/* Computing MAX */ + i__1 = k * (k + 2), i__2 = *n << 1; + hswork = max(i__1,i__2); + maxwrk = max(maxwrk,hswork); + } else { +/* Computing MAX */ + i__1 = 1, i__2 = *n << 1; + minwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + (*n - 1) * ilaenv_(&c__1, "CUNGHR", + " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = ilaenv_(&c__8, "CHSEQR", "SV", n, &c__1, n, &c_n1, (ftnlen) + 6, (ftnlen)2); + maxb = max(i__1,2); +/* + Computing MIN + Computing MAX +*/ + i__3 = 2, i__4 = ilaenv_(&c__4, "CHSEQR", "SV", n, &c__1, n, & + c_n1, (ftnlen)6, (ftnlen)2); + i__1 = min(maxb,*n), i__2 = max(i__3,i__4); + k = min(i__1,i__2); +/* Computing MAX */ + i__1 = k * (k + 2), i__2 = *n << 1; + hswork = max(i__1,i__2); +/* Computing MAX */ + i__1 = max(maxwrk,hswork), i__2 = *n << 1; + maxwrk = max(i__1,i__2); + } + work[1].r = (real) maxwrk, work[1].i = 0.f; + } + if (*lwork < minwrk && ! lquery) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGEEV ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = slamch_("P"); + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1.f / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = clange_("M", n, n, &a[a_offset], lda, dum); + scalea = FALSE_; + if (anrm > 0.f && anrm < smlnum) { + scalea = TRUE_; + cscale = smlnum; + } else if (anrm > bignum) { + scalea = TRUE_; + cscale = bignum; + } + if (scalea) { + clascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & + ierr); + } + +/* + Balance the matrix + (CWorkspace: none) + (RWorkspace: need N) +*/ + + ibal = 1; + cgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &rwork[ibal], &ierr); + +/* + Reduce to upper Hessenberg form + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: none) +*/ + + itau = 1; + iwrk = itau + *n; + i__1 = *lwork - iwrk + 1; + cgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, + &ierr); + + if (wantvl) { + +/* + Want left eigenvectors + Copy Householder vectors to VL +*/ + + *(unsigned char *)side = 'L'; + clacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) + ; + +/* + Generate unitary matrix in VL + (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) + (RWorkspace: none) +*/ + + i__1 = *lwork - iwrk + 1; + cunghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], + &i__1, &ierr); + +/* + Perform QR iteration, accumulating Schur vectors in VL + (CWorkspace: need 1, prefer HSWORK (see comments) ) + (RWorkspace: none) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + chseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vl[ + vl_offset], ldvl, &work[iwrk], &i__1, info); + + if (wantvr) { + +/* + Want left and right eigenvectors + Copy Schur vectors to VR +*/ + + *(unsigned char *)side = 'B'; + clacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); + } + + } else if (wantvr) { + +/* + Want right eigenvectors + Copy Householder vectors to VR +*/ + + *(unsigned char *)side = 'R'; + clacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) + ; + +/* + Generate unitary matrix in VR + (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) + (RWorkspace: none) +*/ + + i__1 = *lwork - iwrk + 1; + cunghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], + &i__1, &ierr); + +/* + Perform QR iteration, accumulating Schur vectors in VR + (CWorkspace: need 1, prefer HSWORK (see comments) ) + (RWorkspace: none) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + chseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vr[ + vr_offset], ldvr, &work[iwrk], &i__1, info); + + } else { + +/* + Compute eigenvalues only + (CWorkspace: need 1, prefer HSWORK (see comments) ) + (RWorkspace: none) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + chseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vr[ + vr_offset], ldvr, &work[iwrk], &i__1, info); + } + +/* If INFO > 0 from CHSEQR, then quit */ + + if (*info > 0) { + goto L50; + } + + if (wantvl || wantvr) { + +/* + Compute left and/or right eigenvectors + (CWorkspace: need 2*N) + (RWorkspace: need 2*N) +*/ + + irwork = ibal + *n; + ctrevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, + &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &rwork[irwork], + &ierr); + } + + if (wantvl) { + +/* + Undo balancing of left eigenvectors + (CWorkspace: none) + (RWorkspace: need N) +*/ + + cgebak_("B", "L", n, &ilo, &ihi, &rwork[ibal], n, &vl[vl_offset], + ldvl, &ierr); + +/* Normalize left eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + scl = 1.f / scnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + csscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + i__3 = k + i__ * vl_dim1; +/* Computing 2nd power */ + r__1 = vl[i__3].r; +/* Computing 2nd power */ + r__2 = r_imag(&vl[k + i__ * vl_dim1]); + rwork[irwork + k - 1] = r__1 * r__1 + r__2 * r__2; +/* L10: */ + } + k = isamax_(n, &rwork[irwork], &c__1); + r_cnjg(&q__2, &vl[k + i__ * vl_dim1]); + r__1 = sqrt(rwork[irwork + k - 1]); + q__1.r = q__2.r / r__1, q__1.i = q__2.i / r__1; + tmp.r = q__1.r, tmp.i = q__1.i; + cscal_(n, &tmp, &vl[i__ * vl_dim1 + 1], &c__1); + i__2 = k + i__ * vl_dim1; + i__3 = k + i__ * vl_dim1; + r__1 = vl[i__3].r; + q__1.r = r__1, q__1.i = 0.f; + vl[i__2].r = q__1.r, vl[i__2].i = q__1.i; +/* L20: */ + } + } + + if (wantvr) { + +/* + Undo balancing of right eigenvectors + (CWorkspace: none) + (RWorkspace: need N) +*/ + + cgebak_("B", "R", n, &ilo, &ihi, &rwork[ibal], n, &vr[vr_offset], + ldvr, &ierr); + +/* Normalize right eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + scl = 1.f / scnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + csscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + i__3 = k + i__ * vr_dim1; +/* Computing 2nd power */ + r__1 = vr[i__3].r; +/* Computing 2nd power */ + r__2 = r_imag(&vr[k + i__ * vr_dim1]); + rwork[irwork + k - 1] = r__1 * r__1 + r__2 * r__2; +/* L30: */ + } + k = isamax_(n, &rwork[irwork], &c__1); + r_cnjg(&q__2, &vr[k + i__ * vr_dim1]); + r__1 = sqrt(rwork[irwork + k - 1]); + q__1.r = q__2.r / r__1, q__1.i = q__2.i / r__1; + tmp.r = q__1.r, tmp.i = q__1.i; + cscal_(n, &tmp, &vr[i__ * vr_dim1 + 1], &c__1); + i__2 = k + i__ * vr_dim1; + i__3 = k + i__ * vr_dim1; + r__1 = vr[i__3].r; + q__1.r = r__1, q__1.i = 0.f; + vr[i__2].r = q__1.r, vr[i__2].i = q__1.i; +/* L40: */ + } + } + +/* Undo scaling if necessary */ + +L50: + if (scalea) { + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = max(i__3,1); + clascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &w[*info + 1] + , &i__2, &ierr); + if (*info > 0) { + i__1 = ilo - 1; + clascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &w[1], n, + &ierr); + } + } + + work[1].r = (real) maxwrk, work[1].i = 0.f; + return 0; + +/* End of CGEEV */ + +} /* cgeev_ */ + +/* Subroutine */ int cgehd2_(integer *n, integer *ilo, integer *ihi, complex * + a, integer *lda, complex *tau, complex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + complex q__1; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__; + static complex alpha; + extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * + , integer *, complex *, complex *, integer *, complex *), + clarfg_(integer *, complex *, complex *, integer *, complex *), + xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CGEHD2 reduces a complex general matrix A to upper Hessenberg form H + by a unitary similarity transformation: Q' * A * Q = H . + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that A is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to CGEBAL; otherwise they should be + set to 1 and N respectively. See Further Details. + 1 <= ILO <= IHI <= max(1,N). + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the n by n general matrix to be reduced. + On exit, the upper triangle and the first subdiagonal of A + are overwritten with the upper Hessenberg matrix H, and the + elements below the first subdiagonal, with the array TAU, + represent the unitary matrix Q as a product of elementary + reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) COMPLEX array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) COMPLEX array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrix Q is represented as a product of (ihi-ilo) elementary + reflectors + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on + exit in A(i+2:ihi,i), and tau in TAU(i). + + The contents of A are illustrated by the following example, with + n = 7, ilo = 2 and ihi = 6: + + on entry, on exit, + + ( a a a a a a a ) ( a a h h h h a ) + ( a a a a a a ) ( a h h h h a ) + ( a a a a a a ) ( h h h h h h ) + ( a a a a a a ) ( v2 h h h h h ) + ( a a a a a a ) ( v2 v3 h h h h ) + ( a a a a a a ) ( v2 v3 v4 h h h ) + ( a ) ( a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGEHD2", &i__1); + return 0; + } + + i__1 = *ihi - 1; + for (i__ = *ilo; i__ <= i__1; ++i__) { + +/* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */ + + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *ihi - i__; +/* Computing MIN */ + i__3 = i__ + 2; + clarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, &tau[ + i__]); + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Apply H(i) to A(1:ihi,i+1:ihi) from the right */ + + i__2 = *ihi - i__; + clarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]); + +/* Apply H(i)' to A(i+1:ihi,i+1:n) from the left */ + + i__2 = *ihi - i__; + i__3 = *n - i__; + r_cnjg(&q__1, &tau[i__]); + clarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &q__1, + &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]); + + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = alpha.r, a[i__2].i = alpha.i; +/* L10: */ + } + + return 0; + +/* End of CGEHD2 */ + +} /* cgehd2_ */ + +/* Subroutine */ int cgehrd_(integer *n, integer *ilo, integer *ihi, complex * + a, integer *lda, complex *tau, complex *work, integer *lwork, integer + *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + complex q__1; + + /* Local variables */ + static integer i__; + static complex t[4160] /* was [65][64] */; + static integer ib; + static complex ei; + static integer nb, nh, nx, iws; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *); + static integer nbmin, iinfo; + extern /* Subroutine */ int cgehd2_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *), clarfb_( + char *, char *, char *, char *, integer *, integer *, integer *, + complex *, integer *, complex *, integer *, complex *, integer *, + complex *, integer *), clahrd_( + integer *, integer *, integer *, complex *, integer *, complex *, + complex *, integer *, complex *, integer *), xerbla_(char *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CGEHRD reduces a complex general matrix A to upper Hessenberg form H + by a unitary similarity transformation: Q' * A * Q = H . + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that A is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to CGEBAL; otherwise they should be + set to 1 and N respectively. See Further Details. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the N-by-N general matrix to be reduced. + On exit, the upper triangle and the first subdiagonal of A + are overwritten with the upper Hessenberg matrix H, and the + elements below the first subdiagonal, with the array TAU, + represent the unitary matrix Q as a product of elementary + reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) COMPLEX array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to + zero. + + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The length of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrix Q is represented as a product of (ihi-ilo) elementary + reflectors + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on + exit in A(i+2:ihi,i), and tau in TAU(i). + + The contents of A are illustrated by the following example, with + n = 7, ilo = 2 and ihi = 6: + + on entry, on exit, + + ( a a a a a a a ) ( a a h h h h a ) + ( a a a a a a ) ( a h h h h a ) + ( a a a a a a ) ( h h h h h h ) + ( a a a a a a ) ( v2 h h h h h ) + ( a a a a a a ) ( v2 v3 h h h h ) + ( a a a a a a ) ( v2 v3 v4 h h h ) + ( a ) ( a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; +/* Computing MIN */ + i__1 = 64, i__2 = ilaenv_(&c__1, "CGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = min(i__1,i__2); + lwkopt = *n * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGEHRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */ + + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + tau[i__2].r = 0.f, tau[i__2].i = 0.f; +/* L10: */ + } + i__1 = *n - 1; + for (i__ = max(1,*ihi); i__ <= i__1; ++i__) { + i__2 = i__; + tau[i__2].r = 0.f, tau[i__2].i = 0.f; +/* L20: */ + } + +/* Quick return if possible */ + + nh = *ihi - *ilo + 1; + if (nh <= 1) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + nbmin = 2; + iws = 1; + if (nb > 1 && nb < nh) { + +/* + Determine when to cross over from blocked to unblocked code + (last block is always handled by unblocked code). + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "CGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < nh) { + +/* Determine if workspace is large enough for blocked code. */ + + iws = *n * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: determine the + minimum value of NB, and reduce NB or force use of + unblocked code. + + Computing MAX +*/ + i__1 = 2, i__2 = ilaenv_(&c__2, "CGEHRD", " ", n, ilo, ihi, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + if (*lwork >= *n * nbmin) { + nb = *lwork / *n; + } else { + nb = 1; + } + } + } + } + ldwork = *n; + + if (nb < nbmin || nb >= nh) { + +/* Use unblocked code below */ + + i__ = *ilo; + + } else { + +/* Use blocked code */ + + i__1 = *ihi - 1 - nx; + i__2 = nb; + for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *ihi - i__; + ib = min(i__3,i__4); + +/* + Reduce columns i:i+ib-1 to Hessenberg form, returning the + matrices V and T of the block reflector H = I - V*T*V' + which performs the reduction, and also the matrix Y = A*V*T +*/ + + clahrd_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & + c__65, &work[1], &ldwork); + +/* + Apply the block reflector H to A(1:ihi,i+ib:ihi) from the + right, computing A := A - Y * V'. V(i+ib,ib-1) must be set + to 1. +*/ + + i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; + ei.r = a[i__3].r, ei.i = a[i__3].i; + i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; + a[i__3].r = 1.f, a[i__3].i = 0.f; + i__3 = *ihi - i__ - ib + 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "Conjugate transpose", ihi, &i__3, &ib, & + q__1, &work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, + &c_b56, &a[(i__ + ib) * a_dim1 + 1], lda); + i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; + a[i__3].r = ei.r, a[i__3].i = ei.i; + +/* + Apply the block reflector H to A(i+1:ihi,i+ib:n) from the + left +*/ + + i__3 = *ihi - i__; + i__4 = *n - i__ - ib + 1; + clarfb_("Left", "Conjugate transpose", "Forward", "Columnwise", & + i__3, &i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, & + c__65, &a[i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], & + ldwork); +/* L30: */ + } + } + +/* Use unblocked code to reduce the rest of the matrix */ + + cgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); + work[1].r = (real) iws, work[1].i = 0.f; + + return 0; + +/* End of CGEHRD */ + +} /* cgehrd_ */ + +/* Subroutine */ int cgelq2_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, k; + static complex alpha; + extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * + , integer *, complex *, complex *, integer *, complex *), + clarfg_(integer *, complex *, complex *, integer *, complex *), + clacgv_(integer *, complex *, integer *), xerbla_(char *, integer + *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CGELQ2 computes an LQ factorization of a complex m by n matrix A: + A = L * Q. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the m by n matrix A. + On exit, the elements on and below the diagonal of the array + contain the m by min(m,n) lower trapezoidal matrix L (L is + lower triangular if m <= n); the elements above the diagonal, + with the array TAU, represent the unitary matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) COMPLEX array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) COMPLEX array, dimension (M) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(k)' . . . H(2)' H(1)', where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in + A(i,i+1:n), and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGELQ2", &i__1); + return 0; + } + + k = min(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; + clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, &tau[i__] + ); + if (i__ < *m) { + +/* Apply H(i) to A(i+1:m,i:n) from the right */ + + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + i__2 = *m - i__; + i__3 = *n - i__ + 1; + clarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[ + i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + } + i__2 = i__ + i__ * a_dim1; + a[i__2].r = alpha.r, a[i__2].i = alpha.i; + i__2 = *n - i__ + 1; + clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); +/* L10: */ + } + return 0; + +/* End of CGELQ2 */ + +} /* cgelq2_ */ + +/* Subroutine */ int cgelqf_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int cgelq2_(integer *, integer *, complex *, + integer *, complex *, complex *, integer *), clarfb_(char *, char + *, char *, char *, integer *, integer *, integer *, complex *, + integer *, complex *, integer *, complex *, integer *, complex *, + integer *), clarft_(char *, char * + , integer *, integer *, complex *, integer *, complex *, complex * + , integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CGELQF computes an LQ factorization of a complex M-by-N matrix A: + A = L * Q. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, the elements on and below the diagonal of the array + contain the m-by-min(m,n) lower trapezoidal matrix L (L is + lower triangular if m <= n); the elements above the diagonal, + with the array TAU, represent the unitary matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) COMPLEX array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,M). + For optimum performance LWORK >= M*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(k)' . . . H(2)' H(1)', where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in + A(i,i+1:n), and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "CGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *m * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (*lwork < max(1,*m) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGELQF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = min(*m,*n); + if (k == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "CGELQF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "CGELQF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* + Compute the LQ factorization of the current block + A(i:i+ib-1,i:n) +*/ + + i__3 = *n - i__ + 1; + cgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *m) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__3 = *n - i__ + 1; + clarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(i+ib:m,i:n) from the right */ + + i__3 = *m - i__ - ib + 1; + i__4 = *n - i__ + 1; + clarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, + &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. */ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + cgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); + } + + work[1].r = (real) iws, work[1].i = 0.f; + return 0; + +/* End of CGELQF */ + +} /* cgelqf_ */ + +/* Subroutine */ int cgeqr2_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + complex q__1; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, k; + static complex alpha; + extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * + , integer *, complex *, complex *, integer *, complex *), + clarfg_(integer *, complex *, complex *, integer *, complex *), + xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CGEQR2 computes a QR factorization of a complex m by n matrix A: + A = Q * R. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the m by n matrix A. + On exit, the elements on and above the diagonal of the array + contain the min(m,n) by n upper trapezoidal matrix R (R is + upper triangular if m >= n); the elements below the diagonal, + with the array TAU, represent the unitary matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) COMPLEX array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) COMPLEX array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(1) H(2) . . . H(k), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), + and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGEQR2", &i__1); + return 0; + } + + k = min(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + clarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1] + , &c__1, &tau[i__]); + if (i__ < *n) { + +/* Apply H(i)' to A(i:m,i+1:n) from the left */ + + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + i__2 = *m - i__ + 1; + i__3 = *n - i__; + r_cnjg(&q__1, &tau[i__]); + clarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &q__1, + &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + i__2 = i__ + i__ * a_dim1; + a[i__2].r = alpha.r, a[i__2].i = alpha.i; + } +/* L10: */ + } + return 0; + +/* End of CGEQR2 */ + +} /* cgeqr2_ */ + +/* Subroutine */ int cgeqrf_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int cgeqr2_(integer *, integer *, complex *, + integer *, complex *, complex *, integer *), clarfb_(char *, char + *, char *, char *, integer *, integer *, integer *, complex *, + integer *, complex *, integer *, complex *, integer *, complex *, + integer *), clarft_(char *, char * + , integer *, integer *, complex *, integer *, complex *, complex * + , integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CGEQRF computes a QR factorization of a complex M-by-N matrix A: + A = Q * R. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, the elements on and above the diagonal of the array + contain the min(M,N)-by-N upper trapezoidal matrix R (R is + upper triangular if m >= n); the elements below the diagonal, + with the array TAU, represent the unitary matrix Q as a + product of min(m,n) elementary reflectors (see Further + Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) COMPLEX array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is + the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(1) H(2) . . . H(k), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), + and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "CGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *n * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGEQRF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = min(*m,*n); + if (k == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "CGEQRF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "CGEQRF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* + Compute the QR factorization of the current block + A(i:m,i:i+ib-1) +*/ + + i__3 = *m - i__ + 1; + cgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *n) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__3 = *m - i__ + 1; + clarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H' to A(i:m,i+ib:n) from the left */ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ - ib + 1; + clarfb_("Left", "Conjugate transpose", "Forward", "Columnwise" + , &i__3, &i__4, &ib, &a[i__ + i__ * a_dim1], lda, & + work[1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, + &work[ib + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. */ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + cgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); + } + + work[1].r = (real) iws, work[1].i = 0.f; + return 0; + +/* End of CGEQRF */ + +} /* cgeqrf_ */ + +/* Subroutine */ int cgesdd_(char *jobz, integer *m, integer *n, complex *a, + integer *lda, real *s, complex *u, integer *ldu, complex *vt, integer + *ldvt, complex *work, integer *lwork, real *rwork, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2, i__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, ie, il, ir, iu, blk; + static real dum[1], eps; + static integer iru, ivt, iscl; + static real anrm; + static integer idum[1], ierr, itau, irvt; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *); + extern logical lsame_(char *, char *); + static integer chunk, minmn, wrkbl, itaup, itauq; + static logical wntqa; + static integer nwork; + extern /* Subroutine */ int clacp2_(char *, integer *, integer *, real *, + integer *, complex *, integer *); + static logical wntqn, wntqo, wntqs; + static integer mnthr1, mnthr2; + extern /* Subroutine */ int cgebrd_(integer *, integer *, complex *, + integer *, real *, real *, complex *, complex *, complex *, + integer *, integer *); + extern doublereal clange_(char *, integer *, integer *, complex *, + integer *, real *); + extern /* Subroutine */ int cgelqf_(integer *, integer *, complex *, + integer *, complex *, complex *, integer *, integer *), clacrm_( + integer *, integer *, complex *, integer *, real *, integer *, + complex *, integer *, real *), clarcm_(integer *, integer *, real + *, integer *, complex *, integer *, complex *, integer *, real *), + clascl_(char *, integer *, integer *, real *, real *, integer *, + integer *, complex *, integer *, integer *), sbdsdc_(char + *, char *, integer *, real *, real *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *, integer *), cgeqrf_(integer *, integer *, complex *, integer + *, complex *, complex *, integer *, integer *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex + *, integer *, complex *, integer *), claset_(char *, + integer *, integer *, complex *, complex *, complex *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int cungbr_(char *, integer *, integer *, integer + *, complex *, integer *, complex *, complex *, integer *, integer + *); + static real bignum; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *), cunmbr_(char *, char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *), cunglq_( + integer *, integer *, integer *, complex *, integer *, complex *, + complex *, integer *, integer *); + static integer ldwrkl; + extern /* Subroutine */ int cungqr_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *, integer *); + static integer ldwrkr, minwrk, ldwrku, maxwrk, ldwkvt; + static real smlnum; + static logical wntqas, lquery; + static integer nrwork; + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + CGESDD computes the singular value decomposition (SVD) of a complex + M-by-N matrix A, optionally computing the left and/or right singular + vectors, by using divide-and-conquer method. The SVD is written + + A = U * SIGMA * conjugate-transpose(V) + + where SIGMA is an M-by-N matrix which is zero except for its + min(m,n) diagonal elements, U is an M-by-M unitary matrix, and + V is an N-by-N unitary matrix. The diagonal elements of SIGMA + are the singular values of A; they are real and non-negative, and + are returned in descending order. The first min(m,n) columns of + U and V are the left and right singular vectors of A. + + Note that the routine returns VT = V**H, not V. + + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + JOBZ (input) CHARACTER*1 + Specifies options for computing all or part of the matrix U: + = 'A': all M columns of U and all N rows of V**H are + returned in the arrays U and VT; + = 'S': the first min(M,N) columns of U and the first + min(M,N) rows of V**H are returned in the arrays U + and VT; + = 'O': If M >= N, the first N columns of U are overwritten + on the array A and all rows of V**H are returned in + the array VT; + otherwise, all columns of U are returned in the + array U and the first M rows of V**H are overwritten + in the array VT; + = 'N': no columns of U or rows of V**H are computed. + + M (input) INTEGER + The number of rows of the input matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the input matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, + if JOBZ = 'O', A is overwritten with the first N columns + of U (the left singular vectors, stored + columnwise) if M >= N; + A is overwritten with the first M rows + of V**H (the right singular vectors, stored + rowwise) otherwise. + if JOBZ .ne. 'O', the contents of A are destroyed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + S (output) REAL array, dimension (min(M,N)) + The singular values of A, sorted so that S(i) >= S(i+1). + + U (output) COMPLEX array, dimension (LDU,UCOL) + UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; + UCOL = min(M,N) if JOBZ = 'S'. + If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M + unitary matrix U; + if JOBZ = 'S', U contains the first min(M,N) columns of U + (the left singular vectors, stored columnwise); + if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= 1; if + JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. + + VT (output) COMPLEX array, dimension (LDVT,N) + If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the + N-by-N unitary matrix V**H; + if JOBZ = 'S', VT contains the first min(M,N) rows of + V**H (the right singular vectors, stored rowwise); + if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= 1; if + JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; + if JOBZ = 'S', LDVT >= min(M,N). + + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= 1. + if JOBZ = 'N', LWORK >= 2*min(M,N)+max(M,N). + if JOBZ = 'O', + LWORK >= 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N). + if JOBZ = 'S' or 'A', + LWORK >= min(M,N)*min(M,N)+2*min(M,N)+max(M,N). + For good performance, LWORK should generally be larger. + If LWORK < 0 but other input arguments are legal, WORK(1) + returns the optimal LWORK. + + RWORK (workspace) REAL array, dimension (LRWORK) + If JOBZ = 'N', LRWORK >= 7*min(M,N). + Otherwise, LRWORK >= 5*min(M,N)*min(M,N) + 5*min(M,N) + + IWORK (workspace) INTEGER array, dimension (8*min(M,N)) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The updating process of SBDSDC did not converge. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --work; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + minmn = min(*m,*n); + mnthr1 = (integer) (minmn * 17.f / 9.f); + mnthr2 = (integer) (minmn * 5.f / 3.f); + wntqa = lsame_(jobz, "A"); + wntqs = lsame_(jobz, "S"); + wntqas = wntqa || wntqs; + wntqo = lsame_(jobz, "O"); + wntqn = lsame_(jobz, "N"); + minwrk = 1; + maxwrk = 1; + lquery = *lwork == -1; + + if (! (wntqa || wntqs || wntqo || wntqn)) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < * + m) { + *info = -8; + } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || + wntqo && *m >= *n && *ldvt < *n) { + *info = -10; + } + +/* + Compute workspace + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + CWorkspace refers to complex workspace, and RWorkspace to + real workspace. NB refers to the optimal block size for the + immediately following subroutine, as returned by ILAENV.) +*/ + + if (*info == 0 && *m > 0 && *n > 0) { + if (*m >= *n) { + +/* + There is no complex work space needed for bidiagonal SVD + The real work space needed for bidiagonal SVD is BDSPAC, + BDSPAC = 3*N*N + 4*N +*/ + + if (*m >= mnthr1) { + if (wntqn) { + +/* Path 1 (M much larger than N, JOBZ='N') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& + c__1, "CGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); + maxwrk = wrkbl; + minwrk = *n * 3; + } else if (wntqo) { + +/* Path 2 (M much larger than N, JOBZ='O') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "CUNGQR", + " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& + c__1, "CGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *m * *n + *n * *n + wrkbl; + minwrk = (*n << 1) * *n + *n * 3; + } else if (wntqs) { + +/* Path 3 (M much larger than N, JOBZ='S') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "CUNGQR", + " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& + c__1, "CGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *n * *n + wrkbl; + minwrk = *n * *n + *n * 3; + } else if (wntqa) { + +/* Path 4 (M much larger than N, JOBZ='A') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *m * ilaenv_(&c__1, "CUNGQR", + " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& + c__1, "CGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *n * *n + wrkbl; + minwrk = *n * *n + (*n << 1) + *m; + } + } else if (*m >= mnthr2) { + +/* Path 5 (M much larger than N, but not as much as MNTHR1) */ + + maxwrk = (*n << 1) + (*m + *n) * ilaenv_(&c__1, "CGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + minwrk = (*n << 1) + *m; + if (wntqo) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNGBR", "Q", m, n, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + maxwrk += *m * *n; + minwrk += *n * *n; + } else if (wntqs) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNGBR", "Q", m, n, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + } else if (wntqa) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + } + } else { + +/* Path 6 (M at least N, but not much larger) */ + + maxwrk = (*n << 1) + (*m + *n) * ilaenv_(&c__1, "CGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + minwrk = (*n << 1) + *m; + if (wntqo) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "QLN", m, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + maxwrk += *m * *n; + minwrk += *n * *n; + } else if (wntqs) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "QLN", m, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + } else if (wntqa) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNGBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + } + } + } else { + +/* + There is no complex work space needed for bidiagonal SVD + The real work space needed for bidiagonal SVD is BDSPAC, + BDSPAC = 3*M*M + 4*M +*/ + + if (*n >= mnthr1) { + if (wntqn) { + +/* Path 1t (N much larger than M, JOBZ='N') */ + + maxwrk = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& + c__1, "CGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + maxwrk = max(i__1,i__2); + minwrk = *m * 3; + } else if (wntqo) { + +/* Path 2t (N much larger than M, JOBZ='O') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "CUNGLQ", + " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& + c__1, "CGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *m * *n + *m * *m + wrkbl; + minwrk = (*m << 1) * *m + *m * 3; + } else if (wntqs) { + +/* Path 3t (N much larger than M, JOBZ='S') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "CUNGLQ", + " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& + c__1, "CGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *m * *m + wrkbl; + minwrk = *m * *m + *m * 3; + } else if (wntqa) { + +/* Path 4t (N much larger than M, JOBZ='A') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *n * ilaenv_(&c__1, "CUNGLQ", + " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& + c__1, "CGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *m * *m + wrkbl; + minwrk = *m * *m + (*m << 1) + *n; + } + } else if (*n >= mnthr2) { + +/* Path 5t (N much larger than M, but not as much as MNTHR1) */ + + maxwrk = (*m << 1) + (*m + *n) * ilaenv_(&c__1, "CGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + minwrk = (*m << 1) + *n; + if (wntqo) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "P", m, n, m, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + maxwrk += *m * *n; + minwrk += *m * *m; + } else if (wntqs) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "P", m, n, m, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + } else if (wntqa) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *n * ilaenv_(&c__1, + "CUNGBR", "P", n, n, m, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + } + } else { + +/* Path 6t (N greater than M, but not much larger) */ + + maxwrk = (*m << 1) + (*m + *n) * ilaenv_(&c__1, "CGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + minwrk = (*m << 1) + *n; + if (wntqo) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNMBR", "PRC", m, n, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNMBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + maxwrk += *m * *n; + minwrk += *m * *m; + } else if (wntqs) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "PRC", m, n, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + } else if (wntqa) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *n * ilaenv_(&c__1, + "CUNGBR", "PRC", n, n, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + } + } + } + maxwrk = max(maxwrk,minwrk); + work[1].r = (real) maxwrk, work[1].i = 0.f; + } + + if (*lwork < minwrk && ! lquery) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGESDD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + if (*lwork >= 1) { + work[1].r = 1.f, work[1].i = 0.f; + } + return 0; + } + +/* Get machine constants */ + + eps = slamch_("P"); + smlnum = sqrt(slamch_("S")) / eps; + bignum = 1.f / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = clange_("M", m, n, &a[a_offset], lda, dum); + iscl = 0; + if (anrm > 0.f && anrm < smlnum) { + iscl = 1; + clascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & + ierr); + } else if (anrm > bignum) { + iscl = 1; + clascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & + ierr); + } + + if (*m >= *n) { + +/* + A has at least as many rows as columns. If A has sufficiently + more rows than columns, first reduce using the QR + decomposition (if sufficient workspace available) +*/ + + if (*m >= mnthr1) { + + if (wntqn) { + +/* + Path 1 (M much larger than N, JOBZ='N') + No singular vectors to be computed +*/ + + itau = 1; + nwork = itau + *n; + +/* + Compute A=Q*R + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: need 0) +*/ + + i__1 = *lwork - nwork + 1; + cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Zero out below R */ + + i__1 = *n - 1; + i__2 = *n - 1; + claset_("L", &i__1, &i__2, &c_b55, &c_b55, &a[a_dim1 + 2], + lda); + ie = 1; + itauq = 1; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in A + (CWorkspace: need 3*N, prefer 2*N+2*N*NB) + (RWorkspace: need N) +*/ + + i__1 = *lwork - nwork + 1; + cgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + nrwork = ie + *n; + +/* + Perform bidiagonal SVD, compute singular values only + (CWorkspace: 0) + (RWorkspace: need BDSPAC) +*/ + + sbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + + } else if (wntqo) { + +/* + Path 2 (M much larger than N, JOBZ='O') + N left singular vectors to be overwritten on A and + N right singular vectors to be computed in VT +*/ + + iu = 1; + +/* WORK(IU) is N by N */ + + ldwrku = *n; + ir = iu + ldwrku * *n; + if (*lwork >= *m * *n + *n * *n + *n * 3) { + +/* WORK(IR) is M by N */ + + ldwrkr = *m; + } else { + ldwrkr = (*lwork - *n * *n - *n * 3) / *n; + } + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* + Compute A=Q*R + (CWorkspace: need N*N+2*N, prefer M*N+N+N*NB) + (RWorkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy R to WORK( IR ), zeroing out below it */ + + clacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__1 = *n - 1; + i__2 = *n - 1; + claset_("L", &i__1, &i__2, &c_b55, &c_b55, &work[ir + 1], & + ldwrkr); + +/* + Generate Q in A + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + cungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in WORK(IR) + (CWorkspace: need N*N+3*N, prefer M*N+2*N+2*N*NB) + (RWorkspace: need N) +*/ + + i__1 = *lwork - nwork + 1; + cgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of R in WORK(IRU) and computing right singular vectors + of R in WORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = ie + *n; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix WORK(IU) + Overwrite WORK(IU) by the left singular vectors of R + (CWorkspace: need 2*N*N+3*N, prefer M*N+N*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); + i__1 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__1, & + ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by the right singular vectors of R + (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + +/* + Multiply Q in A by left singular vectors of R in + WORK(IU), storing result in WORK(IR) and copying to A + (CWorkspace: need 2*N*N, prefer N*N+M*N) + (RWorkspace: 0) +*/ + + i__1 = *m; + i__2 = ldwrkr; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrkr); + cgemm_("N", "N", &chunk, n, n, &c_b56, &a[i__ + a_dim1], + lda, &work[iu], &ldwrku, &c_b55, &work[ir], & + ldwrkr); + clacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + + a_dim1], lda); +/* L10: */ + } + + } else if (wntqs) { + +/* + Path 3 (M much larger than N, JOBZ='S') + N left singular vectors to be computed in U and + N right singular vectors to be computed in VT +*/ + + ir = 1; + +/* WORK(IR) is N by N */ + + ldwrkr = *n; + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* + Compute A=Q*R + (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + clacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__2 = *n - 1; + i__1 = *n - 1; + claset_("L", &i__2, &i__1, &c_b55, &c_b55, &work[ir + 1], & + ldwrkr); + +/* + Generate Q in A + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + cungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in WORK(IR) + (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) + (RWorkspace: need N) +*/ + + i__2 = *lwork - nwork + 1; + cgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = ie + *n; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of R + (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of R + (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply Q in A by left singular vectors of R in + WORK(IR), storing result in U + (CWorkspace: need N*N) + (RWorkspace: 0) +*/ + + clacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr); + cgemm_("N", "N", m, n, n, &c_b56, &a[a_offset], lda, &work[ir] + , &ldwrkr, &c_b55, &u[u_offset], ldu); + + } else if (wntqa) { + +/* + Path 4 (M much larger than N, JOBZ='A') + M left singular vectors to be computed in U and + N right singular vectors to be computed in VT +*/ + + iu = 1; + +/* WORK(IU) is N by N */ + + ldwrku = *n; + itau = iu + ldwrku * *n; + nwork = itau + *n; + +/* + Compute A=Q*R, copying result to U + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + +/* + Generate Q in U + (CWorkspace: need N+M, prefer N+M*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + cungqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork], + &i__2, &ierr); + +/* Produce R in A, zeroing out below it */ + + i__2 = *n - 1; + i__1 = *n - 1; + claset_("L", &i__2, &i__1, &c_b55, &c_b55, &a[a_dim1 + 2], + lda); + ie = 1; + itauq = itau; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in A + (CWorkspace: need 3*N, prefer 2*N+2*N*NB) + (RWorkspace: need N) +*/ + + i__2 = *lwork - nwork + 1; + cgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + iru = ie + *n; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix WORK(IU) + Overwrite WORK(IU) by left singular vectors of R + (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); + i__2 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & + ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of R + (CWorkspace: need 3*N, prefer 2*N+N*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply Q in U by left singular vectors of R in + WORK(IU), storing result in A + (CWorkspace: need N*N) + (RWorkspace: 0) +*/ + + cgemm_("N", "N", m, n, n, &c_b56, &u[u_offset], ldu, &work[iu] + , &ldwrku, &c_b55, &a[a_offset], lda); + +/* Copy left singular vectors of A from A to U */ + + clacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + + } + + } else if (*m >= mnthr2) { + +/* + MNTHR2 <= M < MNTHR1 + + Path 5 (M much larger than N, but not as much as MNTHR1) + Reduce to bidiagonal form without QR decomposition, use + CUNGBR and matrix multiplication to compute singular vectors +*/ + + ie = 1; + nrwork = ie + *n; + itauq = 1; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize A + (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) + (RWorkspace: need N) +*/ + + i__2 = *lwork - nwork + 1; + cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* + Compute singular values only + (Cworkspace: 0) + (Rworkspace: need BDSPAC) +*/ + + sbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + } else if (wntqo) { + iu = nwork; + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + +/* + Copy A to VT, generate P**H + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: 0) +*/ + + clacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + cungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* + Generate Q in A + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + cungbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[ + nwork], &i__2, &ierr); + + if (*lwork >= *m * *n + *n * 3) { + +/* WORK( IU ) is M by N */ + + ldwrku = *m; + } else { + +/* WORK(IU) is LDWRKU by N */ + + ldwrku = (*lwork - *n * 3) / *n; + } + nwork = iu + ldwrku * *n; + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Multiply real matrix RWORK(IRVT) by P**H in VT, + storing the result in WORK(IU), copying to VT + (Cworkspace: need 0) + (Rworkspace: need 3*N*N) +*/ + + clarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &work[iu] + , &ldwrku, &rwork[nrwork]); + clacpy_("F", n, n, &work[iu], &ldwrku, &vt[vt_offset], ldvt); + +/* + Multiply Q in A by real matrix RWORK(IRU), storing the + result in WORK(IU), copying to A + (CWorkspace: need N*N, prefer M*N) + (Rworkspace: need 3*N*N, prefer N*N+2*M*N) +*/ + + nrwork = irvt; + i__2 = *m; + i__1 = ldwrku; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrku); + clacrm_(&chunk, n, &a[i__ + a_dim1], lda, &rwork[iru], n, + &work[iu], &ldwrku, &rwork[nrwork]); + clacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + + a_dim1], lda); +/* L20: */ + } + + } else if (wntqs) { + +/* + Copy A to VT, generate P**H + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: 0) +*/ + + clacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + cungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__1, &ierr); + +/* + Copy A to U, generate Q + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: 0) +*/ + + clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + cungbr_("Q", m, n, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__1, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Multiply real matrix RWORK(IRVT) by P**H in VT, + storing the result in A, copying to VT + (Cworkspace: need 0) + (Rworkspace: need 3*N*N) +*/ + + clarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &a[ + a_offset], lda, &rwork[nrwork]); + clacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + +/* + Multiply Q in U by real matrix RWORK(IRU), storing the + result in A, copying to U + (CWorkspace: need 0) + (Rworkspace: need N*N+2*M*N) +*/ + + nrwork = irvt; + clacrm_(m, n, &u[u_offset], ldu, &rwork[iru], n, &a[a_offset], + lda, &rwork[nrwork]); + clacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + } else { + +/* + Copy A to VT, generate P**H + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: 0) +*/ + + clacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + cungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__1, &ierr); + +/* + Copy A to U, generate Q + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: 0) +*/ + + clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + cungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__1, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Multiply real matrix RWORK(IRVT) by P**H in VT, + storing the result in A, copying to VT + (Cworkspace: need 0) + (Rworkspace: need 3*N*N) +*/ + + clarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &a[ + a_offset], lda, &rwork[nrwork]); + clacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + +/* + Multiply Q in U by real matrix RWORK(IRU), storing the + result in A, copying to U + (CWorkspace: 0) + (Rworkspace: need 3*N*N) +*/ + + nrwork = irvt; + clacrm_(m, n, &u[u_offset], ldu, &rwork[iru], n, &a[a_offset], + lda, &rwork[nrwork]); + clacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + } + + } else { + +/* + M .LT. MNTHR2 + + Path 6 (M at least N, but not much larger) + Reduce to bidiagonal form without QR decomposition + Use CUNMBR to compute singular vectors +*/ + + ie = 1; + nrwork = ie + *n; + itauq = 1; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize A + (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) + (RWorkspace: need N) +*/ + + i__1 = *lwork - nwork + 1; + cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__1, &ierr); + if (wntqn) { + +/* + Compute singular values only + (Cworkspace: 0) + (Rworkspace: need BDSPAC) +*/ + + sbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + } else if (wntqo) { + iu = nwork; + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + if (*lwork >= *m * *n + *n * 3) { + +/* WORK( IU ) is M by N */ + + ldwrku = *m; + } else { + +/* WORK( IU ) is LDWRKU by N */ + + ldwrku = (*lwork - *n * 3) / *n; + } + nwork = iu + ldwrku * *n; + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of A + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: need 0) +*/ + + clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + + if (*lwork >= *m * *n + *n * 3) { + +/* + Copy real matrix RWORK(IRU) to complex matrix WORK(IU) + Overwrite WORK(IU) by left singular vectors of A, copying + to A + (Cworkspace: need M*N+2*N, prefer M*N+N+N*NB) + (Rworkspace: need 0) +*/ + + claset_("F", m, n, &c_b55, &c_b55, &work[iu], &ldwrku); + clacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); + i__1 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__1, & + ierr); + clacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda); + } else { + +/* + Generate Q in A + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: need 0) +*/ + + i__1 = *lwork - nwork + 1; + cungbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & + work[nwork], &i__1, &ierr); + +/* + Multiply Q in A by real matrix RWORK(IRU), storing the + result in WORK(IU), copying to A + (CWorkspace: need N*N, prefer M*N) + (Rworkspace: need 3*N*N, prefer N*N+2*M*N) +*/ + + nrwork = irvt; + i__1 = *m; + i__2 = ldwrku; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrku); + clacrm_(&chunk, n, &a[i__ + a_dim1], lda, &rwork[iru], + n, &work[iu], &ldwrku, &rwork[nrwork]); + clacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + + a_dim1], lda); +/* L30: */ + } + } + + } else if (wntqs) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of A + (CWorkspace: need 3*N, prefer 2*N+N*NB) + (RWorkspace: 0) +*/ + + claset_("F", m, n, &c_b55, &c_b55, &u[u_offset], ldu); + clacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of A + (CWorkspace: need 3*N, prefer 2*N+N*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + } else { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* Set the right corner of U to identity matrix */ + + claset_("F", m, m, &c_b55, &c_b55, &u[u_offset], ldu); + i__2 = *m - *n; + i__1 = *m - *n; + claset_("F", &i__2, &i__1, &c_b55, &c_b56, &u[*n + 1 + (*n + + 1) * u_dim1], ldu); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of A + (CWorkspace: need 2*N+M, prefer 2*N+M*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of A + (CWorkspace: need 3*N, prefer 2*N+N*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + } + + } + + } else { + +/* + A has more columns than rows. If A has sufficiently more + columns than rows, first reduce using the LQ decomposition + (if sufficient workspace available) +*/ + + if (*n >= mnthr1) { + + if (wntqn) { + +/* + Path 1t (N much larger than M, JOBZ='N') + No singular vectors to be computed +*/ + + itau = 1; + nwork = itau + *m; + +/* + Compute A=L*Q + (CWorkspace: need 2*M, prefer M+M*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Zero out above L */ + + i__2 = *m - 1; + i__1 = *m - 1; + claset_("U", &i__2, &i__1, &c_b55, &c_b55, &a[(a_dim1 << 1) + + 1], lda); + ie = 1; + itauq = 1; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in A + (CWorkspace: need 3*M, prefer 2*M+2*M*NB) + (RWorkspace: need M) +*/ + + i__2 = *lwork - nwork + 1; + cgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + nrwork = ie + *m; + +/* + Perform bidiagonal SVD, compute singular values only + (CWorkspace: 0) + (RWorkspace: need BDSPAC) +*/ + + sbdsdc_("U", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + + } else if (wntqo) { + +/* + Path 2t (N much larger than M, JOBZ='O') + M right singular vectors to be overwritten on A and + M left singular vectors to be computed in U +*/ + + ivt = 1; + ldwkvt = *m; + +/* WORK(IVT) is M by M */ + + il = ivt + ldwkvt * *m; + if (*lwork >= *m * *n + *m * *m + *m * 3) { + +/* WORK(IL) M by N */ + + ldwrkl = *m; + chunk = *n; + } else { + +/* WORK(IL) is M by CHUNK */ + + ldwrkl = *m; + chunk = (*lwork - *m * *m - *m * 3) / *m; + } + itau = il + ldwrkl * chunk; + nwork = itau + *m; + +/* + Compute A=L*Q + (CWorkspace: need 2*M, prefer M+M*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy L to WORK(IL), zeroing about above it */ + + clacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__2 = *m - 1; + i__1 = *m - 1; + claset_("U", &i__2, &i__1, &c_b55, &c_b55, &work[il + ldwrkl], + &ldwrkl); + +/* + Generate Q in A + (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + cunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in WORK(IL) + (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) + (RWorkspace: need M) +*/ + + i__2 = *lwork - nwork + 1; + cgebrd_(m, m, &work[il], &ldwrkl, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = ie + *m; + irvt = iru + *m * *m; + nrwork = irvt + *m * *m; + sbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix WORK(IU) + Overwrite WORK(IU) by the left singular vectors of L + (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) + Overwrite WORK(IVT) by the right singular vectors of L + (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt); + i__2 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply right singular vectors of L in WORK(IL) by Q + in A, storing result in WORK(IL) and copying to A + (CWorkspace: need 2*M*M, prefer M*M+M*N)) + (RWorkspace: 0) +*/ + + i__2 = *n; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + cgemm_("N", "N", m, &blk, m, &c_b56, &work[ivt], m, &a[ + i__ * a_dim1 + 1], lda, &c_b55, &work[il], & + ldwrkl); + clacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 + + 1], lda); +/* L40: */ + } + + } else if (wntqs) { + +/* + Path 3t (N much larger than M, JOBZ='S') + M right singular vectors to be computed in VT and + M left singular vectors to be computed in U +*/ + + il = 1; + +/* WORK(IL) is M by M */ + + ldwrkl = *m; + itau = il + ldwrkl * *m; + nwork = itau + *m; + +/* + Compute A=L*Q + (CWorkspace: need 2*M, prefer M+M*NB) + (RWorkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy L to WORK(IL), zeroing out above it */ + + clacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__1 = *m - 1; + i__2 = *m - 1; + claset_("U", &i__1, &i__2, &c_b55, &c_b55, &work[il + ldwrkl], + &ldwrkl); + +/* + Generate Q in A + (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) + (RWorkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + cunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in WORK(IL) + (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) + (RWorkspace: need M) +*/ + + i__1 = *lwork - nwork + 1; + cgebrd_(m, m, &work[il], &ldwrkl, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = ie + *m; + irvt = iru + *m * *m; + nrwork = irvt + *m * *m; + sbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of L + (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by left singular vectors of L + (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + +/* + Copy VT to WORK(IL), multiply right singular vectors of L + in WORK(IL) by Q in A, storing result in VT + (CWorkspace: need M*M) + (RWorkspace: 0) +*/ + + clacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl); + cgemm_("N", "N", m, n, m, &c_b56, &work[il], &ldwrkl, &a[ + a_offset], lda, &c_b55, &vt[vt_offset], ldvt); + + } else if (wntqa) { + +/* + Path 9t (N much larger than M, JOBZ='A') + N right singular vectors to be computed in VT and + M left singular vectors to be computed in U +*/ + + ivt = 1; + +/* WORK(IVT) is M by M */ + + ldwkvt = *m; + itau = ivt + ldwkvt * *m; + nwork = itau + *m; + +/* + Compute A=L*Q, copying result to VT + (CWorkspace: need 2*M, prefer M+M*NB) + (RWorkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + +/* + Generate Q in VT + (CWorkspace: need M+N, prefer M+N*NB) + (RWorkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + cunglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[ + nwork], &i__1, &ierr); + +/* Produce L in A, zeroing out above it */ + + i__1 = *m - 1; + i__2 = *m - 1; + claset_("U", &i__1, &i__2, &c_b55, &c_b55, &a[(a_dim1 << 1) + + 1], lda); + ie = 1; + itauq = itau; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in A + (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) + (RWorkspace: need M) +*/ + + i__1 = *lwork - nwork + 1; + cgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = ie + *m; + irvt = iru + *m * *m; + nrwork = irvt + *m * *m; + sbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of L + (CWorkspace: need 3*M, prefer 2*M+M*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) + Overwrite WORK(IVT) by right singular vectors of L + (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) + (RWorkspace: 0) +*/ + + clacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt); + i__1 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", m, m, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__1, & + ierr); + +/* + Multiply right singular vectors of L in WORK(IVT) by + Q in VT, storing result in A + (CWorkspace: need M*M) + (RWorkspace: 0) +*/ + + cgemm_("N", "N", m, n, m, &c_b56, &work[ivt], &ldwkvt, &vt[ + vt_offset], ldvt, &c_b55, &a[a_offset], lda); + +/* Copy right singular vectors of A from A to VT */ + + clacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + + } + + } else if (*n >= mnthr2) { + +/* + MNTHR2 <= N < MNTHR1 + + Path 5t (N much larger than M, but not as much as MNTHR1) + Reduce to bidiagonal form without QR decomposition, use + CUNGBR and matrix multiplication to compute singular vectors +*/ + + + ie = 1; + nrwork = ie + *m; + itauq = 1; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize A + (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) + (RWorkspace: M) +*/ + + i__1 = *lwork - nwork + 1; + cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__1, &ierr); + + if (wntqn) { + +/* + Compute singular values only + (Cworkspace: 0) + (Rworkspace: need BDSPAC) +*/ + + sbdsdc_("L", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + } else if (wntqo) { + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + ivt = nwork; + +/* + Copy A to U, generate Q + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ + + clacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + cungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__1, &ierr); + +/* + Generate P**H in A + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + cungbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[ + nwork], &i__1, &ierr); + + ldwkvt = *m; + if (*lwork >= *m * *n + *m * 3) { + +/* WORK( IVT ) is M by N */ + + nwork = ivt + ldwkvt * *n; + chunk = *n; + } else { + +/* WORK( IVT ) is M by CHUNK */ + + chunk = (*lwork - *m * 3) / *m; + nwork = ivt + ldwkvt * chunk; + } + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Multiply Q in U by real matrix RWORK(IRVT) + storing the result in WORK(IVT), copying to U + (Cworkspace: need 0) + (Rworkspace: need 2*M*M) +*/ + + clacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &work[ivt], & + ldwkvt, &rwork[nrwork]); + clacpy_("F", m, m, &work[ivt], &ldwkvt, &u[u_offset], ldu); + +/* + Multiply RWORK(IRVT) by P**H in A, storing the + result in WORK(IVT), copying to A + (CWorkspace: need M*M, prefer M*N) + (Rworkspace: need 2*M*M, prefer 2*M*N) +*/ + + nrwork = iru; + i__1 = *n; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + clarcm_(m, &blk, &rwork[irvt], m, &a[i__ * a_dim1 + 1], + lda, &work[ivt], &ldwkvt, &rwork[nrwork]); + clacpy_("F", m, &blk, &work[ivt], &ldwkvt, &a[i__ * + a_dim1 + 1], lda); +/* L50: */ + } + } else if (wntqs) { + +/* + Copy A to U, generate Q + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ + + clacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + cungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__2, &ierr); + +/* + Copy A to VT, generate P**H + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ + + clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + cungbr_("P", m, n, m, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Multiply Q in U by real matrix RWORK(IRU), storing the + result in A, copying to U + (CWorkspace: need 0) + (Rworkspace: need 3*M*M) +*/ + + clacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &a[a_offset], + lda, &rwork[nrwork]); + clacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu); + +/* + Multiply real matrix RWORK(IRVT) by P**H in VT, + storing the result in A, copying to VT + (Cworkspace: need 0) + (Rworkspace: need M*M+2*M*N) +*/ + + nrwork = iru; + clarcm_(m, n, &rwork[irvt], m, &vt[vt_offset], ldvt, &a[ + a_offset], lda, &rwork[nrwork]); + clacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + } else { + +/* + Copy A to U, generate Q + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ + + clacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + cungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__2, &ierr); + +/* + Copy A to VT, generate P**H + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ + + clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + cungbr_("P", n, n, m, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Multiply Q in U by real matrix RWORK(IRU), storing the + result in A, copying to U + (CWorkspace: need 0) + (Rworkspace: need 3*M*M) +*/ + + clacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &a[a_offset], + lda, &rwork[nrwork]); + clacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu); + +/* + Multiply real matrix RWORK(IRVT) by P**H in VT, + storing the result in A, copying to VT + (Cworkspace: need 0) + (Rworkspace: need M*M+2*M*N) +*/ + + clarcm_(m, n, &rwork[irvt], m, &vt[vt_offset], ldvt, &a[ + a_offset], lda, &rwork[nrwork]); + clacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + } + + } else { + +/* + N .LT. MNTHR2 + + Path 6t (N greater than M, but not much larger) + Reduce to bidiagonal form without LQ decomposition + Use CUNMBR to compute singular vectors +*/ + + ie = 1; + nrwork = ie + *m; + itauq = 1; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize A + (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) + (RWorkspace: M) +*/ + + i__2 = *lwork - nwork + 1; + cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* + Compute singular values only + (Cworkspace: 0) + (Rworkspace: need BDSPAC) +*/ + + sbdsdc_("L", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + } else if (wntqo) { + ldwkvt = *m; + ivt = nwork; + if (*lwork >= *m * *n + *m * 3) { + +/* WORK( IVT ) is M by N */ + + claset_("F", m, n, &c_b55, &c_b55, &work[ivt], &ldwkvt); + nwork = ivt + ldwkvt * *n; + } else { + +/* WORK( IVT ) is M by CHUNK */ + + chunk = (*lwork - *m * 3) / *m; + nwork = ivt + ldwkvt * chunk; + } + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of A + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: need 0) +*/ + + clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + + if (*lwork >= *m * *n + *m * 3) { + +/* + Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) + Overwrite WORK(IVT) by right singular vectors of A, + copying to A + (Cworkspace: need M*N+2*M, prefer M*N+M+M*NB) + (Rworkspace: need 0) +*/ + + clacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt); + i__2 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", m, n, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, + &ierr); + clacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda); + } else { + +/* + Generate P**H in A + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: need 0) +*/ + + i__2 = *lwork - nwork + 1; + cungbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* + Multiply Q in A by real matrix RWORK(IRU), storing the + result in WORK(IU), copying to A + (CWorkspace: need M*M, prefer M*N) + (Rworkspace: need 3*M*M, prefer M*M+2*M*N) +*/ + + nrwork = iru; + i__2 = *n; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + clarcm_(m, &blk, &rwork[irvt], m, &a[i__ * a_dim1 + 1] + , lda, &work[ivt], &ldwkvt, &rwork[nrwork]); + clacpy_("F", m, &blk, &work[ivt], &ldwkvt, &a[i__ * + a_dim1 + 1], lda); +/* L60: */ + } + } + } else if (wntqs) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of A + (CWorkspace: need 3*M, prefer 2*M+M*NB) + (RWorkspace: M*M) +*/ + + clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of A + (CWorkspace: need 3*M, prefer 2*M+M*NB) + (RWorkspace: M*M) +*/ + + claset_("F", m, n, &c_b55, &c_b55, &vt[vt_offset], ldvt); + clacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", m, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } else { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + + sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of A + (CWorkspace: need 3*M, prefer 2*M+M*NB) + (RWorkspace: M*M) +*/ + + clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + +/* Set the right corner of VT to identity matrix */ + + i__1 = *n - *m; + i__2 = *n - *m; + claset_("F", &i__1, &i__2, &c_b55, &c_b56, &vt[*m + 1 + (*m + + 1) * vt_dim1], ldvt); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of A + (CWorkspace: need 2*M+N, prefer 2*M+N*NB) + (RWorkspace: M*M) +*/ + + claset_("F", n, n, &c_b55, &c_b55, &vt[vt_offset], ldvt); + clacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", n, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } + + } + + } + +/* Undo scaling if necessary */ + + if (iscl == 1) { + if (anrm > bignum) { + slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + if (anrm < smlnum) { + slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + } + +/* Return optimal workspace in WORK(1) */ + + work[1].r = (real) maxwrk, work[1].i = 0.f; + + return 0; + +/* End of CGESDD */ + +} /* cgesdd_ */ + +/* Subroutine */ int cgesv_(integer *n, integer *nrhs, complex *a, integer * + lda, integer *ipiv, complex *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int cgetrf_(integer *, integer *, complex *, + integer *, integer *, integer *), xerbla_(char *, integer *), cgetrs_(char *, integer *, integer *, complex *, integer + *, integer *, complex *, integer *, integer *); + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + CGESV computes the solution to a complex system of linear equations + A * X = B, + where A is an N-by-N matrix and X and B are N-by-NRHS matrices. + + The LU decomposition with partial pivoting and row interchanges is + used to factor A as + A = P * L * U, + where P is a permutation matrix, L is unit lower triangular, and U is + upper triangular. The factored form of A is then used to solve the + system of equations A * X = B. + + Arguments + ========= + + N (input) INTEGER + The number of linear equations, i.e., the order of the + matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the N-by-N coefficient matrix A. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + IPIV (output) INTEGER array, dimension (N) + The pivot indices that define the permutation matrix P; + row i of the matrix was interchanged with row IPIV(i). + + B (input/output) COMPLEX array, dimension (LDB,NRHS) + On entry, the N-by-NRHS matrix of right hand side matrix B. + On exit, if INFO = 0, the N-by-NRHS solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, U(i,i) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, so the solution could not be computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*nrhs < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGESV ", &i__1); + return 0; + } + +/* Compute the LU factorization of A. */ + + cgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + + cgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[ + b_offset], ldb, info); + } + return 0; + +/* End of CGESV */ + +} /* cgesv_ */ + +/* Subroutine */ int cgetf2_(integer *m, integer *n, complex *a, integer *lda, + integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + complex q__1; + + /* Builtin functions */ + void c_div(complex *, complex *, complex *); + + /* Local variables */ + static integer j, jp; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *), cgeru_(integer *, integer *, complex *, complex *, + integer *, complex *, integer *, complex *, integer *), cswap_( + integer *, complex *, integer *, complex *, integer *); + extern integer icamax_(integer *, complex *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CGETF2 computes an LU factorization of a general m-by-n matrix A + using partial pivoting with row interchanges. + + The factorization has the form + A = P * L * U + where P is a permutation matrix, L is lower triangular with unit + diagonal elements (lower trapezoidal if m > n), and U is upper + triangular (upper trapezoidal if m < n). + + This is the right-looking Level 2 BLAS version of the algorithm. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the m by n matrix to be factored. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + IPIV (output) INTEGER array, dimension (min(M,N)) + The pivot indices; for 1 <= i <= min(M,N), row i of the + matrix was interchanged with row IPIV(i). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + > 0: if INFO = k, U(k,k) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, and division by zero will occur if it is used + to solve a system of equations. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGETF2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { + +/* Find pivot and test for singularity. */ + + i__2 = *m - j + 1; + jp = j - 1 + icamax_(&i__2, &a[j + j * a_dim1], &c__1); + ipiv[j] = jp; + i__2 = jp + j * a_dim1; + if (a[i__2].r != 0.f || a[i__2].i != 0.f) { + +/* Apply the interchange to columns 1:N. */ + + if (jp != j) { + cswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda); + } + +/* Compute elements J+1:M of J-th column. */ + + if (j < *m) { + i__2 = *m - j; + c_div(&q__1, &c_b56, &a[j + j * a_dim1]); + cscal_(&i__2, &q__1, &a[j + 1 + j * a_dim1], &c__1); + } + + } else if (*info == 0) { + + *info = j; + } + + if (j < min(*m,*n)) { + +/* Update trailing submatrix. */ + + i__2 = *m - j; + i__3 = *n - j; + q__1.r = -1.f, q__1.i = -0.f; + cgeru_(&i__2, &i__3, &q__1, &a[j + 1 + j * a_dim1], &c__1, &a[j + + (j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], lda) + ; + } +/* L10: */ + } + return 0; + +/* End of CGETF2 */ + +} /* cgetf2_ */ + +/* Subroutine */ int cgetrf_(integer *m, integer *n, complex *a, integer *lda, + integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1; + + /* Local variables */ + static integer i__, j, jb, nb; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *); + static integer iinfo; + extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *), cgetf2_(integer *, + integer *, complex *, integer *, integer *, integer *), xerbla_( + char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int claswp_(integer *, complex *, integer *, + integer *, integer *, integer *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CGETRF computes an LU factorization of a general M-by-N matrix A + using partial pivoting with row interchanges. + + The factorization has the form + A = P * L * U + where P is a permutation matrix, L is lower triangular with unit + diagonal elements (lower trapezoidal if m > n), and U is upper + triangular (upper trapezoidal if m < n). + + This is the right-looking Level 3 BLAS version of the algorithm. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the M-by-N matrix to be factored. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + IPIV (output) INTEGER array, dimension (min(M,N)) + The pivot indices; for 1 <= i <= min(M,N), row i of the + matrix was interchanged with row IPIV(i). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, U(i,i) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, and division by zero will occur if it is used + to solve a system of equations. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGETRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "CGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + if (nb <= 1 || nb >= min(*m,*n)) { + +/* Use unblocked code. */ + + cgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); + } else { + +/* Use blocked code. */ + + i__1 = min(*m,*n); + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = min(*m,*n) - j + 1; + jb = min(i__3,nb); + +/* + Factor diagonal and subdiagonal blocks and test for exact + singularity. +*/ + + i__3 = *m - j + 1; + cgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); + +/* Adjust INFO and the pivot indices. */ + + if (*info == 0 && iinfo > 0) { + *info = iinfo + j - 1; + } +/* Computing MIN */ + i__4 = *m, i__5 = j + jb - 1; + i__3 = min(i__4,i__5); + for (i__ = j; i__ <= i__3; ++i__) { + ipiv[i__] = j - 1 + ipiv[i__]; +/* L10: */ + } + +/* Apply interchanges to columns 1:J-1. */ + + i__3 = j - 1; + i__4 = j + jb - 1; + claswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); + + if (j + jb <= *n) { + +/* Apply interchanges to columns J+JB:N. */ + + i__3 = *n - j - jb + 1; + i__4 = j + jb - 1; + claswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & + ipiv[1], &c__1); + +/* Compute block row of U. */ + + i__3 = *n - j - jb + 1; + ctrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & + c_b56, &a[j + j * a_dim1], lda, &a[j + (j + jb) * + a_dim1], lda); + if (j + jb <= *m) { + +/* Update trailing submatrix. */ + + i__3 = *m - j - jb + 1; + i__4 = *n - j - jb + 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, + &q__1, &a[j + jb + j * a_dim1], lda, &a[j + (j + + jb) * a_dim1], lda, &c_b56, &a[j + jb + (j + jb) * + a_dim1], lda); + } + } +/* L20: */ + } + } + return 0; + +/* End of CGETRF */ + +} /* cgetrf_ */ + +/* Subroutine */ int cgetrs_(char *trans, integer *n, integer *nrhs, complex * + a, integer *lda, integer *ipiv, complex *b, integer *ldb, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *), xerbla_(char *, + integer *), claswp_(integer *, complex *, integer *, + integer *, integer *, integer *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CGETRS solves a system of linear equations + A * X = B, A**T * X = B, or A**H * X = B + with a general N-by-N matrix A using the LU factorization computed + by CGETRF. + + Arguments + ========= + + TRANS (input) CHARACTER*1 + Specifies the form of the system of equations: + = 'N': A * X = B (No transpose) + = 'T': A**T * X = B (Transpose) + = 'C': A**H * X = B (Conjugate transpose) + + N (input) INTEGER + The order of the matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input) COMPLEX array, dimension (LDA,N) + The factors L and U from the factorization A = P*L*U + as computed by CGETRF. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + IPIV (input) INTEGER array, dimension (N) + The pivot indices from CGETRF; for 1<=i<=N, row i of the + matrix was interchanged with row IPIV(i). + + B (input/output) COMPLEX array, dimension (LDB,NRHS) + On entry, the right hand side matrix B. + On exit, the solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + notran = lsame_(trans, "N"); + if (! notran && ! lsame_(trans, "T") && ! lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGETRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (notran) { + +/* + Solve A * X = B. + + Apply row interchanges to the right hand sides. +*/ + + claswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); + +/* Solve L*X = B, overwriting B with X. */ + + ctrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b56, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. */ + + ctrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b56, & + a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* + Solve A**T * X = B or A**H * X = B. + + Solve U'*X = B, overwriting B with X. +*/ + + ctrsm_("Left", "Upper", trans, "Non-unit", n, nrhs, &c_b56, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve L'*X = B, overwriting B with X. */ + + ctrsm_("Left", "Lower", trans, "Unit", n, nrhs, &c_b56, &a[a_offset], + lda, &b[b_offset], ldb); + +/* Apply row interchanges to the solution vectors. */ + + claswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); + } + + return 0; + +/* End of CGETRS */ + +} /* cgetrs_ */ + +/* Subroutine */ int cheevd_(char *jobz, char *uplo, integer *n, complex *a, + integer *lda, real *w, complex *work, integer *lwork, real *rwork, + integer *lrwork, integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + real r__1, r__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static real eps; + static integer inde; + static real anrm; + static integer imax; + static real rmin, rmax; + static integer lopt; + static real sigma; + extern logical lsame_(char *, char *); + static integer iinfo; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + static integer lwmin, liopt; + static logical lower; + static integer llrwk, lropt; + static logical wantz; + static integer indwk2, llwrk2; + extern doublereal clanhe_(char *, char *, integer *, complex *, integer *, + real *); + static integer iscale; + extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, complex *, integer *, integer *), cstedc_(char *, integer *, real *, real *, complex *, + integer *, complex *, integer *, real *, integer *, integer *, + integer *, integer *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int chetrd_(char *, integer *, complex *, integer + *, real *, real *, complex *, complex *, integer *, integer *), clacpy_(char *, integer *, integer *, complex *, integer + *, complex *, integer *); + static real safmin; + extern /* Subroutine */ int xerbla_(char *, integer *); + static real bignum; + static integer indtau, indrwk, indwrk, liwmin; + extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *); + static integer lrwmin; + extern /* Subroutine */ int cunmtr_(char *, char *, char *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *); + static integer llwork; + static real smlnum; + static logical lquery; + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CHEEVD computes all eigenvalues and, optionally, eigenvectors of a + complex Hermitian matrix A. If eigenvectors are desired, it uses a + divide and conquer algorithm. + + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + JOBZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only; + = 'V': Compute eigenvalues and eigenvectors. + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA, N) + On entry, the Hermitian matrix A. If UPLO = 'U', the + leading N-by-N upper triangular part of A contains the + upper triangular part of the matrix A. If UPLO = 'L', + the leading N-by-N lower triangular part of A contains + the lower triangular part of the matrix A. + On exit, if JOBZ = 'V', then if INFO = 0, A contains the + orthonormal eigenvectors of the matrix A. + If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') + or the upper triangle (if UPLO='U') of A, including the + diagonal, is destroyed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + W (output) REAL array, dimension (N) + If INFO = 0, the eigenvalues in ascending order. + + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The length of the array WORK. + If N <= 1, LWORK must be at least 1. + If JOBZ = 'N' and N > 1, LWORK must be at least N + 1. + If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + RWORK (workspace/output) REAL array, + dimension (LRWORK) + On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. + + LRWORK (input) INTEGER + The dimension of the array RWORK. + If N <= 1, LRWORK must be at least 1. + If JOBZ = 'N' and N > 1, LRWORK must be at least N. + If JOBZ = 'V' and N > 1, LRWORK must be at least + 1 + 5*N + 2*N**2. + + If LRWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal size of the RWORK array, + returns this value as the first entry of the RWORK array, and + no error message related to LRWORK is issued by XERBLA. + + IWORK (workspace/output) INTEGER array, dimension (LIWORK) + On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. + + LIWORK (input) INTEGER + The dimension of the array IWORK. + If N <= 1, LIWORK must be at least 1. + If JOBZ = 'N' and N > 1, LIWORK must be at least 1. + If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. + + If LIWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal size of the IWORK array, + returns this value as the first entry of the IWORK array, and + no error message related to LIWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the algorithm failed to converge; i + off-diagonal elements of an intermediate tridiagonal + form did not converge to zero. + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --w; + --work; + --rwork; + --iwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + lower = lsame_(uplo, "L"); + lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; + + *info = 0; + if (*n <= 1) { + lwmin = 1; + lrwmin = 1; + liwmin = 1; + lopt = lwmin; + lropt = lrwmin; + liopt = liwmin; + } else { + if (wantz) { + lwmin = (*n << 1) + *n * *n; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1); + liwmin = *n * 5 + 3; + } else { + lwmin = *n + 1; + lrwmin = *n; + liwmin = 1; + } + lopt = lwmin; + lropt = lrwmin; + liopt = liwmin; + } + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -10; + } else if (*liwork < liwmin && ! lquery) { + *info = -12; + } + + if (*info == 0) { + work[1].r = (real) lopt, work[1].i = 0.f; + rwork[1] = (real) lropt; + iwork[1] = liopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHEEVD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + if (wantz) { + i__1 = a_dim1 + 1; + a[i__1].r = 1.f, a[i__1].i = 0.f; + } + return 0; + } + +/* Get machine constants. */ + + safmin = slamch_("Safe minimum"); + eps = slamch_("Precision"); + smlnum = safmin / eps; + bignum = 1.f / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = clanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]); + iscale = 0; + if (anrm > 0.f && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + clascl_(uplo, &c__0, &c__0, &c_b871, &sigma, n, n, &a[a_offset], lda, + info); + } + +/* Call CHETRD to reduce Hermitian matrix to tridiagonal form. */ + + inde = 1; + indtau = 1; + indwrk = indtau + *n; + indrwk = inde + *n; + indwk2 = indwrk + *n * *n; + llwork = *lwork - indwrk + 1; + llwrk2 = *lwork - indwk2 + 1; + llrwk = *lrwork - indrwk + 1; + chetrd_(uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], &work[indtau], & + work[indwrk], &llwork, &iinfo); +/* Computing MAX */ + i__1 = indwrk; + r__1 = (real) lopt, r__2 = (real) (*n) + work[i__1].r; + lopt = dmax(r__1,r__2); + +/* + For eigenvalues only, call SSTERF. For eigenvectors, first call + CSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the + tridiagonal matrix, then call CUNMTR to multiply it to the + Householder transformations represented as Householder vectors in + A. +*/ + + if (! wantz) { + ssterf_(n, &w[1], &rwork[inde], info); + } else { + cstedc_("I", n, &w[1], &rwork[inde], &work[indwrk], n, &work[indwk2], + &llwrk2, &rwork[indrwk], &llrwk, &iwork[1], liwork, info); + cunmtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ + indwrk], n, &work[indwk2], &llwrk2, &iinfo); + clacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); +/* + Computing MAX + Computing 2nd power +*/ + i__3 = *n; + i__4 = indwk2; + i__1 = lopt, i__2 = *n + i__3 * i__3 + (integer) work[i__4].r; + lopt = max(i__1,i__2); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + r__1 = 1.f / sigma; + sscal_(&imax, &r__1, &w[1], &c__1); + } + + work[1].r = (real) lopt, work[1].i = 0.f; + rwork[1] = (real) lropt; + iwork[1] = liopt; + + return 0; + +/* End of CHEEVD */ + +} /* cheevd_ */ + +/* Subroutine */ int chetd2_(char *uplo, integer *n, complex *a, integer *lda, + real *d__, real *e, complex *tau, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + real r__1; + complex q__1, q__2, q__3, q__4; + + /* Local variables */ + static integer i__; + static complex taui; + extern /* Subroutine */ int cher2_(char *, integer *, complex *, complex * + , integer *, complex *, integer *, complex *, integer *); + static complex alpha; + extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer + *, complex *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int chemv_(char *, integer *, complex *, complex * + , integer *, complex *, integer *, complex *, complex *, integer * + ), caxpy_(integer *, complex *, complex *, integer *, + complex *, integer *); + static logical upper; + extern /* Subroutine */ int clarfg_(integer *, complex *, complex *, + integer *, complex *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + CHETD2 reduces a complex Hermitian matrix A to real symmetric + tridiagonal form T by a unitary similarity transformation: + Q' * A * Q = T. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + Hermitian matrix A is stored: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the Hermitian matrix A. If UPLO = 'U', the leading + n-by-n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n-by-n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit, if UPLO = 'U', the diagonal and first superdiagonal + of A are overwritten by the corresponding elements of the + tridiagonal matrix T, and the elements above the first + superdiagonal, with the array TAU, represent the unitary + matrix Q as a product of elementary reflectors; if UPLO + = 'L', the diagonal and first subdiagonal of A are over- + written by the corresponding elements of the tridiagonal + matrix T, and the elements below the first subdiagonal, with + the array TAU, represent the unitary matrix Q as a product + of elementary reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + D (output) REAL array, dimension (N) + The diagonal elements of the tridiagonal matrix T: + D(i) = A(i,i). + + E (output) REAL array, dimension (N-1) + The off-diagonal elements of the tridiagonal matrix T: + E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. + + TAU (output) COMPLEX array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors + + Q = H(n-1) . . . H(2) H(1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in + A(1:i-1,i+1), and tau in TAU(i). + + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors + + Q = H(1) H(2) . . . H(n-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), + and tau in TAU(i). + + The contents of A on exit are illustrated by the following examples + with n = 5: + + if UPLO = 'U': if UPLO = 'L': + + ( d e v2 v3 v4 ) ( d ) + ( d e v3 v4 ) ( e d ) + ( d e v4 ) ( v1 e d ) + ( d e ) ( v1 v2 e d ) + ( d ) ( v1 v2 v3 e d ) + + where d and e denote diagonal and off-diagonal elements of T, and vi + denotes an element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tau; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHETD2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + + if (upper) { + +/* Reduce the upper triangle of A */ + + i__1 = *n + *n * a_dim1; + i__2 = *n + *n * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + for (i__ = *n - 1; i__ >= 1; --i__) { + +/* + Generate elementary reflector H(i) = I - tau * v * v' + to annihilate A(1:i-1,i+1) +*/ + + i__1 = i__ + (i__ + 1) * a_dim1; + alpha.r = a[i__1].r, alpha.i = a[i__1].i; + clarfg_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &taui); + i__1 = i__; + e[i__1] = alpha.r; + + if (taui.r != 0.f || taui.i != 0.f) { + +/* Apply H(i) from both sides to A(1:i,1:i) */ + + i__1 = i__ + (i__ + 1) * a_dim1; + a[i__1].r = 1.f, a[i__1].i = 0.f; + +/* Compute x := tau * A * v storing x in TAU(1:i) */ + + chemv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * + a_dim1 + 1], &c__1, &c_b55, &tau[1], &c__1) + ; + +/* Compute w := x - 1/2 * tau * (x'*v) * v */ + + q__3.r = -.5f, q__3.i = -0.f; + q__2.r = q__3.r * taui.r - q__3.i * taui.i, q__2.i = q__3.r * + taui.i + q__3.i * taui.r; + cdotc_(&q__4, &i__, &tau[1], &c__1, &a[(i__ + 1) * a_dim1 + 1] + , &c__1); + q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * + q__4.i + q__2.i * q__4.r; + alpha.r = q__1.r, alpha.i = q__1.i; + caxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[ + 1], &c__1); + +/* + Apply the transformation as a rank-2 update: + A := A - v * w' - w * v' +*/ + + q__1.r = -1.f, q__1.i = -0.f; + cher2_(uplo, &i__, &q__1, &a[(i__ + 1) * a_dim1 + 1], &c__1, & + tau[1], &c__1, &a[a_offset], lda); + + } else { + i__1 = i__ + i__ * a_dim1; + i__2 = i__ + i__ * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + } + i__1 = i__ + (i__ + 1) * a_dim1; + i__2 = i__; + a[i__1].r = e[i__2], a[i__1].i = 0.f; + i__1 = i__ + 1; + i__2 = i__ + 1 + (i__ + 1) * a_dim1; + d__[i__1] = a[i__2].r; + i__1 = i__; + tau[i__1].r = taui.r, tau[i__1].i = taui.i; +/* L10: */ + } + i__1 = a_dim1 + 1; + d__[1] = a[i__1].r; + } else { + +/* Reduce the lower triangle of A */ + + i__1 = a_dim1 + 1; + i__2 = a_dim1 + 1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* + Generate elementary reflector H(i) = I - tau * v * v' + to annihilate A(i+2:n,i) +*/ + + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + clarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, & + taui); + i__2 = i__; + e[i__2] = alpha.r; + + if (taui.r != 0.f || taui.i != 0.f) { + +/* Apply H(i) from both sides to A(i+1:n,i+1:n) */ + + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Compute x := tau * A * v storing y in TAU(i:n-1) */ + + i__2 = *n - i__; + chemv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b55, &tau[ + i__], &c__1); + +/* Compute w := x - 1/2 * tau * (x'*v) * v */ + + q__3.r = -.5f, q__3.i = -0.f; + q__2.r = q__3.r * taui.r - q__3.i * taui.i, q__2.i = q__3.r * + taui.i + q__3.i * taui.r; + i__2 = *n - i__; + cdotc_(&q__4, &i__2, &tau[i__], &c__1, &a[i__ + 1 + i__ * + a_dim1], &c__1); + q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * + q__4.i + q__2.i * q__4.r; + alpha.r = q__1.r, alpha.i = q__1.i; + i__2 = *n - i__; + caxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &c__1); + +/* + Apply the transformation as a rank-2 update: + A := A - v * w' - w * v' +*/ + + i__2 = *n - i__; + q__1.r = -1.f, q__1.i = -0.f; + cher2_(uplo, &i__2, &q__1, &a[i__ + 1 + i__ * a_dim1], &c__1, + &tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda); + + } else { + i__2 = i__ + 1 + (i__ + 1) * a_dim1; + i__3 = i__ + 1 + (i__ + 1) * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; + } + i__2 = i__ + 1 + i__ * a_dim1; + i__3 = i__; + a[i__2].r = e[i__3], a[i__2].i = 0.f; + i__2 = i__; + i__3 = i__ + i__ * a_dim1; + d__[i__2] = a[i__3].r; + i__2 = i__; + tau[i__2].r = taui.r, tau[i__2].i = taui.i; +/* L20: */ + } + i__1 = *n; + i__2 = *n + *n * a_dim1; + d__[i__1] = a[i__2].r; + } + + return 0; + +/* End of CHETD2 */ + +} /* chetd2_ */ + +/* Subroutine */ int chetrd_(char *uplo, integer *n, complex *a, integer *lda, + real *d__, real *e, complex *tau, complex *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1; + + /* Local variables */ + static integer i__, j, nb, kk, nx, iws; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + static logical upper; + extern /* Subroutine */ int chetd2_(char *, integer *, complex *, integer + *, real *, real *, complex *, integer *), cher2k_(char *, + char *, integer *, integer *, complex *, complex *, integer *, + complex *, integer *, real *, complex *, integer *), clatrd_(char *, integer *, integer *, complex *, integer + *, real *, complex *, complex *, integer *), xerbla_(char + *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CHETRD reduces a complex Hermitian matrix A to real symmetric + tridiagonal form T by a unitary similarity transformation: + Q**H * A * Q = T. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the Hermitian matrix A. If UPLO = 'U', the leading + N-by-N upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit, if UPLO = 'U', the diagonal and first superdiagonal + of A are overwritten by the corresponding elements of the + tridiagonal matrix T, and the elements above the first + superdiagonal, with the array TAU, represent the unitary + matrix Q as a product of elementary reflectors; if UPLO + = 'L', the diagonal and first subdiagonal of A are over- + written by the corresponding elements of the tridiagonal + matrix T, and the elements below the first subdiagonal, with + the array TAU, represent the unitary matrix Q as a product + of elementary reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + D (output) REAL array, dimension (N) + The diagonal elements of the tridiagonal matrix T: + D(i) = A(i,i). + + E (output) REAL array, dimension (N-1) + The off-diagonal elements of the tridiagonal matrix T: + E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. + + TAU (output) COMPLEX array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= 1. + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors + + Q = H(n-1) . . . H(2) H(1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in + A(1:i-1,i+1), and tau in TAU(i). + + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors + + Q = H(1) H(2) . . . H(n-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), + and tau in TAU(i). + + The contents of A on exit are illustrated by the following examples + with n = 5: + + if UPLO = 'U': if UPLO = 'L': + + ( d e v2 v3 v4 ) ( d ) + ( d e v3 v4 ) ( e d ) + ( d e v4 ) ( v1 e d ) + ( d e ) ( v1 v2 e d ) + ( d ) ( v1 v2 v3 e d ) + + where d and e denote diagonal and off-diagonal elements of T, and vi + denotes an element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tau; + --work; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*lwork < 1 && ! lquery) { + *info = -9; + } + + if (*info == 0) { + +/* Determine the block size. */ + + nb = ilaenv_(&c__1, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + lwkopt = *n * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHETRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + nx = *n; + iws = 1; + if (nb > 1 && nb < *n) { + +/* + Determine when to cross over from blocked to unblocked code + (last block is always handled by unblocked code). + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "CHETRD", uplo, n, &c_n1, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *n) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: determine the + minimum value of NB, and reduce NB or force use of + unblocked code by setting NX = N. + + Computing MAX +*/ + i__1 = *lwork / ldwork; + nb = max(i__1,1); + nbmin = ilaenv_(&c__2, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1, + (ftnlen)6, (ftnlen)1); + if (nb < nbmin) { + nx = *n; + } + } + } else { + nx = *n; + } + } else { + nb = 1; + } + + if (upper) { + +/* + Reduce the upper triangle of A. + Columns 1:kk are handled by the unblocked method. +*/ + + kk = *n - (*n - nx + nb - 1) / nb * nb; + i__1 = kk + 1; + i__2 = -nb; + for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { + +/* + Reduce columns i:i+nb-1 to tridiagonal form and form the + matrix W which is needed to update the unreduced part of + the matrix +*/ + + i__3 = i__ + nb - 1; + clatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & + work[1], &ldwork); + +/* + Update the unreduced submatrix A(1:i-1,1:i-1), using an + update of the form: A := A - V*W' - W*V' +*/ + + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cher2k_(uplo, "No transpose", &i__3, &nb, &q__1, &a[i__ * a_dim1 + + 1], lda, &work[1], &ldwork, &c_b871, &a[a_offset], lda); + +/* + Copy superdiagonal elements back into A, and diagonal + elements into D +*/ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + i__4 = j - 1 + j * a_dim1; + i__5 = j - 1; + a[i__4].r = e[i__5], a[i__4].i = 0.f; + i__4 = j; + i__5 = j + j * a_dim1; + d__[i__4] = a[i__5].r; +/* L10: */ + } +/* L20: */ + } + +/* Use unblocked code to reduce the last or only block */ + + chetd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo); + } else { + +/* Reduce the lower triangle of A */ + + i__2 = *n - nx; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { + +/* + Reduce columns i:i+nb-1 to tridiagonal form and form the + matrix W which is needed to update the unreduced part of + the matrix +*/ + + i__3 = *n - i__ + 1; + clatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], & + tau[i__], &work[1], &ldwork); + +/* + Update the unreduced submatrix A(i+nb:n,i+nb:n), using + an update of the form: A := A - V*W' - W*V' +*/ + + i__3 = *n - i__ - nb + 1; + q__1.r = -1.f, q__1.i = -0.f; + cher2k_(uplo, "No transpose", &i__3, &nb, &q__1, &a[i__ + nb + + i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b871, &a[ + i__ + nb + (i__ + nb) * a_dim1], lda); + +/* + Copy subdiagonal elements back into A, and diagonal + elements into D +*/ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + i__4 = j + 1 + j * a_dim1; + i__5 = j; + a[i__4].r = e[i__5], a[i__4].i = 0.f; + i__4 = j; + i__5 = j + j * a_dim1; + d__[i__4] = a[i__5].r; +/* L30: */ + } +/* L40: */ + } + +/* Use unblocked code to reduce the last or only block */ + + i__1 = *n - i__ + 1; + chetd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], + &tau[i__], &iinfo); + } + + work[1].r = (real) lwkopt, work[1].i = 0.f; + return 0; + +/* End of CHETRD */ + +} /* chetrd_ */ + +/* Subroutine */ int chseqr_(char *job, char *compz, integer *n, integer *ilo, + integer *ihi, complex *h__, integer *ldh, complex *w, complex *z__, + integer *ldz, complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4[2], + i__5, i__6; + real r__1, r__2, r__3, r__4; + complex q__1; + char ch__1[2]; + + /* Builtin functions */ + double r_imag(complex *); + void r_cnjg(complex *, complex *); + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i__, j, k, l; + static complex s[225] /* was [15][15] */, v[16]; + static integer i1, i2, ii, nh, nr, ns, nv; + static complex vv[16]; + static integer itn; + static complex tau; + static integer its; + static real ulp, tst1; + static integer maxb, ierr; + static real unfl; + static complex temp; + static real ovfl; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * + , complex *, integer *, complex *, integer *, complex *, complex * + , integer *), ccopy_(integer *, complex *, integer *, + complex *, integer *); + static integer itemp; + static real rtemp; + static logical initz, wantt, wantz; + static real rwork[1]; + extern doublereal slapy2_(real *, real *); + extern /* Subroutine */ int slabad_(real *, real *), clarfg_(integer *, + complex *, complex *, integer *, complex *); + extern integer icamax_(integer *, complex *, integer *); + extern doublereal slamch_(char *), clanhs_(char *, integer *, + complex *, integer *, real *); + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *), clahqr_(logical *, logical *, integer *, integer *, integer *, + complex *, integer *, complex *, integer *, integer *, complex *, + integer *, integer *), clacpy_(char *, integer *, integer *, + complex *, integer *, complex *, integer *), claset_(char + *, integer *, integer *, complex *, complex *, complex *, integer + *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int clarfx_(char *, integer *, integer *, complex + *, complex *, complex *, integer *, complex *); + static real smlnum; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CHSEQR computes the eigenvalues of a complex upper Hessenberg + matrix H, and, optionally, the matrices T and Z from the Schur + decomposition H = Z T Z**H, where T is an upper triangular matrix + (the Schur form), and Z is the unitary matrix of Schur vectors. + + Optionally Z may be postmultiplied into an input unitary matrix Q, + so that this routine can give the Schur factorization of a matrix A + which has been reduced to the Hessenberg form H by the unitary + matrix Q: A = Q*H*Q**H = (QZ)*T*(QZ)**H. + + Arguments + ========= + + JOB (input) CHARACTER*1 + = 'E': compute eigenvalues only; + = 'S': compute eigenvalues and the Schur form T. + + COMPZ (input) CHARACTER*1 + = 'N': no Schur vectors are computed; + = 'I': Z is initialized to the unit matrix and the matrix Z + of Schur vectors of H is returned; + = 'V': Z must contain an unitary matrix Q on entry, and + the product Q*Z is returned. + + N (input) INTEGER + The order of the matrix H. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to CGEBAL, and then passed to CGEHRD + when the matrix output by CGEBAL is reduced to Hessenberg + form. Otherwise ILO and IHI should be set to 1 and N + respectively. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + H (input/output) COMPLEX array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if JOB = 'S', H contains the upper triangular matrix + T from the Schur decomposition (the Schur form). If + JOB = 'E', the contents of H are unspecified on exit. + + LDH (input) INTEGER + The leading dimension of the array H. LDH >= max(1,N). + + W (output) COMPLEX array, dimension (N) + The computed eigenvalues. If JOB = 'S', the eigenvalues are + stored in the same order as on the diagonal of the Schur form + returned in H, with W(i) = H(i,i). + + Z (input/output) COMPLEX array, dimension (LDZ,N) + If COMPZ = 'N': Z is not referenced. + If COMPZ = 'I': on entry, Z need not be set, and on exit, Z + contains the unitary matrix Z of the Schur vectors of H. + If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q, + which is assumed to be equal to the unit matrix except for + the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z. + Normally Q is the unitary matrix generated by CUNGHR after + the call to CGEHRD which formed the Hessenberg matrix H. + + LDZ (input) INTEGER + The leading dimension of the array Z. + LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise. + + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,N). + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, CHSEQR failed to compute all the + eigenvalues in a total of 30*(IHI-ILO+1) iterations; + elements 1:ilo-1 and i+1:n of W contain those + eigenvalues which have been successfully computed. + + ===================================================================== + + + Decode and test the input parameters +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + wantt = lsame_(job, "S"); + initz = lsame_(compz, "I"); + wantz = initz || lsame_(compz, "V"); + + *info = 0; + i__1 = max(1,*n); + work[1].r = (real) i__1, work[1].i = 0.f; + lquery = *lwork == -1; + if (! lsame_(job, "E") && ! wantt) { + *info = -1; + } else if (! lsame_(compz, "N") && ! wantz) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -4; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -5; + } else if (*ldh < max(1,*n)) { + *info = -7; + } else if (*ldz < 1 || wantz && *ldz < max(1,*n)) { + *info = -10; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHSEQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Initialize Z, if necessary */ + + if (initz) { + claset_("Full", n, n, &c_b55, &c_b56, &z__[z_offset], ldz); + } + +/* Store the eigenvalues isolated by CGEBAL. */ + + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__ + i__ * h_dim1; + w[i__2].r = h__[i__3].r, w[i__2].i = h__[i__3].i; +/* L10: */ + } + i__1 = *n; + for (i__ = *ihi + 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__ + i__ * h_dim1; + w[i__2].r = h__[i__3].r, w[i__2].i = h__[i__3].i; +/* L20: */ + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + if (*ilo == *ihi) { + i__1 = *ilo; + i__2 = *ilo + *ilo * h_dim1; + w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; + return 0; + } + +/* + Set rows and columns ILO to IHI to zero below the first + subdiagonal. +*/ + + i__1 = *ihi - 2; + for (j = *ilo; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j + 2; i__ <= i__2; ++i__) { + i__3 = i__ + j * h_dim1; + h__[i__3].r = 0.f, h__[i__3].i = 0.f; +/* L30: */ + } +/* L40: */ + } + nh = *ihi - *ilo + 1; + +/* + I1 and I2 are the indices of the first row and last column of H + to which transformations must be applied. If eigenvalues only are + being computed, I1 and I2 are re-set inside the main loop. +*/ + + if (wantt) { + i1 = 1; + i2 = *n; + } else { + i1 = *ilo; + i2 = *ihi; + } + +/* Ensure that the subdiagonal elements are real. */ + + i__1 = *ihi; + for (i__ = *ilo + 1; i__ <= i__1; ++i__) { + i__2 = i__ + (i__ - 1) * h_dim1; + temp.r = h__[i__2].r, temp.i = h__[i__2].i; + if (r_imag(&temp) != 0.f) { + r__1 = temp.r; + r__2 = r_imag(&temp); + rtemp = slapy2_(&r__1, &r__2); + i__2 = i__ + (i__ - 1) * h_dim1; + h__[i__2].r = rtemp, h__[i__2].i = 0.f; + q__1.r = temp.r / rtemp, q__1.i = temp.i / rtemp; + temp.r = q__1.r, temp.i = q__1.i; + if (i2 > i__) { + i__2 = i2 - i__; + r_cnjg(&q__1, &temp); + cscal_(&i__2, &q__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); + } + i__2 = i__ - i1; + cscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1); + if (i__ < *ihi) { + i__2 = i__ + 1 + i__ * h_dim1; + i__3 = i__ + 1 + i__ * h_dim1; + q__1.r = temp.r * h__[i__3].r - temp.i * h__[i__3].i, q__1.i = + temp.r * h__[i__3].i + temp.i * h__[i__3].r; + h__[i__2].r = q__1.r, h__[i__2].i = q__1.i; + } + if (wantz) { + cscal_(&nh, &temp, &z__[*ilo + i__ * z_dim1], &c__1); + } + } +/* L50: */ + } + +/* + Determine the order of the multi-shift QR algorithm to be used. + + Writing concatenation +*/ + i__4[0] = 1, a__1[0] = job; + i__4[1] = 1, a__1[1] = compz; + s_cat(ch__1, a__1, i__4, &c__2, (ftnlen)2); + ns = ilaenv_(&c__4, "CHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( + ftnlen)2); +/* Writing concatenation */ + i__4[0] = 1, a__1[0] = job; + i__4[1] = 1, a__1[1] = compz; + s_cat(ch__1, a__1, i__4, &c__2, (ftnlen)2); + maxb = ilaenv_(&c__8, "CHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( + ftnlen)2); + if (ns <= 1 || ns > nh || maxb >= nh) { + +/* Use the standard double-shift algorithm */ + + clahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], ilo, + ihi, &z__[z_offset], ldz, info); + return 0; + } + maxb = max(2,maxb); +/* Computing MIN */ + i__1 = min(ns,maxb); + ns = min(i__1,15); + +/* + Now 1 < NS <= MAXB < NH. + + Set machine-dependent constants for the stopping criterion. + If norm(H) <= sqrt(OVFL), overflow should not occur. +*/ + + unfl = slamch_("Safe minimum"); + ovfl = 1.f / unfl; + slabad_(&unfl, &ovfl); + ulp = slamch_("Precision"); + smlnum = unfl * (nh / ulp); + +/* ITN is the total number of multiple-shift QR iterations allowed. */ + + itn = nh * 30; + +/* + The main loop begins here. I is the loop index and decreases from + IHI to ILO in steps of at most MAXB. Each iteration of the loop + works with the active submatrix in rows and columns L to I. + Eigenvalues I+1 to IHI have already converged. Either L = ILO, or + H(L,L-1) is negligible so that the matrix splits. +*/ + + i__ = *ihi; +L60: + if (i__ < *ilo) { + goto L180; + } + +/* + Perform multiple-shift QR iterations on rows and columns ILO to I + until a submatrix of order at most MAXB splits off at the bottom + because a subdiagonal element has become negligible. +*/ + + l = *ilo; + i__1 = itn; + for (its = 0; its <= i__1; ++its) { + +/* Look for a single small subdiagonal element. */ + + i__2 = l + 1; + for (k = i__; k >= i__2; --k) { + i__3 = k - 1 + (k - 1) * h_dim1; + i__5 = k + k * h_dim1; + tst1 = (r__1 = h__[i__3].r, dabs(r__1)) + (r__2 = r_imag(&h__[k - + 1 + (k - 1) * h_dim1]), dabs(r__2)) + ((r__3 = h__[i__5] + .r, dabs(r__3)) + (r__4 = r_imag(&h__[k + k * h_dim1]), + dabs(r__4))); + if (tst1 == 0.f) { + i__3 = i__ - l + 1; + tst1 = clanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, rwork); + } + i__3 = k + (k - 1) * h_dim1; +/* Computing MAX */ + r__2 = ulp * tst1; + if ((r__1 = h__[i__3].r, dabs(r__1)) <= dmax(r__2,smlnum)) { + goto L80; + } +/* L70: */ + } +L80: + l = k; + if (l > *ilo) { + +/* H(L,L-1) is negligible. */ + + i__2 = l + (l - 1) * h_dim1; + h__[i__2].r = 0.f, h__[i__2].i = 0.f; + } + +/* Exit from loop if a submatrix of order <= MAXB has split off. */ + + if (l >= i__ - maxb + 1) { + goto L170; + } + +/* + Now the active submatrix is in rows and columns L to I. If + eigenvalues only are being computed, only the active submatrix + need be transformed. +*/ + + if (! wantt) { + i1 = l; + i2 = i__; + } + + if (its == 20 || its == 30) { + +/* Exceptional shifts. */ + + i__2 = i__; + for (ii = i__ - ns + 1; ii <= i__2; ++ii) { + i__3 = ii; + i__5 = ii + (ii - 1) * h_dim1; + i__6 = ii + ii * h_dim1; + r__3 = ((r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = h__[i__6] + .r, dabs(r__2))) * 1.5f; + w[i__3].r = r__3, w[i__3].i = 0.f; +/* L90: */ + } + } else { + +/* Use eigenvalues of trailing submatrix of order NS as shifts. */ + + clacpy_("Full", &ns, &ns, &h__[i__ - ns + 1 + (i__ - ns + 1) * + h_dim1], ldh, s, &c__15); + clahqr_(&c_false, &c_false, &ns, &c__1, &ns, s, &c__15, &w[i__ - + ns + 1], &c__1, &ns, &z__[z_offset], ldz, &ierr); + if (ierr > 0) { + +/* + If CLAHQR failed to compute all NS eigenvalues, use the + unconverged diagonal elements as the remaining shifts. +*/ + + i__2 = ierr; + for (ii = 1; ii <= i__2; ++ii) { + i__3 = i__ - ns + ii; + i__5 = ii + ii * 15 - 16; + w[i__3].r = s[i__5].r, w[i__3].i = s[i__5].i; +/* L100: */ + } + } + } + +/* + Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns)) + where G is the Hessenberg submatrix H(L:I,L:I) and w is + the vector of shifts (stored in W). The result is + stored in the local array V. +*/ + + v[0].r = 1.f, v[0].i = 0.f; + i__2 = ns + 1; + for (ii = 2; ii <= i__2; ++ii) { + i__3 = ii - 1; + v[i__3].r = 0.f, v[i__3].i = 0.f; +/* L110: */ + } + nv = 1; + i__2 = i__; + for (j = i__ - ns + 1; j <= i__2; ++j) { + i__3 = nv + 1; + ccopy_(&i__3, v, &c__1, vv, &c__1); + i__3 = nv + 1; + i__5 = j; + q__1.r = -w[i__5].r, q__1.i = -w[i__5].i; + cgemv_("No transpose", &i__3, &nv, &c_b56, &h__[l + l * h_dim1], + ldh, vv, &c__1, &q__1, v, &c__1); + ++nv; + +/* + Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero, + reset it to the unit vector. +*/ + + itemp = icamax_(&nv, v, &c__1); + i__3 = itemp - 1; + rtemp = (r__1 = v[i__3].r, dabs(r__1)) + (r__2 = r_imag(&v[itemp + - 1]), dabs(r__2)); + if (rtemp == 0.f) { + v[0].r = 1.f, v[0].i = 0.f; + i__3 = nv; + for (ii = 2; ii <= i__3; ++ii) { + i__5 = ii - 1; + v[i__5].r = 0.f, v[i__5].i = 0.f; +/* L120: */ + } + } else { + rtemp = dmax(rtemp,smlnum); + r__1 = 1.f / rtemp; + csscal_(&nv, &r__1, v, &c__1); + } +/* L130: */ + } + +/* Multiple-shift QR step */ + + i__2 = i__ - 1; + for (k = l; k <= i__2; ++k) { + +/* + The first iteration of this loop determines a reflection G + from the vector V and applies it from left and right to H, + thus creating a nonzero bulge below the subdiagonal. + + Each subsequent iteration determines a reflection G to + restore the Hessenberg form in the (K-1)th column, and thus + chases the bulge one step toward the bottom of the active + submatrix. NR is the order of G. + + Computing MIN +*/ + i__3 = ns + 1, i__5 = i__ - k + 1; + nr = min(i__3,i__5); + if (k > l) { + ccopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); + } + clarfg_(&nr, v, &v[1], &c__1, &tau); + if (k > l) { + i__3 = k + (k - 1) * h_dim1; + h__[i__3].r = v[0].r, h__[i__3].i = v[0].i; + i__3 = i__; + for (ii = k + 1; ii <= i__3; ++ii) { + i__5 = ii + (k - 1) * h_dim1; + h__[i__5].r = 0.f, h__[i__5].i = 0.f; +/* L140: */ + } + } + v[0].r = 1.f, v[0].i = 0.f; + +/* + Apply G' from the left to transform the rows of the matrix + in columns K to I2. +*/ + + i__3 = i2 - k + 1; + r_cnjg(&q__1, &tau); + clarfx_("Left", &nr, &i__3, v, &q__1, &h__[k + k * h_dim1], ldh, & + work[1]); + +/* + Apply G from the right to transform the columns of the + matrix in rows I1 to min(K+NR,I). + + Computing MIN +*/ + i__5 = k + nr; + i__3 = min(i__5,i__) - i1 + 1; + clarfx_("Right", &i__3, &nr, v, &tau, &h__[i1 + k * h_dim1], ldh, + &work[1]); + + if (wantz) { + +/* Accumulate transformations in the matrix Z */ + + clarfx_("Right", &nh, &nr, v, &tau, &z__[*ilo + k * z_dim1], + ldz, &work[1]); + } +/* L150: */ + } + +/* Ensure that H(I,I-1) is real. */ + + i__2 = i__ + (i__ - 1) * h_dim1; + temp.r = h__[i__2].r, temp.i = h__[i__2].i; + if (r_imag(&temp) != 0.f) { + r__1 = temp.r; + r__2 = r_imag(&temp); + rtemp = slapy2_(&r__1, &r__2); + i__2 = i__ + (i__ - 1) * h_dim1; + h__[i__2].r = rtemp, h__[i__2].i = 0.f; + q__1.r = temp.r / rtemp, q__1.i = temp.i / rtemp; + temp.r = q__1.r, temp.i = q__1.i; + if (i2 > i__) { + i__2 = i2 - i__; + r_cnjg(&q__1, &temp); + cscal_(&i__2, &q__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); + } + i__2 = i__ - i1; + cscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1); + if (wantz) { + cscal_(&nh, &temp, &z__[*ilo + i__ * z_dim1], &c__1); + } + } + +/* L160: */ + } + +/* Failure to converge in remaining number of iterations */ + + *info = i__; + return 0; + +L170: + +/* + A submatrix of order <= MAXB in rows and columns L to I has split + off. Use the double-shift QR algorithm to handle it. +*/ + + clahqr_(&wantt, &wantz, n, &l, &i__, &h__[h_offset], ldh, &w[1], ilo, ihi, + &z__[z_offset], ldz, info); + if (*info > 0) { + return 0; + } + +/* + Decrement number of remaining iterations, and return to start of + the main loop with a new value of I. +*/ + + itn -= its; + i__ = l - 1; + goto L60; + +L180: + i__1 = max(1,*n); + work[1].r = (real) i__1, work[1].i = 0.f; + return 0; + +/* End of CHSEQR */ + +} /* chseqr_ */ + +/* Subroutine */ int clabrd_(integer *m, integer *n, integer *nb, complex *a, + integer *lda, real *d__, real *e, complex *tauq, complex *taup, + complex *x, integer *ldx, complex *y, integer *ldy) +{ + /* System generated locals */ + integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, + i__3; + complex q__1; + + /* Local variables */ + static integer i__; + static complex alpha; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *), cgemv_(char *, integer *, integer *, complex *, + complex *, integer *, complex *, integer *, complex *, complex *, + integer *), clarfg_(integer *, complex *, complex *, + integer *, complex *), clacgv_(integer *, complex *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CLABRD reduces the first NB rows and columns of a complex general + m by n matrix A to upper or lower real bidiagonal form by a unitary + transformation Q' * A * P, and returns the matrices X and Y which + are needed to apply the transformation to the unreduced part of A. + + If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower + bidiagonal form. + + This is an auxiliary routine called by CGEBRD + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. + + N (input) INTEGER + The number of columns in the matrix A. + + NB (input) INTEGER + The number of leading rows and columns of A to be reduced. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the m by n general matrix to be reduced. + On exit, the first NB rows and columns of the matrix are + overwritten; the rest of the array is unchanged. + If m >= n, elements on and below the diagonal in the first NB + columns, with the array TAUQ, represent the unitary + matrix Q as a product of elementary reflectors; and + elements above the diagonal in the first NB rows, with the + array TAUP, represent the unitary matrix P as a product + of elementary reflectors. + If m < n, elements below the diagonal in the first NB + columns, with the array TAUQ, represent the unitary + matrix Q as a product of elementary reflectors, and + elements on and above the diagonal in the first NB rows, + with the array TAUP, represent the unitary matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) REAL array, dimension (NB) + The diagonal elements of the first NB rows and columns of + the reduced matrix. D(i) = A(i,i). + + E (output) REAL array, dimension (NB) + The off-diagonal elements of the first NB rows and columns of + the reduced matrix. + + TAUQ (output) COMPLEX array dimension (NB) + The scalar factors of the elementary reflectors which + represent the unitary matrix Q. See Further Details. + + TAUP (output) COMPLEX array, dimension (NB) + The scalar factors of the elementary reflectors which + represent the unitary matrix P. See Further Details. + + X (output) COMPLEX array, dimension (LDX,NB) + The m-by-nb matrix X required to update the unreduced part + of A. + + LDX (input) INTEGER + The leading dimension of the array X. LDX >= max(1,M). + + Y (output) COMPLEX array, dimension (LDY,NB) + The n-by-nb matrix Y required to update the unreduced part + of A. + + LDY (output) INTEGER + The leading dimension of the array Y. LDY >= max(1,N). + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are complex scalars, and v and u are complex + vectors. + + If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in + A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in + A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in + A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in + A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + The elements of the vectors v and u together form the m-by-nb matrix + V and the nb-by-n matrix U' which are needed, with X and Y, to apply + the transformation to the unreduced part of the matrix, using a block + update of the form: A := A - V*Y' - X*U'. + + The contents of A on exit are illustrated by the following examples + with nb = 2: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) + ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) + ( v1 v2 a a a ) ( v1 1 a a a a ) + ( v1 v2 a a a ) ( v1 v2 a a a a ) + ( v1 v2 a a a ) ( v1 v2 a a a a ) + ( v1 v2 a a a ) + + where a denotes an element of the original matrix which is unchanged, + vi denotes an element of the vector defining H(i), and ui an element + of the vector defining G(i). + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + return 0; + } + + if (*m >= *n) { + +/* Reduce to upper bidiagonal form */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i:m,i) */ + + i__2 = i__ - 1; + clacgv_(&i__2, &y[i__ + y_dim1], ldy); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + a_dim1], lda, + &y[i__ + y_dim1], ldy, &c_b56, &a[i__ + i__ * a_dim1], & + c__1); + i__2 = i__ - 1; + clacgv_(&i__2, &y[i__ + y_dim1], ldy); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &x[i__ + x_dim1], ldx, + &a[i__ * a_dim1 + 1], &c__1, &c_b56, &a[i__ + i__ * + a_dim1], &c__1); + +/* Generate reflection Q(i) to annihilate A(i+1:m,i) */ + + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + clarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, & + tauq[i__]); + i__2 = i__; + d__[i__2] = alpha.r; + if (i__ < *n) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Compute Y(i+1:n,i) */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + ( + i__ + 1) * a_dim1], lda, &a[i__ + i__ * a_dim1], & + c__1, &c_b55, &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + + a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b55, & + y[i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b56, &y[ + i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &x[i__ + + x_dim1], ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b55, & + y[i__ * y_dim1 + 1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &a[(i__ + + 1) * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & + c_b56, &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *n - i__; + cscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + +/* Update A(i,i+1:n) */ + + i__2 = *n - i__; + clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + clacgv_(&i__, &a[i__ + a_dim1], lda); + i__2 = *n - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__, &q__1, &y[i__ + 1 + + y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b56, &a[i__ + + (i__ + 1) * a_dim1], lda); + clacgv_(&i__, &a[i__ + a_dim1], lda); + i__2 = i__ - 1; + clacgv_(&i__2, &x[i__ + x_dim1], ldx); + i__2 = i__ - 1; + i__3 = *n - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &a[(i__ + + 1) * a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b56, + &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ - 1; + clacgv_(&i__2, &x[i__ + x_dim1], ldx); + +/* Generate reflection P(i) to annihilate A(i,i+2:n) */ + + i__2 = i__ + (i__ + 1) * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & + taup[i__]); + i__2 = i__; + e[i__2] = alpha.r; + i__2 = i__ + (i__ + 1) * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Compute X(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = *n - i__; + cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[i__ + 1 + ( + i__ + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1], + lda, &c_b55, &x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__; + cgemv_("Conjugate transpose", &i__2, &i__, &c_b56, &y[i__ + 1 + + y_dim1], ldy, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b55, &x[i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__, &q__1, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b56, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b55, &x[i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b56, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *m - i__; + cscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__; + clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Reduce to lower bidiagonal form */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i,i:n) */ + + i__2 = *n - i__ + 1; + clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ - 1; + clacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &y[i__ + y_dim1], ldy, + &a[i__ + a_dim1], lda, &c_b56, &a[i__ + i__ * a_dim1], + lda); + i__2 = i__ - 1; + clacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = i__ - 1; + clacgv_(&i__2, &x[i__ + x_dim1], ldx); + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &a[i__ * + a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b56, &a[i__ + + i__ * a_dim1], lda); + i__2 = i__ - 1; + clacgv_(&i__2, &x[i__ + x_dim1], ldx); + +/* Generate reflection P(i) to annihilate A(i,i+1:n) */ + + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & + taup[i__]); + i__2 = i__; + d__[i__2] = alpha.r; + if (i__ < *m) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Compute X(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = *n - i__ + 1; + cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[i__ + 1 + i__ + * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &c_b55, & + x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &y[i__ + + y_dim1], ldy, &a[i__ + i__ * a_dim1], lda, &c_b55, &x[ + i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b56, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[i__ * a_dim1 + + 1], lda, &a[i__ + i__ * a_dim1], lda, &c_b55, &x[ + i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b56, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *m - i__; + cscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__ + 1; + clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + +/* Update A(i+1:m,i) */ + + i__2 = i__ - 1; + clacgv_(&i__2, &y[i__ + y_dim1], ldy); + i__2 = *m - i__; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + 1 + + a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b56, &a[i__ + + 1 + i__ * a_dim1], &c__1); + i__2 = i__ - 1; + clacgv_(&i__2, &y[i__ + y_dim1], ldy); + i__2 = *m - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__, &q__1, &x[i__ + 1 + + x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b56, &a[ + i__ + 1 + i__ * a_dim1], &c__1); + +/* Generate reflection Q(i) to annihilate A(i+2:m,i) */ + + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + clarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, + &tauq[i__]); + i__2 = i__; + e[i__2] = alpha.r; + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Compute Y(i+1:n,i) */ + + i__2 = *m - i__; + i__3 = *n - i__; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + + 1 + (i__ + 1) * a_dim1], lda, &a[i__ + 1 + i__ * + a_dim1], &c__1, &c_b55, &y[i__ + 1 + i__ * y_dim1], & + c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + + 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b55, &y[i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b56, &y[ + i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__; + cgemv_("Conjugate transpose", &i__2, &i__, &c_b56, &x[i__ + 1 + + x_dim1], ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b55, &y[i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("Conjugate transpose", &i__, &i__2, &q__1, &a[(i__ + 1) + * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & + c_b56, &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *n - i__; + cscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + } else { + i__2 = *n - i__ + 1; + clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + } +/* L20: */ + } + } + return 0; + +/* End of CLABRD */ + +} /* clabrd_ */ + +/* Subroutine */ int clacgv_(integer *n, complex *x, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + complex q__1; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, ioff; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + CLACGV conjugates a complex vector of length N. + + Arguments + ========= + + N (input) INTEGER + The length of the vector X. N >= 0. + + X (input/output) COMPLEX array, dimension + (1+(N-1)*abs(INCX)) + On entry, the vector of length N to be conjugated. + On exit, X is overwritten with conjg(X). + + INCX (input) INTEGER + The spacing between successive elements of X. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*incx == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + r_cnjg(&q__1, &x[i__]); + x[i__2].r = q__1.r, x[i__2].i = q__1.i; +/* L10: */ + } + } else { + ioff = 1; + if (*incx < 0) { + ioff = 1 - (*n - 1) * *incx; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ioff; + r_cnjg(&q__1, &x[ioff]); + x[i__2].r = q__1.r, x[i__2].i = q__1.i; + ioff += *incx; +/* L20: */ + } + } + return 0; + +/* End of CLACGV */ + +} /* clacgv_ */ + +/* Subroutine */ int clacp2_(char *uplo, integer *m, integer *n, real *a, + integer *lda, complex *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CLACP2 copies all or part of a real two-dimensional matrix A to a + complex matrix B. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies the part of the matrix A to be copied to B. + = 'U': Upper triangular part + = 'L': Lower triangular part + Otherwise: All of the matrix A + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input) REAL array, dimension (LDA,N) + The m by n matrix A. If UPLO = 'U', only the upper trapezium + is accessed; if UPLO = 'L', only the lower trapezium is + accessed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + B (output) COMPLEX array, dimension (LDB,N) + On exit, B = A in the locations specified by UPLO. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,M). + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4], b[i__3].i = 0.f; +/* L10: */ + } +/* L20: */ + } + + } else if (lsame_(uplo, "L")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4], b[i__3].i = 0.f; +/* L30: */ + } +/* L40: */ + } + + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4], b[i__3].i = 0.f; +/* L50: */ + } +/* L60: */ + } + } + + return 0; + +/* End of CLACP2 */ + +} /* clacp2_ */ + +/* Subroutine */ int clacpy_(char *uplo, integer *m, integer *n, complex *a, + integer *lda, complex *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + CLACPY copies all or part of a two-dimensional matrix A to another + matrix B. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies the part of the matrix A to be copied to B. + = 'U': Upper triangular part + = 'L': Lower triangular part + Otherwise: All of the matrix A + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input) COMPLEX array, dimension (LDA,N) + The m by n matrix A. If UPLO = 'U', only the upper trapezium + is accessed; if UPLO = 'L', only the lower trapezium is + accessed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + B (output) COMPLEX array, dimension (LDB,N) + On exit, B = A in the locations specified by UPLO. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,M). + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i; +/* L10: */ + } +/* L20: */ + } + + } else if (lsame_(uplo, "L")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i; +/* L30: */ + } +/* L40: */ + } + + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i; +/* L50: */ + } +/* L60: */ + } + } + + return 0; + +/* End of CLACPY */ + +} /* clacpy_ */ + +/* Subroutine */ int clacrm_(integer *m, integer *n, complex *a, integer *lda, + real *b, integer *ldb, complex *c__, integer *ldc, real *rwork) +{ + /* System generated locals */ + integer b_dim1, b_offset, a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, + i__3, i__4, i__5; + real r__1; + complex q__1; + + /* Builtin functions */ + double r_imag(complex *); + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CLACRM performs a very simple matrix-matrix multiplication: + C := A * B, + where A is M by N and complex; B is N by N and real; + C is M by N and complex. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A and of the matrix C. + M >= 0. + + N (input) INTEGER + The number of columns and rows of the matrix B and + the number of columns of the matrix C. + N >= 0. + + A (input) COMPLEX array, dimension (LDA, N) + A contains the M by N matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >=max(1,M). + + B (input) REAL array, dimension (LDB, N) + B contains the N by N matrix B. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >=max(1,N). + + C (input) COMPLEX array, dimension (LDC, N) + C contains the M by N matrix C. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >=max(1,N). + + RWORK (workspace) REAL array, dimension (2*M*N) + + ===================================================================== + + + Quick return if possible. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --rwork; + + /* Function Body */ + if (*m == 0 || *n == 0) { + return 0; + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + rwork[(j - 1) * *m + i__] = a[i__3].r; +/* L10: */ + } +/* L20: */ + } + + l = *m * *n + 1; + sgemm_("N", "N", m, n, n, &c_b871, &rwork[1], m, &b[b_offset], ldb, & + c_b1101, &rwork[l], m); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = l + (j - 1) * *m + i__ - 1; + c__[i__3].r = rwork[i__4], c__[i__3].i = 0.f; +/* L30: */ + } +/* L40: */ + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + rwork[(j - 1) * *m + i__] = r_imag(&a[i__ + j * a_dim1]); +/* L50: */ + } +/* L60: */ + } + sgemm_("N", "N", m, n, n, &c_b871, &rwork[1], m, &b[b_offset], ldb, & + c_b1101, &rwork[l], m); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + r__1 = c__[i__4].r; + i__5 = l + (j - 1) * *m + i__ - 1; + q__1.r = r__1, q__1.i = rwork[i__5]; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L70: */ + } +/* L80: */ + } + + return 0; + +/* End of CLACRM */ + +} /* clacrm_ */ + +/* Complex */ VOID cladiv_(complex * ret_val, complex *x, complex *y) +{ + /* System generated locals */ + real r__1, r__2, r__3, r__4; + complex q__1; + + /* Builtin functions */ + double r_imag(complex *); + + /* Local variables */ + static real zi, zr; + extern /* Subroutine */ int sladiv_(real *, real *, real *, real *, real * + , real *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + CLADIV := X / Y, where X and Y are complex. The computation of X / Y + will not overflow on an intermediary step unless the results + overflows. + + Arguments + ========= + + X (input) COMPLEX + Y (input) COMPLEX + The complex scalars X and Y. + + ===================================================================== +*/ + + + r__1 = x->r; + r__2 = r_imag(x); + r__3 = y->r; + r__4 = r_imag(y); + sladiv_(&r__1, &r__2, &r__3, &r__4, &zr, &zi); + q__1.r = zr, q__1.i = zi; + ret_val->r = q__1.r, ret_val->i = q__1.i; + + return ; + +/* End of CLADIV */ + +} /* cladiv_ */ + +/* Subroutine */ int claed0_(integer *qsiz, integer *n, real *d__, real *e, + complex *q, integer *ldq, complex *qstore, integer *ldqs, real *rwork, + integer *iwork, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2; + real r__1; + + /* Builtin functions */ + double log(doublereal); + integer pow_ii(integer *, integer *); + + /* Local variables */ + static integer i__, j, k, ll, iq, lgn, msd2, smm1, spm1, spm2; + static real temp; + static integer curr, iperm; + extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, + complex *, integer *); + static integer indxq, iwrem; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + static integer iqptr; + extern /* Subroutine */ int claed7_(integer *, integer *, integer *, + integer *, integer *, integer *, real *, complex *, integer *, + real *, integer *, real *, integer *, integer *, integer *, + integer *, integer *, real *, complex *, real *, integer *, + integer *); + static integer tlvls; + extern /* Subroutine */ int clacrm_(integer *, integer *, complex *, + integer *, real *, integer *, complex *, integer *, real *); + static integer igivcl; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer igivnm, submat, curprb, subpbs, igivpt, curlvl, matsiz, + iprmpt, smlsiz; + extern /* Subroutine */ int ssteqr_(char *, integer *, real *, real *, + real *, integer *, real *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + Using the divide and conquer method, CLAED0 computes all eigenvalues + of a symmetric tridiagonal matrix which is one diagonal block of + those from reducing a dense or band Hermitian matrix and + corresponding eigenvectors of the dense or band matrix. + + Arguments + ========= + + QSIZ (input) INTEGER + The dimension of the unitary matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + D (input/output) REAL array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, the eigenvalues in ascending order. + + E (input/output) REAL array, dimension (N-1) + On entry, the off-diagonal elements of the tridiagonal matrix. + On exit, E has been destroyed. + + Q (input/output) COMPLEX array, dimension (LDQ,N) + On entry, Q must contain an QSIZ x N matrix whose columns + unitarily orthonormal. It is a part of the unitary matrix + that reduces the full dense Hermitian matrix to a + (reducible) symmetric tridiagonal matrix. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + IWORK (workspace) INTEGER array, + the dimension of IWORK must be at least + 6 + 6*N + 5*N*lg N + ( lg( N ) = smallest integer k + such that 2^k >= N ) + + RWORK (workspace) REAL array, + dimension (1 + 3*N + 2*N*lg N + 3*N**2) + ( lg( N ) = smallest integer k + such that 2^k >= N ) + + QSTORE (workspace) COMPLEX array, dimension (LDQS, N) + Used to store parts of + the eigenvector matrix when the updating matrix multiplies + take place. + + LDQS (input) INTEGER + The leading dimension of the array QSTORE. + LDQS >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an eigenvalue while + working on the submatrix lying in rows and columns + INFO/(N+1) through mod(INFO,N+1). + + ===================================================================== + + Warning: N could be as big as QSIZ! + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + qstore_dim1 = *ldqs; + qstore_offset = 1 + qstore_dim1; + qstore -= qstore_offset; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + +/* + IF( ICOMPQ .LT. 0 .OR. ICOMPQ .GT. 2 ) THEN + INFO = -1 + ELSE IF( ( ICOMPQ .EQ. 1 ) .AND. ( QSIZ .LT. MAX( 0, N ) ) ) + $ THEN +*/ + if (*qsiz < max(0,*n)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldq < max(1,*n)) { + *info = -6; + } else if (*ldqs < max(1,*n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CLAED0", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + smlsiz = ilaenv_(&c__9, "CLAED0", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + +/* + Determine the size and placement of the submatrices, and save in + the leading elements of IWORK. +*/ + + iwork[1] = *n; + subpbs = 1; + tlvls = 0; +L10: + if (iwork[subpbs] > smlsiz) { + for (j = subpbs; j >= 1; --j) { + iwork[j * 2] = (iwork[j] + 1) / 2; + iwork[(j << 1) - 1] = iwork[j] / 2; +/* L20: */ + } + ++tlvls; + subpbs <<= 1; + goto L10; + } + i__1 = subpbs; + for (j = 2; j <= i__1; ++j) { + iwork[j] += iwork[j - 1]; +/* L30: */ + } + +/* + Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 + using rank-1 modifications (cuts). +*/ + + spm1 = subpbs - 1; + i__1 = spm1; + for (i__ = 1; i__ <= i__1; ++i__) { + submat = iwork[i__] + 1; + smm1 = submat - 1; + d__[smm1] -= (r__1 = e[smm1], dabs(r__1)); + d__[submat] -= (r__1 = e[smm1], dabs(r__1)); +/* L40: */ + } + + indxq = (*n << 2) + 3; + +/* + Set up workspaces for eigenvalues only/accumulate new vectors + routine +*/ + + temp = log((real) (*n)) / log(2.f); + lgn = (integer) temp; + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + iprmpt = indxq + *n + 1; + iperm = iprmpt + *n * lgn; + iqptr = iperm + *n * lgn; + igivpt = iqptr + *n + 2; + igivcl = igivpt + *n * lgn; + + igivnm = 1; + iq = igivnm + (*n << 1) * lgn; +/* Computing 2nd power */ + i__1 = *n; + iwrem = iq + i__1 * i__1 + 1; +/* Initialize pointers */ + i__1 = subpbs; + for (i__ = 0; i__ <= i__1; ++i__) { + iwork[iprmpt + i__] = 1; + iwork[igivpt + i__] = 1; +/* L50: */ + } + iwork[iqptr] = 1; + +/* + Solve each submatrix eigenproblem at the bottom of the divide and + conquer tree. +*/ + + curr = 0; + i__1 = spm1; + for (i__ = 0; i__ <= i__1; ++i__) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[1]; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 1] - iwork[i__]; + } + ll = iq - 1 + iwork[iqptr + curr]; + ssteqr_("I", &matsiz, &d__[submat], &e[submat], &rwork[ll], &matsiz, & + rwork[1], info); + clacrm_(qsiz, &matsiz, &q[submat * q_dim1 + 1], ldq, &rwork[ll], & + matsiz, &qstore[submat * qstore_dim1 + 1], ldqs, &rwork[iwrem] + ); +/* Computing 2nd power */ + i__2 = matsiz; + iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2; + ++curr; + if (*info > 0) { + *info = submat * (*n + 1) + submat + matsiz - 1; + return 0; + } + k = 1; + i__2 = iwork[i__ + 1]; + for (j = submat; j <= i__2; ++j) { + iwork[indxq + j] = k; + ++k; +/* L60: */ + } +/* L70: */ + } + +/* + Successively merge eigensystems of adjacent submatrices + into eigensystem for the corresponding larger matrix. + + while ( SUBPBS > 1 ) +*/ + + curlvl = 1; +L80: + if (subpbs > 1) { + spm2 = subpbs - 2; + i__1 = spm2; + for (i__ = 0; i__ <= i__1; i__ += 2) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[2]; + msd2 = iwork[1]; + curprb = 0; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 2] - iwork[i__]; + msd2 = matsiz / 2; + ++curprb; + } + +/* + Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) + into an eigensystem of size MATSIZ. CLAED7 handles the case + when the eigenvectors of a full or band Hermitian matrix (which + was reduced to tridiagonal form) are desired. + + I am free to use Q as a valuable working space until Loop 150. +*/ + + claed7_(&matsiz, &msd2, qsiz, &tlvls, &curlvl, &curprb, &d__[ + submat], &qstore[submat * qstore_dim1 + 1], ldqs, &e[ + submat + msd2 - 1], &iwork[indxq + submat], &rwork[iq], & + iwork[iqptr], &iwork[iprmpt], &iwork[iperm], &iwork[ + igivpt], &iwork[igivcl], &rwork[igivnm], &q[submat * + q_dim1 + 1], &rwork[iwrem], &iwork[subpbs + 1], info); + if (*info > 0) { + *info = submat * (*n + 1) + submat + matsiz - 1; + return 0; + } + iwork[i__ / 2 + 1] = iwork[i__ + 2]; +/* L90: */ + } + subpbs /= 2; + ++curlvl; + goto L80; + } + +/* + end while + + Re-merge the eigenvalues/vectors which were deflated at the final + merge step. +*/ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + rwork[i__] = d__[j]; + ccopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 + 1] + , &c__1); +/* L100: */ + } + scopy_(n, &rwork[1], &c__1, &d__[1], &c__1); + + return 0; + +/* End of CLAED0 */ + +} /* claed0_ */ + +/* Subroutine */ int claed7_(integer *n, integer *cutpnt, integer *qsiz, + integer *tlvls, integer *curlvl, integer *curpbm, real *d__, complex * + q, integer *ldq, real *rho, integer *indxq, real *qstore, integer * + qptr, integer *prmptr, integer *perm, integer *givptr, integer * + givcol, real *givnum, complex *work, real *rwork, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + + /* Local variables */ + static integer i__, k, n1, n2, iq, iw, iz, ptr, ind1, ind2, indx, curr, + indxc, indxp; + extern /* Subroutine */ int claed8_(integer *, integer *, integer *, + complex *, integer *, real *, real *, integer *, real *, real *, + complex *, integer *, real *, integer *, integer *, integer *, + integer *, integer *, integer *, real *, integer *), slaed9_( + integer *, integer *, integer *, integer *, real *, real *, + integer *, real *, real *, real *, real *, integer *, integer *), + slaeda_(integer *, integer *, integer *, integer *, integer *, + integer *, integer *, integer *, real *, real *, integer *, real * + , real *, integer *); + static integer idlmda; + extern /* Subroutine */ int clacrm_(integer *, integer *, complex *, + integer *, real *, integer *, complex *, integer *, real *), + xerbla_(char *, integer *), slamrg_(integer *, integer *, + real *, integer *, integer *, integer *); + static integer coltyp; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CLAED7 computes the updated eigensystem of a diagonal + matrix after modification by a rank-one symmetric matrix. This + routine is used only for the eigenproblem which requires all + eigenvalues and optionally eigenvectors of a dense or banded + Hermitian matrix that has been reduced to tridiagonal form. + + T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) + + where Z = Q'u, u is a vector of length N with ones in the + CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + + The eigenvectors of the original matrix are stored in Q, and the + eigenvalues are in D. The algorithm consists of three stages: + + The first stage consists of deflating the size of the problem + when there are multiple eigenvalues or if there is a zero in + the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. This stage is + performed by the routine SLAED2. + + The second stage consists of calculating the updated + eigenvalues. This is done by finding the roots of the secular + equation via the routine SLAED4 (as called by SLAED3). + This routine also calculates the eigenvectors of the current + problem. + + The final stage consists of computing the updated eigenvectors + directly using the updated eigenvalues. The eigenvectors for + the current problem are multiplied with the eigenvectors from + the overall problem. + + Arguments + ========= + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + CUTPNT (input) INTEGER + Contains the location of the last eigenvalue in the leading + sub-matrix. min(1,N) <= CUTPNT <= N. + + QSIZ (input) INTEGER + The dimension of the unitary matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N. + + TLVLS (input) INTEGER + The total number of merging levels in the overall divide and + conquer tree. + + CURLVL (input) INTEGER + The current level in the overall merge routine, + 0 <= curlvl <= tlvls. + + CURPBM (input) INTEGER + The current problem in the current level in the overall + merge routine (counting from upper left to lower right). + + D (input/output) REAL array, dimension (N) + On entry, the eigenvalues of the rank-1-perturbed matrix. + On exit, the eigenvalues of the repaired matrix. + + Q (input/output) COMPLEX array, dimension (LDQ,N) + On entry, the eigenvectors of the rank-1-perturbed matrix. + On exit, the eigenvectors of the repaired tridiagonal matrix. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + RHO (input) REAL + Contains the subdiagonal element used to create the rank-1 + modification. + + INDXQ (output) INTEGER array, dimension (N) + This contains the permutation which will reintegrate the + subproblem just solved back into sorted order, + ie. D( INDXQ( I = 1, N ) ) will be in ascending order. + + IWORK (workspace) INTEGER array, dimension (4*N) + + RWORK (workspace) REAL array, + dimension (3*N+2*QSIZ*N) + + WORK (workspace) COMPLEX array, dimension (QSIZ*N) + + QSTORE (input/output) REAL array, dimension (N**2+1) + Stores eigenvectors of submatrices encountered during + divide and conquer, packed together. QPTR points to + beginning of the submatrices. + + QPTR (input/output) INTEGER array, dimension (N+2) + List of indices pointing to beginning of submatrices stored + in QSTORE. The submatrices are numbered starting at the + bottom left of the divide and conquer tree, from left to + right and bottom to top. + + PRMPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in PERM a + level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) + indicates the size of the permutation and also the size of + the full, non-deflated problem. + + PERM (input) INTEGER array, dimension (N lg N) + Contains the permutations (from deflation and sorting) to be + applied to each eigenblock. + + GIVPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in GIVCOL a + level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) + indicates the number of Givens rotations. + + GIVCOL (input) INTEGER array, dimension (2, N lg N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. + + GIVNUM (input) REAL array, dimension (2, N lg N) + Each number indicates the S value to be used in the + corresponding Givens rotation. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an eigenvalue did not converge + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --qstore; + --qptr; + --prmptr; + --perm; + --givptr; + givcol -= 3; + givnum -= 3; + --work; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + +/* + IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN +*/ + if (*n < 0) { + *info = -1; + } else if (min(1,*n) > *cutpnt || *n < *cutpnt) { + *info = -2; + } else if (*qsiz < *n) { + *info = -3; + } else if (*ldq < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CLAED7", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* + The following values are for bookkeeping purposes only. They are + integer pointers which indicate the portion of the workspace + used by a particular array in SLAED2 and SLAED3. +*/ + + iz = 1; + idlmda = iz + *n; + iw = idlmda + *n; + iq = iw + *n; + + indx = 1; + indxc = indx + *n; + coltyp = indxc + *n; + indxp = coltyp + *n; + +/* + Form the z-vector which consists of the last row of Q_1 and the + first row of Q_2. +*/ + + ptr = pow_ii(&c__2, tlvls) + 1; + i__1 = *curlvl - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *tlvls - i__; + ptr += pow_ii(&c__2, &i__2); +/* L10: */ + } + curr = ptr + *curpbm; + slaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], & + givcol[3], &givnum[3], &qstore[1], &qptr[1], &rwork[iz], &rwork[ + iz + *n], info); + +/* + When solving the final problem, we no longer need the stored data, + so we will overwrite the data from this level onto the previously + used storage space. +*/ + + if (*curlvl == *tlvls) { + qptr[curr] = 1; + prmptr[curr] = 1; + givptr[curr] = 1; + } + +/* Sort and Deflate eigenvalues. */ + + claed8_(&k, n, qsiz, &q[q_offset], ldq, &d__[1], rho, cutpnt, &rwork[iz], + &rwork[idlmda], &work[1], qsiz, &rwork[iw], &iwork[indxp], &iwork[ + indx], &indxq[1], &perm[prmptr[curr]], &givptr[curr + 1], &givcol[ + (givptr[curr] << 1) + 1], &givnum[(givptr[curr] << 1) + 1], info); + prmptr[curr + 1] = prmptr[curr] + *n; + givptr[curr + 1] += givptr[curr]; + +/* Solve Secular Equation. */ + + if (k != 0) { + slaed9_(&k, &c__1, &k, n, &d__[1], &rwork[iq], &k, rho, &rwork[idlmda] + , &rwork[iw], &qstore[qptr[curr]], &k, info); + clacrm_(qsiz, &k, &work[1], qsiz, &qstore[qptr[curr]], &k, &q[ + q_offset], ldq, &rwork[iq]); +/* Computing 2nd power */ + i__1 = k; + qptr[curr + 1] = qptr[curr] + i__1 * i__1; + if (*info != 0) { + return 0; + } + +/* Prepare the INDXQ sorting premutation. */ + + n1 = k; + n2 = *n - k; + ind1 = 1; + ind2 = *n; + slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); + } else { + qptr[curr + 1] = qptr[curr]; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indxq[i__] = i__; +/* L20: */ + } + } + + return 0; + +/* End of CLAED7 */ + +} /* claed7_ */ + +/* Subroutine */ int claed8_(integer *k, integer *n, integer *qsiz, complex * + q, integer *ldq, real *d__, real *rho, integer *cutpnt, real *z__, + real *dlamda, complex *q2, integer *ldq2, real *w, integer *indxp, + integer *indx, integer *indxq, integer *perm, integer *givptr, + integer *givcol, real *givnum, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, q2_dim1, q2_offset, i__1; + real r__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static real c__; + static integer i__, j; + static real s, t; + static integer k2, n1, n2, jp, n1p1; + static real eps, tau, tol; + static integer jlam, imax, jmax; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + ccopy_(integer *, complex *, integer *, complex *, integer *), + csrot_(integer *, complex *, integer *, complex *, integer *, + real *, real *), scopy_(integer *, real *, integer *, real *, + integer *); + extern doublereal slapy2_(real *, real *), slamch_(char *); + extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex + *, integer *, complex *, integer *), xerbla_(char *, + integer *); + extern integer isamax_(integer *, real *, integer *); + extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer + *, integer *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + September 30, 1994 + + + Purpose + ======= + + CLAED8 merges the two sets of eigenvalues together into a single + sorted set. Then it tries to deflate the size of the problem. + There are two ways in which deflation can occur: when two or more + eigenvalues are close together or if there is a tiny element in the + Z vector. For each such occurrence the order of the related secular + equation problem is reduced by one. + + Arguments + ========= + + K (output) INTEGER + Contains the number of non-deflated eigenvalues. + This is the order of the related secular equation. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + QSIZ (input) INTEGER + The dimension of the unitary matrix used to reduce + the dense or band matrix to tridiagonal form. + QSIZ >= N if ICOMPQ = 1. + + Q (input/output) COMPLEX array, dimension (LDQ,N) + On entry, Q contains the eigenvectors of the partially solved + system which has been previously updated in matrix + multiplies with other partially solved eigensystems. + On exit, Q contains the trailing (N-K) updated eigenvectors + (those which were deflated) in its last N-K columns. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max( 1, N ). + + D (input/output) REAL array, dimension (N) + On entry, D contains the eigenvalues of the two submatrices to + be combined. On exit, D contains the trailing (N-K) updated + eigenvalues (those which were deflated) sorted into increasing + order. + + RHO (input/output) REAL + Contains the off diagonal element associated with the rank-1 + cut which originally split the two submatrices which are now + being recombined. RHO is modified during the computation to + the value required by SLAED3. + + CUTPNT (input) INTEGER + Contains the location of the last eigenvalue in the leading + sub-matrix. MIN(1,N) <= CUTPNT <= N. + + Z (input) REAL array, dimension (N) + On input this vector contains the updating vector (the last + row of the first sub-eigenvector matrix and the first row of + the second sub-eigenvector matrix). The contents of Z are + destroyed during the updating process. + + DLAMDA (output) REAL array, dimension (N) + Contains a copy of the first K eigenvalues which will be used + by SLAED3 to form the secular equation. + + Q2 (output) COMPLEX array, dimension (LDQ2,N) + If ICOMPQ = 0, Q2 is not referenced. Otherwise, + Contains a copy of the first K eigenvectors which will be used + by SLAED7 in a matrix multiply (SGEMM) to update the new + eigenvectors. + + LDQ2 (input) INTEGER + The leading dimension of the array Q2. LDQ2 >= max( 1, N ). + + W (output) REAL array, dimension (N) + This will hold the first k values of the final + deflation-altered z-vector and will be passed to SLAED3. + + INDXP (workspace) INTEGER array, dimension (N) + This will contain the permutation used to place deflated + values of D at the end of the array. On output INDXP(1:K) + points to the nondeflated D-values and INDXP(K+1:N) + points to the deflated eigenvalues. + + INDX (workspace) INTEGER array, dimension (N) + This will contain the permutation used to sort the contents of + D into ascending order. + + INDXQ (input) INTEGER array, dimension (N) + This contains the permutation which separately sorts the two + sub-problems in D into ascending order. Note that elements in + the second half of this permutation must first have CUTPNT + added to their values in order to be accurate. + + PERM (output) INTEGER array, dimension (N) + Contains the permutations (from deflation and sorting) to be + applied to each eigenblock. + + GIVPTR (output) INTEGER + Contains the number of Givens rotations which took place in + this subproblem. + + GIVCOL (output) INTEGER array, dimension (2, N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. + + GIVNUM (output) REAL array, dimension (2, N) + Each number indicates the S value to be used in the + corresponding Givens rotation. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --d__; + --z__; + --dlamda; + q2_dim1 = *ldq2; + q2_offset = 1 + q2_dim1; + q2 -= q2_offset; + --w; + --indxp; + --indx; + --indxq; + --perm; + givcol -= 3; + givnum -= 3; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -2; + } else if (*qsiz < *n) { + *info = -3; + } else if (*ldq < max(1,*n)) { + *info = -5; + } else if (*cutpnt < min(1,*n) || *cutpnt > *n) { + *info = -8; + } else if (*ldq2 < max(1,*n)) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CLAED8", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + n1 = *cutpnt; + n2 = *n - n1; + n1p1 = n1 + 1; + + if (*rho < 0.f) { + sscal_(&n2, &c_b1150, &z__[n1p1], &c__1); + } + +/* Normalize z so that norm(z) = 1 */ + + t = 1.f / sqrt(2.f); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + indx[j] = j; +/* L10: */ + } + sscal_(n, &t, &z__[1], &c__1); + *rho = (r__1 = *rho * 2.f, dabs(r__1)); + +/* Sort the eigenvalues into increasing order */ + + i__1 = *n; + for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) { + indxq[i__] += *cutpnt; +/* L20: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = d__[indxq[i__]]; + w[i__] = z__[indxq[i__]]; +/* L30: */ + } + i__ = 1; + j = *cutpnt + 1; + slamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = dlamda[indx[i__]]; + z__[i__] = w[indx[i__]]; +/* L40: */ + } + +/* Calculate the allowable deflation tolerance */ + + imax = isamax_(n, &z__[1], &c__1); + jmax = isamax_(n, &d__[1], &c__1); + eps = slamch_("Epsilon"); + tol = eps * 8.f * (r__1 = d__[jmax], dabs(r__1)); + +/* + If the rank-1 modifier is small enough, no more needs to be done + -- except to reorganize Q so that its columns correspond with the + elements in D. +*/ + + if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) { + *k = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + perm[j] = indxq[indx[j]]; + ccopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1] + , &c__1); +/* L50: */ + } + clacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq); + return 0; + } + +/* + If there are multiple eigenvalues then the problem deflates. Here + the number of equal eigenvalues are found. As each equal + eigenvalue is found, an elementary reflector is computed to rotate + the corresponding eigensubspace so that the corresponding + components of Z are zero in this new basis. +*/ + + *k = 0; + *givptr = 0; + k2 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + indxp[k2] = j; + if (j == *n) { + goto L100; + } + } else { + jlam = j; + goto L70; + } +/* L60: */ + } +L70: + ++j; + if (j > *n) { + goto L90; + } + if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + indxp[k2] = j; + } else { + +/* Check if eigenvalues are close enough to allow deflation. */ + + s = z__[jlam]; + c__ = z__[j]; + +/* + Find sqrt(a**2+b**2) without overflow or + destructive underflow. +*/ + + tau = slapy2_(&c__, &s); + t = d__[j] - d__[jlam]; + c__ /= tau; + s = -s / tau; + if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) { + +/* Deflation is possible. */ + + z__[j] = tau; + z__[jlam] = 0.f; + +/* Record the appropriate Givens rotation */ + + ++(*givptr); + givcol[(*givptr << 1) + 1] = indxq[indx[jlam]]; + givcol[(*givptr << 1) + 2] = indxq[indx[j]]; + givnum[(*givptr << 1) + 1] = c__; + givnum[(*givptr << 1) + 2] = s; + csrot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[indxq[ + indx[j]] * q_dim1 + 1], &c__1, &c__, &s); + t = d__[jlam] * c__ * c__ + d__[j] * s * s; + d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__; + d__[jlam] = t; + --k2; + i__ = 1; +L80: + if (k2 + i__ <= *n) { + if (d__[jlam] < d__[indxp[k2 + i__]]) { + indxp[k2 + i__ - 1] = indxp[k2 + i__]; + indxp[k2 + i__] = jlam; + ++i__; + goto L80; + } else { + indxp[k2 + i__ - 1] = jlam; + } + } else { + indxp[k2 + i__ - 1] = jlam; + } + jlam = j; + } else { + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + jlam = j; + } + } + goto L70; +L90: + +/* Record the last eigenvalue. */ + + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + +L100: + +/* + Sort the eigenvalues and corresponding eigenvectors into DLAMDA + and Q2 respectively. The eigenvalues/vectors which were not + deflated go into the first K slots of DLAMDA and Q2 respectively, + while those which were deflated go into the last N - K slots. +*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jp = indxp[j]; + dlamda[j] = d__[jp]; + perm[j] = indxq[indx[jp]]; + ccopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1], & + c__1); +/* L110: */ + } + +/* + The deflated eigenvalues and their corresponding vectors go back + into the last N - K slots of D and Q respectively. +*/ + + if (*k < *n) { + i__1 = *n - *k; + scopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); + i__1 = *n - *k; + clacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(*k + + 1) * q_dim1 + 1], ldq); + } + + return 0; + +/* End of CLAED8 */ + +} /* claed8_ */ + +/* Subroutine */ int clahqr_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *w, + integer *iloz, integer *ihiz, complex *z__, integer *ldz, integer * + info) +{ + /* System generated locals */ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + real r__1, r__2, r__3, r__4, r__5, r__6; + complex q__1, q__2, q__3, q__4; + + /* Builtin functions */ + double r_imag(complex *); + void c_sqrt(complex *, complex *), r_cnjg(complex *, complex *); + double c_abs(complex *); + + /* Local variables */ + static integer i__, j, k, l, m; + static real s; + static complex t, u, v[2], x, y; + static integer i1, i2; + static complex t1; + static real t2; + static complex v2; + static real h10; + static complex h11; + static real h21; + static complex h22; + static integer nh, nz; + static complex h11s; + static integer itn, its; + static real ulp; + static complex sum; + static real tst1; + static complex temp; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *), ccopy_(integer *, complex *, integer *, complex *, + integer *); + static real rtemp, rwork[1]; + extern /* Subroutine */ int clarfg_(integer *, complex *, complex *, + integer *, complex *); + extern /* Complex */ VOID cladiv_(complex *, complex *, complex *); + extern doublereal slamch_(char *), clanhs_(char *, integer *, + complex *, integer *, real *); + static real smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CLAHQR is an auxiliary routine called by CHSEQR to update the + eigenvalues and Schur decomposition already computed by CHSEQR, by + dealing with the Hessenberg submatrix in rows and columns ILO to IHI. + + Arguments + ========= + + WANTT (input) LOGICAL + = .TRUE. : the full Schur form T is required; + = .FALSE.: only eigenvalues are required. + + WANTZ (input) LOGICAL + = .TRUE. : the matrix of Schur vectors Z is required; + = .FALSE.: Schur vectors are not required. + + N (input) INTEGER + The order of the matrix H. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows and + columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless ILO = 1). + CLAHQR works primarily with the Hessenberg submatrix in rows + and columns ILO to IHI, but applies transformations to all of + H if WANTT is .TRUE.. + 1 <= ILO <= max(1,IHI); IHI <= N. + + H (input/output) COMPLEX array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if WANTT is .TRUE., H is upper triangular in rows + and columns ILO:IHI, with any 2-by-2 diagonal blocks in + standard form. If WANTT is .FALSE., the contents of H are + unspecified on exit. + + LDH (input) INTEGER + The leading dimension of the array H. LDH >= max(1,N). + + W (output) COMPLEX array, dimension (N) + The computed eigenvalues ILO to IHI are stored in the + corresponding elements of W. If WANTT is .TRUE., the + eigenvalues are stored in the same order as on the diagonal + of the Schur form returned in H, with W(i) = H(i,i). + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. + 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. + + Z (input/output) COMPLEX array, dimension (LDZ,N) + If WANTZ is .TRUE., on entry Z must contain the current + matrix Z of transformations accumulated by CHSEQR, and on + exit Z has been updated; transformations are applied only to + the submatrix Z(ILOZ:IHIZ,ILO:IHI). + If WANTZ is .FALSE., Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + > 0: if INFO = i, CLAHQR failed to compute all the + eigenvalues ILO to IHI in a total of 30*(IHI-ILO+1) + iterations; elements i+1:ihi of W contain those + eigenvalues which have been successfully computed. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*ilo == *ihi) { + i__1 = *ilo; + i__2 = *ilo + *ilo * h_dim1; + w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; + return 0; + } + + nh = *ihi - *ilo + 1; + nz = *ihiz - *iloz + 1; + +/* + Set machine-dependent constants for the stopping criterion. + If norm(H) <= sqrt(OVFL), overflow should not occur. +*/ + + ulp = slamch_("Precision"); + smlnum = slamch_("Safe minimum") / ulp; + +/* + I1 and I2 are the indices of the first row and last column of H + to which transformations must be applied. If eigenvalues only are + being computed, I1 and I2 are set inside the main loop. +*/ + + if (*wantt) { + i1 = 1; + i2 = *n; + } + +/* ITN is the total number of QR iterations allowed. */ + + itn = nh * 30; + +/* + The main loop begins here. I is the loop index and decreases from + IHI to ILO in steps of 1. Each iteration of the loop works + with the active submatrix in rows and columns L to I. + Eigenvalues I+1 to IHI have already converged. Either L = ILO, or + H(L,L-1) is negligible so that the matrix splits. +*/ + + i__ = *ihi; +L10: + if (i__ < *ilo) { + goto L130; + } + +/* + Perform QR iterations on rows and columns ILO to I until a + submatrix of order 1 splits off at the bottom because a + subdiagonal element has become negligible. +*/ + + l = *ilo; + i__1 = itn; + for (its = 0; its <= i__1; ++its) { + +/* Look for a single small subdiagonal element. */ + + i__2 = l + 1; + for (k = i__; k >= i__2; --k) { + i__3 = k - 1 + (k - 1) * h_dim1; + i__4 = k + k * h_dim1; + tst1 = (r__1 = h__[i__3].r, dabs(r__1)) + (r__2 = r_imag(&h__[k - + 1 + (k - 1) * h_dim1]), dabs(r__2)) + ((r__3 = h__[i__4] + .r, dabs(r__3)) + (r__4 = r_imag(&h__[k + k * h_dim1]), + dabs(r__4))); + if (tst1 == 0.f) { + i__3 = i__ - l + 1; + tst1 = clanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, rwork); + } + i__3 = k + (k - 1) * h_dim1; +/* Computing MAX */ + r__2 = ulp * tst1; + if ((r__1 = h__[i__3].r, dabs(r__1)) <= dmax(r__2,smlnum)) { + goto L30; + } +/* L20: */ + } +L30: + l = k; + if (l > *ilo) { + +/* H(L,L-1) is negligible */ + + i__2 = l + (l - 1) * h_dim1; + h__[i__2].r = 0.f, h__[i__2].i = 0.f; + } + +/* Exit from loop if a submatrix of order 1 has split off. */ + + if (l >= i__) { + goto L120; + } + +/* + Now the active submatrix is in rows and columns L to I. If + eigenvalues only are being computed, only the active submatrix + need be transformed. +*/ + + if (! (*wantt)) { + i1 = l; + i2 = i__; + } + + if (its == 10 || its == 20) { + +/* Exceptional shift. */ + + i__2 = i__ + (i__ - 1) * h_dim1; + s = (r__1 = h__[i__2].r, dabs(r__1)) * .75f; + i__2 = i__ + i__ * h_dim1; + q__1.r = s + h__[i__2].r, q__1.i = h__[i__2].i; + t.r = q__1.r, t.i = q__1.i; + } else { + +/* Wilkinson's shift. */ + + i__2 = i__ + i__ * h_dim1; + t.r = h__[i__2].r, t.i = h__[i__2].i; + i__2 = i__ - 1 + i__ * h_dim1; + i__3 = i__ + (i__ - 1) * h_dim1; + r__1 = h__[i__3].r; + q__1.r = r__1 * h__[i__2].r, q__1.i = r__1 * h__[i__2].i; + u.r = q__1.r, u.i = q__1.i; + if (u.r != 0.f || u.i != 0.f) { + i__2 = i__ - 1 + (i__ - 1) * h_dim1; + q__2.r = h__[i__2].r - t.r, q__2.i = h__[i__2].i - t.i; + q__1.r = q__2.r * .5f, q__1.i = q__2.i * .5f; + x.r = q__1.r, x.i = q__1.i; + q__3.r = x.r * x.r - x.i * x.i, q__3.i = x.r * x.i + x.i * + x.r; + q__2.r = q__3.r + u.r, q__2.i = q__3.i + u.i; + c_sqrt(&q__1, &q__2); + y.r = q__1.r, y.i = q__1.i; + if (x.r * y.r + r_imag(&x) * r_imag(&y) < 0.f) { + q__1.r = -y.r, q__1.i = -y.i; + y.r = q__1.r, y.i = q__1.i; + } + q__3.r = x.r + y.r, q__3.i = x.i + y.i; + cladiv_(&q__2, &u, &q__3); + q__1.r = t.r - q__2.r, q__1.i = t.i - q__2.i; + t.r = q__1.r, t.i = q__1.i; + } + } + +/* Look for two consecutive small subdiagonal elements. */ + + i__2 = l + 1; + for (m = i__ - 1; m >= i__2; --m) { + +/* + Determine the effect of starting the single-shift QR + iteration at row M, and see if this would make H(M,M-1) + negligible. +*/ + + i__3 = m + m * h_dim1; + h11.r = h__[i__3].r, h11.i = h__[i__3].i; + i__3 = m + 1 + (m + 1) * h_dim1; + h22.r = h__[i__3].r, h22.i = h__[i__3].i; + q__1.r = h11.r - t.r, q__1.i = h11.i - t.i; + h11s.r = q__1.r, h11s.i = q__1.i; + i__3 = m + 1 + m * h_dim1; + h21 = h__[i__3].r; + s = (r__1 = h11s.r, dabs(r__1)) + (r__2 = r_imag(&h11s), dabs( + r__2)) + dabs(h21); + q__1.r = h11s.r / s, q__1.i = h11s.i / s; + h11s.r = q__1.r, h11s.i = q__1.i; + h21 /= s; + v[0].r = h11s.r, v[0].i = h11s.i; + v[1].r = h21, v[1].i = 0.f; + i__3 = m + (m - 1) * h_dim1; + h10 = h__[i__3].r; + tst1 = ((r__1 = h11s.r, dabs(r__1)) + (r__2 = r_imag(&h11s), dabs( + r__2))) * ((r__3 = h11.r, dabs(r__3)) + (r__4 = r_imag(& + h11), dabs(r__4)) + ((r__5 = h22.r, dabs(r__5)) + (r__6 = + r_imag(&h22), dabs(r__6)))); + if ((r__1 = h10 * h21, dabs(r__1)) <= ulp * tst1) { + goto L50; + } +/* L40: */ + } + i__2 = l + l * h_dim1; + h11.r = h__[i__2].r, h11.i = h__[i__2].i; + i__2 = l + 1 + (l + 1) * h_dim1; + h22.r = h__[i__2].r, h22.i = h__[i__2].i; + q__1.r = h11.r - t.r, q__1.i = h11.i - t.i; + h11s.r = q__1.r, h11s.i = q__1.i; + i__2 = l + 1 + l * h_dim1; + h21 = h__[i__2].r; + s = (r__1 = h11s.r, dabs(r__1)) + (r__2 = r_imag(&h11s), dabs(r__2)) + + dabs(h21); + q__1.r = h11s.r / s, q__1.i = h11s.i / s; + h11s.r = q__1.r, h11s.i = q__1.i; + h21 /= s; + v[0].r = h11s.r, v[0].i = h11s.i; + v[1].r = h21, v[1].i = 0.f; +L50: + +/* Single-shift QR step */ + + i__2 = i__ - 1; + for (k = m; k <= i__2; ++k) { + +/* + The first iteration of this loop determines a reflection G + from the vector V and applies it from left and right to H, + thus creating a nonzero bulge below the subdiagonal. + + Each subsequent iteration determines a reflection G to + restore the Hessenberg form in the (K-1)th column, and thus + chases the bulge one step toward the bottom of the active + submatrix. + + V(2) is always real before the call to CLARFG, and hence + after the call T2 ( = T1*V(2) ) is also real. +*/ + + if (k > m) { + ccopy_(&c__2, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); + } + clarfg_(&c__2, v, &v[1], &c__1, &t1); + if (k > m) { + i__3 = k + (k - 1) * h_dim1; + h__[i__3].r = v[0].r, h__[i__3].i = v[0].i; + i__3 = k + 1 + (k - 1) * h_dim1; + h__[i__3].r = 0.f, h__[i__3].i = 0.f; + } + v2.r = v[1].r, v2.i = v[1].i; + q__1.r = t1.r * v2.r - t1.i * v2.i, q__1.i = t1.r * v2.i + t1.i * + v2.r; + t2 = q__1.r; + +/* + Apply G from the left to transform the rows of the matrix + in columns K to I2. +*/ + + i__3 = i2; + for (j = k; j <= i__3; ++j) { + r_cnjg(&q__3, &t1); + i__4 = k + j * h_dim1; + q__2.r = q__3.r * h__[i__4].r - q__3.i * h__[i__4].i, q__2.i = + q__3.r * h__[i__4].i + q__3.i * h__[i__4].r; + i__5 = k + 1 + j * h_dim1; + q__4.r = t2 * h__[i__5].r, q__4.i = t2 * h__[i__5].i; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + sum.r = q__1.r, sum.i = q__1.i; + i__4 = k + j * h_dim1; + i__5 = k + j * h_dim1; + q__1.r = h__[i__5].r - sum.r, q__1.i = h__[i__5].i - sum.i; + h__[i__4].r = q__1.r, h__[i__4].i = q__1.i; + i__4 = k + 1 + j * h_dim1; + i__5 = k + 1 + j * h_dim1; + q__2.r = sum.r * v2.r - sum.i * v2.i, q__2.i = sum.r * v2.i + + sum.i * v2.r; + q__1.r = h__[i__5].r - q__2.r, q__1.i = h__[i__5].i - q__2.i; + h__[i__4].r = q__1.r, h__[i__4].i = q__1.i; +/* L60: */ + } + +/* + Apply G from the right to transform the columns of the + matrix in rows I1 to min(K+2,I). + + Computing MIN +*/ + i__4 = k + 2; + i__3 = min(i__4,i__); + for (j = i1; j <= i__3; ++j) { + i__4 = j + k * h_dim1; + q__2.r = t1.r * h__[i__4].r - t1.i * h__[i__4].i, q__2.i = + t1.r * h__[i__4].i + t1.i * h__[i__4].r; + i__5 = j + (k + 1) * h_dim1; + q__3.r = t2 * h__[i__5].r, q__3.i = t2 * h__[i__5].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + sum.r = q__1.r, sum.i = q__1.i; + i__4 = j + k * h_dim1; + i__5 = j + k * h_dim1; + q__1.r = h__[i__5].r - sum.r, q__1.i = h__[i__5].i - sum.i; + h__[i__4].r = q__1.r, h__[i__4].i = q__1.i; + i__4 = j + (k + 1) * h_dim1; + i__5 = j + (k + 1) * h_dim1; + r_cnjg(&q__3, &v2); + q__2.r = sum.r * q__3.r - sum.i * q__3.i, q__2.i = sum.r * + q__3.i + sum.i * q__3.r; + q__1.r = h__[i__5].r - q__2.r, q__1.i = h__[i__5].i - q__2.i; + h__[i__4].r = q__1.r, h__[i__4].i = q__1.i; +/* L70: */ + } + + if (*wantz) { + +/* Accumulate transformations in the matrix Z */ + + i__3 = *ihiz; + for (j = *iloz; j <= i__3; ++j) { + i__4 = j + k * z_dim1; + q__2.r = t1.r * z__[i__4].r - t1.i * z__[i__4].i, q__2.i = + t1.r * z__[i__4].i + t1.i * z__[i__4].r; + i__5 = j + (k + 1) * z_dim1; + q__3.r = t2 * z__[i__5].r, q__3.i = t2 * z__[i__5].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + sum.r = q__1.r, sum.i = q__1.i; + i__4 = j + k * z_dim1; + i__5 = j + k * z_dim1; + q__1.r = z__[i__5].r - sum.r, q__1.i = z__[i__5].i - + sum.i; + z__[i__4].r = q__1.r, z__[i__4].i = q__1.i; + i__4 = j + (k + 1) * z_dim1; + i__5 = j + (k + 1) * z_dim1; + r_cnjg(&q__3, &v2); + q__2.r = sum.r * q__3.r - sum.i * q__3.i, q__2.i = sum.r * + q__3.i + sum.i * q__3.r; + q__1.r = z__[i__5].r - q__2.r, q__1.i = z__[i__5].i - + q__2.i; + z__[i__4].r = q__1.r, z__[i__4].i = q__1.i; +/* L80: */ + } + } + + if (k == m && m > l) { + +/* + If the QR step was started at row M > L because two + consecutive small subdiagonals were found, then extra + scaling must be performed to ensure that H(M,M-1) remains + real. +*/ + + q__1.r = 1.f - t1.r, q__1.i = 0.f - t1.i; + temp.r = q__1.r, temp.i = q__1.i; + r__1 = c_abs(&temp); + q__1.r = temp.r / r__1, q__1.i = temp.i / r__1; + temp.r = q__1.r, temp.i = q__1.i; + i__3 = m + 1 + m * h_dim1; + i__4 = m + 1 + m * h_dim1; + r_cnjg(&q__2, &temp); + q__1.r = h__[i__4].r * q__2.r - h__[i__4].i * q__2.i, q__1.i = + h__[i__4].r * q__2.i + h__[i__4].i * q__2.r; + h__[i__3].r = q__1.r, h__[i__3].i = q__1.i; + if (m + 2 <= i__) { + i__3 = m + 2 + (m + 1) * h_dim1; + i__4 = m + 2 + (m + 1) * h_dim1; + q__1.r = h__[i__4].r * temp.r - h__[i__4].i * temp.i, + q__1.i = h__[i__4].r * temp.i + h__[i__4].i * + temp.r; + h__[i__3].r = q__1.r, h__[i__3].i = q__1.i; + } + i__3 = i__; + for (j = m; j <= i__3; ++j) { + if (j != m + 1) { + if (i2 > j) { + i__4 = i2 - j; + cscal_(&i__4, &temp, &h__[j + (j + 1) * h_dim1], + ldh); + } + i__4 = j - i1; + r_cnjg(&q__1, &temp); + cscal_(&i__4, &q__1, &h__[i1 + j * h_dim1], &c__1); + if (*wantz) { + r_cnjg(&q__1, &temp); + cscal_(&nz, &q__1, &z__[*iloz + j * z_dim1], & + c__1); + } + } +/* L90: */ + } + } +/* L100: */ + } + +/* Ensure that H(I,I-1) is real. */ + + i__2 = i__ + (i__ - 1) * h_dim1; + temp.r = h__[i__2].r, temp.i = h__[i__2].i; + if (r_imag(&temp) != 0.f) { + rtemp = c_abs(&temp); + i__2 = i__ + (i__ - 1) * h_dim1; + h__[i__2].r = rtemp, h__[i__2].i = 0.f; + q__1.r = temp.r / rtemp, q__1.i = temp.i / rtemp; + temp.r = q__1.r, temp.i = q__1.i; + if (i2 > i__) { + i__2 = i2 - i__; + r_cnjg(&q__1, &temp); + cscal_(&i__2, &q__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); + } + i__2 = i__ - i1; + cscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1); + if (*wantz) { + cscal_(&nz, &temp, &z__[*iloz + i__ * z_dim1], &c__1); + } + } + +/* L110: */ + } + +/* Failure to converge in remaining number of iterations */ + + *info = i__; + return 0; + +L120: + +/* H(I,I-1) is negligible: one eigenvalue has converged. */ + + i__1 = i__; + i__2 = i__ + i__ * h_dim1; + w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; + +/* + Decrement number of remaining iterations, and return to start of + the main loop with new value of I. +*/ + + itn -= its; + i__ = l - 1; + goto L10; + +L130: + return 0; + +/* End of CLAHQR */ + +} /* clahqr_ */ + +/* Subroutine */ int clahrd_(integer *n, integer *k, integer *nb, complex *a, + integer *lda, complex *tau, complex *t, integer *ldt, complex *y, + integer *ldy) +{ + /* System generated locals */ + integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, + i__3; + complex q__1; + + /* Local variables */ + static integer i__; + static complex ei; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *), cgemv_(char *, integer *, integer *, complex *, + complex *, integer *, complex *, integer *, complex *, complex *, + integer *), ccopy_(integer *, complex *, integer *, + complex *, integer *), caxpy_(integer *, complex *, complex *, + integer *, complex *, integer *), ctrmv_(char *, char *, char *, + integer *, complex *, integer *, complex *, integer *), clarfg_(integer *, complex *, complex *, integer + *, complex *), clacgv_(integer *, complex *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CLAHRD reduces the first NB columns of a complex general n-by-(n-k+1) + matrix A so that elements below the k-th subdiagonal are zero. The + reduction is performed by a unitary similarity transformation + Q' * A * Q. The routine returns the matrices V and T which determine + Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. + + This is an auxiliary routine called by CGEHRD. + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. + + K (input) INTEGER + The offset for the reduction. Elements below the k-th + subdiagonal in the first NB columns are reduced to zero. + + NB (input) INTEGER + The number of columns to be reduced. + + A (input/output) COMPLEX array, dimension (LDA,N-K+1) + On entry, the n-by-(n-k+1) general matrix A. + On exit, the elements on and above the k-th subdiagonal in + the first NB columns are overwritten with the corresponding + elements of the reduced matrix; the elements below the k-th + subdiagonal, with the array TAU, represent the matrix Q as a + product of elementary reflectors. The other columns of A are + unchanged. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) COMPLEX array, dimension (NB) + The scalar factors of the elementary reflectors. See Further + Details. + + T (output) COMPLEX array, dimension (LDT,NB) + The upper triangular matrix T. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= NB. + + Y (output) COMPLEX array, dimension (LDY,NB) + The n-by-nb matrix Y. + + LDY (input) INTEGER + The leading dimension of the array Y. LDY >= max(1,N). + + Further Details + =============== + + The matrix Q is represented as a product of nb elementary reflectors + + Q = H(1) H(2) . . . H(nb). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in + A(i+k+1:n,i), and tau in TAU(i). + + The elements of the vectors v together form the (n-k+1)-by-nb matrix + V which is needed, with T and Y, to apply the transformation to the + unreduced part of the matrix, using an update of the form: + A := (I - V*T*V') * (A - Y*V'). + + The contents of A on exit are illustrated by the following example + with n = 7, k = 3 and nb = 2: + + ( a h a a a ) + ( a h a a a ) + ( a h a a a ) + ( h h a a a ) + ( v1 h a a a ) + ( v1 v2 a a a ) + ( v1 v2 a a a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + --tau; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + + /* Function Body */ + if (*n <= 1) { + return 0; + } + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ > 1) { + +/* + Update A(1:n,i) + + Compute i-th column of A - Y * V' +*/ + + i__2 = i__ - 1; + clacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda); + i__2 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", n, &i__2, &q__1, &y[y_offset], ldy, &a[*k + + i__ - 1 + a_dim1], lda, &c_b56, &a[i__ * a_dim1 + 1], & + c__1); + i__2 = i__ - 1; + clacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda); + +/* + Apply I - V * T' * V' to this column (call it b) from the + left, using the last column of T as workspace + + Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) + ( V2 ) ( b2 ) + + where V1 is unit lower triangular + + w := V1' * b1 +*/ + + i__2 = i__ - 1; + ccopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + + 1], &c__1); + i__2 = i__ - 1; + ctrmv_("Lower", "Conjugate transpose", "Unit", &i__2, &a[*k + 1 + + a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1); + +/* w := w + V2'*b2 */ + + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[*k + i__ + + a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b56, + &t[*nb * t_dim1 + 1], &c__1); + +/* w := T'*w */ + + i__2 = i__ - 1; + ctrmv_("Upper", "Conjugate transpose", "Non-unit", &i__2, &t[ + t_offset], ldt, &t[*nb * t_dim1 + 1], &c__1); + +/* b2 := b2 - V2*w */ + + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &a[*k + i__ + a_dim1], + lda, &t[*nb * t_dim1 + 1], &c__1, &c_b56, &a[*k + i__ + + i__ * a_dim1], &c__1); + +/* b1 := b1 - V1*w */ + + i__2 = i__ - 1; + ctrmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1] + , lda, &t[*nb * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + caxpy_(&i__2, &q__1, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + i__ + * a_dim1], &c__1); + + i__2 = *k + i__ - 1 + (i__ - 1) * a_dim1; + a[i__2].r = ei.r, a[i__2].i = ei.i; + } + +/* + Generate the elementary reflector H(i) to annihilate + A(k+i+1:n,i) +*/ + + i__2 = *k + i__ + i__ * a_dim1; + ei.r = a[i__2].r, ei.i = a[i__2].i; + i__2 = *n - *k - i__ + 1; +/* Computing MIN */ + i__3 = *k + i__ + 1; + clarfg_(&i__2, &ei, &a[min(i__3,*n) + i__ * a_dim1], &c__1, &tau[i__]) + ; + i__2 = *k + i__ + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Compute Y(1:n,i) */ + + i__2 = *n - *k - i__ + 1; + cgemv_("No transpose", n, &i__2, &c_b56, &a[(i__ + 1) * a_dim1 + 1], + lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b55, &y[i__ * + y_dim1 + 1], &c__1); + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[*k + i__ + + a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b55, &t[ + i__ * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", n, &i__2, &q__1, &y[y_offset], ldy, &t[i__ * + t_dim1 + 1], &c__1, &c_b56, &y[i__ * y_dim1 + 1], &c__1); + cscal_(n, &tau[i__], &y[i__ * y_dim1 + 1], &c__1); + +/* Compute T(1:i,i) */ + + i__2 = i__ - 1; + i__3 = i__; + q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i; + cscal_(&i__2, &q__1, &t[i__ * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + ctrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt, + &t[i__ * t_dim1 + 1], &c__1) + ; + i__2 = i__ + i__ * t_dim1; + i__3 = i__; + t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i; + +/* L10: */ + } + i__1 = *k + *nb + *nb * a_dim1; + a[i__1].r = ei.r, a[i__1].i = ei.i; + + return 0; + +/* End of CLAHRD */ + +} /* clahrd_ */ + +doublereal clange_(char *norm, integer *m, integer *n, complex *a, integer * + lda, real *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real ret_val, r__1, r__2; + + /* Builtin functions */ + double c_abs(complex *), sqrt(doublereal); + + /* Local variables */ + static integer i__, j; + static real sum, scale; + extern logical lsame_(char *, char *); + static real value; + extern /* Subroutine */ int classq_(integer *, complex *, integer *, real + *, real *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + CLANGE returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + complex matrix A. + + Description + =========== + + CLANGE returns the value + + CLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in CLANGE as described + above. + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. When M = 0, + CLANGE is set to zero. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. When N = 0, + CLANGE is set to zero. + + A (input) COMPLEX array, dimension (LDA,N) + The m by n matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(M,1). + + WORK (workspace) REAL array, dimension (LWORK), + where LWORK >= M when NORM = 'I'; otherwise, WORK is not + referenced. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (min(*m,*n) == 0) { + value = 0.f; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + value = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__1 = value, r__2 = c_abs(&a[i__ + j * a_dim1]); + value = dmax(r__1,r__2); +/* L10: */ + } +/* L20: */ + } + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1') { + +/* Find norm1(A). */ + + value = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.f; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + sum += c_abs(&a[i__ + j * a_dim1]); +/* L30: */ + } + value = dmax(value,sum); +/* L40: */ + } + } else if (lsame_(norm, "I")) { + +/* Find normI(A). */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.f; +/* L50: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] += c_abs(&a[i__ + j * a_dim1]); +/* L60: */ + } +/* L70: */ + } + value = 0.f; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__1 = value, r__2 = work[i__]; + value = dmax(r__1,r__2); +/* L80: */ + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.f; + sum = 1.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + classq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L90: */ + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of CLANGE */ + +} /* clange_ */ + +doublereal clanhe_(char *norm, char *uplo, integer *n, complex *a, integer * + lda, real *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real ret_val, r__1, r__2, r__3; + + /* Builtin functions */ + double c_abs(complex *), sqrt(doublereal); + + /* Local variables */ + static integer i__, j; + static real sum, absa, scale; + extern logical lsame_(char *, char *); + static real value; + extern /* Subroutine */ int classq_(integer *, complex *, integer *, real + *, real *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + CLANHE returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + complex hermitian matrix A. + + Description + =========== + + CLANHE returns the value + + CLANHE = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in CLANHE as described + above. + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + hermitian matrix A is to be referenced. + = 'U': Upper triangular part of A is referenced + = 'L': Lower triangular part of A is referenced + + N (input) INTEGER + The order of the matrix A. N >= 0. When N = 0, CLANHE is + set to zero. + + A (input) COMPLEX array, dimension (LDA,N) + The hermitian matrix A. If UPLO = 'U', the leading n by n + upper triangular part of A contains the upper triangular part + of the matrix A, and the strictly lower triangular part of A + is not referenced. If UPLO = 'L', the leading n by n lower + triangular part of A contains the lower triangular part of + the matrix A, and the strictly upper triangular part of A is + not referenced. Note that the imaginary parts of the diagonal + elements need not be set and are assumed to be zero. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(N,1). + + WORK (workspace) REAL array, dimension (LWORK), + where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, + WORK is not referenced. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (*n == 0) { + value = 0.f; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + value = 0.f; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__1 = value, r__2 = c_abs(&a[i__ + j * a_dim1]); + value = dmax(r__1,r__2); +/* L10: */ + } +/* Computing MAX */ + i__2 = j + j * a_dim1; + r__2 = value, r__3 = (r__1 = a[i__2].r, dabs(r__1)); + value = dmax(r__2,r__3); +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = j + j * a_dim1; + r__2 = value, r__3 = (r__1 = a[i__2].r, dabs(r__1)); + value = dmax(r__2,r__3); + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__1 = value, r__2 = c_abs(&a[i__ + j * a_dim1]); + value = dmax(r__1,r__2); +/* L30: */ + } +/* L40: */ + } + } + } else if (lsame_(norm, "I") || lsame_(norm, "O") || *(unsigned char *)norm == '1') { + +/* Find normI(A) ( = norm1(A), since A is hermitian). */ + + value = 0.f; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.f; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + absa = c_abs(&a[i__ + j * a_dim1]); + sum += absa; + work[i__] += absa; +/* L50: */ + } + i__2 = j + j * a_dim1; + work[j] = sum + (r__1 = a[i__2].r, dabs(r__1)); +/* L60: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__1 = value, r__2 = work[i__]; + value = dmax(r__1,r__2); +/* L70: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.f; +/* L80: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j + j * a_dim1; + sum = work[j] + (r__1 = a[i__2].r, dabs(r__1)); + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + absa = c_abs(&a[i__ + j * a_dim1]); + sum += absa; + work[i__] += absa; +/* L90: */ + } + value = dmax(value,sum); +/* L100: */ + } + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.f; + sum = 1.f; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + i__2 = j - 1; + classq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L110: */ + } + } else { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j; + classq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum); +/* L120: */ + } + } + sum *= 2; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + if (a[i__2].r != 0.f) { + i__2 = i__ + i__ * a_dim1; + absa = (r__1 = a[i__2].r, dabs(r__1)); + if (scale < absa) { +/* Computing 2nd power */ + r__1 = scale / absa; + sum = sum * (r__1 * r__1) + 1.f; + scale = absa; + } else { +/* Computing 2nd power */ + r__1 = absa / scale; + sum += r__1 * r__1; + } + } +/* L130: */ + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of CLANHE */ + +} /* clanhe_ */ + +doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * + work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + real ret_val, r__1, r__2; + + /* Builtin functions */ + double c_abs(complex *), sqrt(doublereal); + + /* Local variables */ + static integer i__, j; + static real sum, scale; + extern logical lsame_(char *, char *); + static real value; + extern /* Subroutine */ int classq_(integer *, complex *, integer *, real + *, real *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + CLANHS returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + Hessenberg matrix A. + + Description + =========== + + CLANHS returns the value + + CLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in CLANHS as described + above. + + N (input) INTEGER + The order of the matrix A. N >= 0. When N = 0, CLANHS is + set to zero. + + A (input) COMPLEX array, dimension (LDA,N) + The n by n upper Hessenberg matrix A; the part of A below the + first sub-diagonal is not referenced. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(N,1). + + WORK (workspace) REAL array, dimension (LWORK), + where LWORK >= N when NORM = 'I'; otherwise, WORK is not + referenced. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (*n == 0) { + value = 0.f; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + value = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__1 = value, r__2 = c_abs(&a[i__ + j * a_dim1]); + value = dmax(r__1,r__2); +/* L10: */ + } +/* L20: */ + } + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1') { + +/* Find norm1(A). */ + + value = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.f; +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + sum += c_abs(&a[i__ + j * a_dim1]); +/* L30: */ + } + value = dmax(value,sum); +/* L40: */ + } + } else if (lsame_(norm, "I")) { + +/* Find normI(A). */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.f; +/* L50: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] += c_abs(&a[i__ + j * a_dim1]); +/* L60: */ + } +/* L70: */ + } + value = 0.f; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__1 = value, r__2 = work[i__]; + value = dmax(r__1,r__2); +/* L80: */ + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.f; + sum = 1.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + classq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L90: */ + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of CLANHS */ + +} /* clanhs_ */ + +/* Subroutine */ int clarcm_(integer *m, integer *n, real *a, integer *lda, + complex *b, integer *ldb, complex *c__, integer *ldc, real *rwork) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3, i__4, i__5; + real r__1; + complex q__1; + + /* Builtin functions */ + double r_imag(complex *); + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CLARCM performs a very simple matrix-matrix multiplication: + C := A * B, + where A is M by M and real; B is M by N and complex; + C is M by N and complex. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A and of the matrix C. + M >= 0. + + N (input) INTEGER + The number of columns and rows of the matrix B and + the number of columns of the matrix C. + N >= 0. + + A (input) REAL array, dimension (LDA, M) + A contains the M by M matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >=max(1,M). + + B (input) REAL array, dimension (LDB, N) + B contains the M by N matrix B. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >=max(1,M). + + C (input) COMPLEX array, dimension (LDC, N) + C contains the M by N matrix C. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >=max(1,M). + + RWORK (workspace) REAL array, dimension (2*M*N) + + ===================================================================== + + + Quick return if possible. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --rwork; + + /* Function Body */ + if (*m == 0 || *n == 0) { + return 0; + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + rwork[(j - 1) * *m + i__] = b[i__3].r; +/* L10: */ + } +/* L20: */ + } + + l = *m * *n + 1; + sgemm_("N", "N", m, n, m, &c_b871, &a[a_offset], lda, &rwork[1], m, & + c_b1101, &rwork[l], m); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = l + (j - 1) * *m + i__ - 1; + c__[i__3].r = rwork[i__4], c__[i__3].i = 0.f; +/* L30: */ + } +/* L40: */ + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + rwork[(j - 1) * *m + i__] = r_imag(&b[i__ + j * b_dim1]); +/* L50: */ + } +/* L60: */ + } + sgemm_("N", "N", m, n, m, &c_b871, &a[a_offset], lda, &rwork[1], m, & + c_b1101, &rwork[l], m); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + r__1 = c__[i__4].r; + i__5 = l + (j - 1) * *m + i__ - 1; + q__1.r = r__1, q__1.i = rwork[i__5]; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L70: */ + } +/* L80: */ + } + + return 0; + +/* End of CLARCM */ + +} /* clarcm_ */ + +/* Subroutine */ int clarf_(char *side, integer *m, integer *n, complex *v, + integer *incv, complex *tau, complex *c__, integer *ldc, complex * + work) +{ + /* System generated locals */ + integer c_dim1, c_offset; + complex q__1; + + /* Local variables */ + extern /* Subroutine */ int cgerc_(integer *, integer *, complex *, + complex *, integer *, complex *, integer *, complex *, integer *), + cgemv_(char *, integer *, integer *, complex *, complex *, + integer *, complex *, integer *, complex *, complex *, integer *); + extern logical lsame_(char *, char *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CLARF applies a complex elementary reflector H to a complex M-by-N + matrix C, from either the left or the right. H is represented in the + form + + H = I - tau * v * v' + + where tau is a complex scalar and v is a complex vector. + + If tau = 0, then H is taken to be the unit matrix. + + To apply H' (the conjugate transpose of H), supply conjg(tau) instead + tau. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': form H * C + = 'R': form C * H + + M (input) INTEGER + The number of rows of the matrix C. + + N (input) INTEGER + The number of columns of the matrix C. + + V (input) COMPLEX array, dimension + (1 + (M-1)*abs(INCV)) if SIDE = 'L' + or (1 + (N-1)*abs(INCV)) if SIDE = 'R' + The vector v in the representation of H. V is not used if + TAU = 0. + + INCV (input) INTEGER + The increment between elements of v. INCV <> 0. + + TAU (input) COMPLEX + The value tau in the representation of H. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by the matrix H * C if SIDE = 'L', + or C * H if SIDE = 'R'. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) COMPLEX array, dimension + (N) if SIDE = 'L' + or (M) if SIDE = 'R' + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + if (lsame_(side, "L")) { + +/* Form H * C */ + + if (tau->r != 0.f || tau->i != 0.f) { + +/* w := C' * v */ + + cgemv_("Conjugate transpose", m, n, &c_b56, &c__[c_offset], ldc, & + v[1], incv, &c_b55, &work[1], &c__1); + +/* C := C - v * w' */ + + q__1.r = -tau->r, q__1.i = -tau->i; + cgerc_(m, n, &q__1, &v[1], incv, &work[1], &c__1, &c__[c_offset], + ldc); + } + } else { + +/* Form C * H */ + + if (tau->r != 0.f || tau->i != 0.f) { + +/* w := C * v */ + + cgemv_("No transpose", m, n, &c_b56, &c__[c_offset], ldc, &v[1], + incv, &c_b55, &work[1], &c__1); + +/* C := C - w * v' */ + + q__1.r = -tau->r, q__1.i = -tau->i; + cgerc_(m, n, &q__1, &work[1], &c__1, &v[1], incv, &c__[c_offset], + ldc); + } + } + return 0; + +/* End of CLARF */ + +} /* clarf_ */ + +/* Subroutine */ int clarfb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, complex *v, integer *ldv, + complex *t, integer *ldt, complex *c__, integer *ldc, complex *work, + integer *ldwork) +{ + /* System generated locals */ + integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, + work_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1, q__2; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, j; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, + complex *, integer *), ctrmm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *), clacgv_(integer *, + complex *, integer *); + static char transt[1]; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CLARFB applies a complex block reflector H or its transpose H' to a + complex M-by-N matrix C, from either the left or the right. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply H or H' from the Left + = 'R': apply H or H' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply H (No transpose) + = 'C': apply H' (Conjugate transpose) + + DIRECT (input) CHARACTER*1 + Indicates how H is formed from a product of elementary + reflectors + = 'F': H = H(1) H(2) . . . H(k) (Forward) + = 'B': H = H(k) . . . H(2) H(1) (Backward) + + STOREV (input) CHARACTER*1 + Indicates how the vectors which define the elementary + reflectors are stored: + = 'C': Columnwise + = 'R': Rowwise + + M (input) INTEGER + The number of rows of the matrix C. + + N (input) INTEGER + The number of columns of the matrix C. + + K (input) INTEGER + The order of the matrix T (= the number of elementary + reflectors whose product defines the block reflector). + + V (input) COMPLEX array, dimension + (LDV,K) if STOREV = 'C' + (LDV,M) if STOREV = 'R' and SIDE = 'L' + (LDV,N) if STOREV = 'R' and SIDE = 'R' + The matrix V. See further details. + + LDV (input) INTEGER + The leading dimension of the array V. + If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); + if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); + if STOREV = 'R', LDV >= K. + + T (input) COMPLEX array, dimension (LDT,K) + The triangular K-by-K matrix T in the representation of the + block reflector. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= K. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by H*C or H'*C or C*H or C*H'. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) COMPLEX array, dimension (LDWORK,K) + + LDWORK (input) INTEGER + The leading dimension of the array WORK. + If SIDE = 'L', LDWORK >= max(1,N); + if SIDE = 'R', LDWORK >= max(1,M). + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + work_dim1 = *ldwork; + work_offset = 1 + work_dim1; + work -= work_offset; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + return 0; + } + + if (lsame_(trans, "N")) { + *(unsigned char *)transt = 'C'; + } else { + *(unsigned char *)transt = 'N'; + } + + if (lsame_(storev, "C")) { + + if (lsame_(direct, "F")) { + +/* + Let V = ( V1 ) (first K rows) + ( V2 ) + where V1 is unit lower triangular. +*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) + + W := C1' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + ccopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], + &c__1); + clacgv_(n, &work[j * work_dim1 + 1], &c__1); +/* L10: */ + } + +/* W := W * V1 */ + + ctrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b56, + &v[v_offset], ldv, &work[work_offset], ldwork); + if (*m > *k) { + +/* W := W + C2'*V2 */ + + i__1 = *m - *k; + cgemm_("Conjugate transpose", "No transpose", n, k, &i__1, + &c_b56, &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + + v_dim1], ldv, &c_b56, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + ctrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b56, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V * W' */ + + if (*m > *k) { + +/* C2 := C2 - V2 * W' */ + + i__1 = *m - *k; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "Conjugate transpose", &i__1, n, k, + &q__1, &v[*k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork, &c_b56, &c__[*k + 1 + + c_dim1], ldc); + } + +/* W := W * V1' */ + + ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", n, k, + &c_b56, &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ * c_dim1; + i__4 = j + i__ * c_dim1; + r_cnjg(&q__2, &work[i__ + j * work_dim1]); + q__1.r = c__[i__4].r - q__2.r, q__1.i = c__[i__4].i - + q__2.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L20: */ + } +/* L30: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + W := C * V = (C1*V1 + C2*V2) (stored in WORK) + + W := C1 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + ccopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L40: */ + } + +/* W := W * V1 */ + + ctrmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b56, + &v[v_offset], ldv, &work[work_offset], ldwork); + if (*n > *k) { + +/* W := W + C2 * V2 */ + + i__1 = *n - *k; + cgemm_("No transpose", "No transpose", m, k, &i__1, & + c_b56, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + + 1 + v_dim1], ldv, &c_b56, &work[work_offset], + ldwork); + } + +/* W := W * T or W * T' */ + + ctrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b56, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V' */ + + if (*n > *k) { + +/* C2 := C2 - W * V2' */ + + i__1 = *n - *k; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "Conjugate transpose", m, &i__1, k, + &q__1, &work[work_offset], ldwork, &v[*k + 1 + + v_dim1], ldv, &c_b56, &c__[(*k + 1) * c_dim1 + 1], + ldc); + } + +/* W := W * V1' */ + + ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", m, k, + &c_b56, &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * work_dim1; + q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[ + i__4].i - work[i__5].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L50: */ + } +/* L60: */ + } + } + + } else { + +/* + Let V = ( V1 ) + ( V2 ) (last K rows) + where V2 is unit upper triangular. +*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) + + W := C2' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + ccopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * + work_dim1 + 1], &c__1); + clacgv_(n, &work[j * work_dim1 + 1], &c__1); +/* L70: */ + } + +/* W := W * V2 */ + + ctrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b56, + &v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], + ldwork); + if (*m > *k) { + +/* W := W + C1'*V1 */ + + i__1 = *m - *k; + cgemm_("Conjugate transpose", "No transpose", n, k, &i__1, + &c_b56, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b56, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + ctrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b56, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V * W' */ + + if (*m > *k) { + +/* C1 := C1 - V1 * W' */ + + i__1 = *m - *k; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "Conjugate transpose", &i__1, n, k, + &q__1, &v[v_offset], ldv, &work[work_offset], + ldwork, &c_b56, &c__[c_offset], ldc); + } + +/* W := W * V2' */ + + ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", n, k, + &c_b56, &v[*m - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + +/* C2 := C2 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = *m - *k + j + i__ * c_dim1; + i__4 = *m - *k + j + i__ * c_dim1; + r_cnjg(&q__2, &work[i__ + j * work_dim1]); + q__1.r = c__[i__4].r - q__2.r, q__1.i = c__[i__4].i - + q__2.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L80: */ + } +/* L90: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + W := C * V = (C1*V1 + C2*V2) (stored in WORK) + + W := C2 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + ccopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ + j * work_dim1 + 1], &c__1); +/* L100: */ + } + +/* W := W * V2 */ + + ctrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b56, + &v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], + ldwork); + if (*n > *k) { + +/* W := W + C1 * V1 */ + + i__1 = *n - *k; + cgemm_("No transpose", "No transpose", m, k, &i__1, & + c_b56, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b56, &work[work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + ctrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b56, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V' */ + + if (*n > *k) { + +/* C1 := C1 - W * V1' */ + + i__1 = *n - *k; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "Conjugate transpose", m, &i__1, k, + &q__1, &work[work_offset], ldwork, &v[v_offset], + ldv, &c_b56, &c__[c_offset], ldc); + } + +/* W := W * V2' */ + + ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", m, k, + &c_b56, &v[*n - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + +/* C2 := C2 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + (*n - *k + j) * c_dim1; + i__4 = i__ + (*n - *k + j) * c_dim1; + i__5 = i__ + j * work_dim1; + q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[ + i__4].i - work[i__5].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L110: */ + } +/* L120: */ + } + } + } + + } else if (lsame_(storev, "R")) { + + if (lsame_(direct, "F")) { + +/* + Let V = ( V1 V2 ) (V1: first K columns) + where V1 is unit upper triangular. +*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) + + W := C1' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + ccopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], + &c__1); + clacgv_(n, &work[j * work_dim1 + 1], &c__1); +/* L130: */ + } + +/* W := W * V1' */ + + ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", n, k, + &c_b56, &v[v_offset], ldv, &work[work_offset], ldwork); + if (*m > *k) { + +/* W := W + C2'*V2' */ + + i__1 = *m - *k; + cgemm_("Conjugate transpose", "Conjugate transpose", n, k, + &i__1, &c_b56, &c__[*k + 1 + c_dim1], ldc, &v[(* + k + 1) * v_dim1 + 1], ldv, &c_b56, &work[ + work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + ctrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b56, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V' * W' */ + + if (*m > *k) { + +/* C2 := C2 - V2' * W' */ + + i__1 = *m - *k; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("Conjugate transpose", "Conjugate transpose", & + i__1, n, k, &q__1, &v[(*k + 1) * v_dim1 + 1], ldv, + &work[work_offset], ldwork, &c_b56, &c__[*k + 1 + + c_dim1], ldc); + } + +/* W := W * V1 */ + + ctrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b56, + &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ * c_dim1; + i__4 = j + i__ * c_dim1; + r_cnjg(&q__2, &work[i__ + j * work_dim1]); + q__1.r = c__[i__4].r - q__2.r, q__1.i = c__[i__4].i - + q__2.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L140: */ + } +/* L150: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + W := C * V' = (C1*V1' + C2*V2') (stored in WORK) + + W := C1 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + ccopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L160: */ + } + +/* W := W * V1' */ + + ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", m, k, + &c_b56, &v[v_offset], ldv, &work[work_offset], ldwork); + if (*n > *k) { + +/* W := W + C2 * V2' */ + + i__1 = *n - *k; + cgemm_("No transpose", "Conjugate transpose", m, k, &i__1, + &c_b56, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k + + 1) * v_dim1 + 1], ldv, &c_b56, &work[ + work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + ctrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b56, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V */ + + if (*n > *k) { + +/* C2 := C2 - W * V2 */ + + i__1 = *n - *k; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "No transpose", m, &i__1, k, &q__1, + &work[work_offset], ldwork, &v[(*k + 1) * v_dim1 + + 1], ldv, &c_b56, &c__[(*k + 1) * c_dim1 + 1], + ldc); + } + +/* W := W * V1 */ + + ctrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b56, + &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * work_dim1; + q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[ + i__4].i - work[i__5].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L170: */ + } +/* L180: */ + } + + } + + } else { + +/* + Let V = ( V1 V2 ) (V2: last K columns) + where V2 is unit lower triangular. +*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) + + W := C2' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + ccopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * + work_dim1 + 1], &c__1); + clacgv_(n, &work[j * work_dim1 + 1], &c__1); +/* L190: */ + } + +/* W := W * V2' */ + + ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", n, k, + &c_b56, &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + if (*m > *k) { + +/* W := W + C1'*V1' */ + + i__1 = *m - *k; + cgemm_("Conjugate transpose", "Conjugate transpose", n, k, + &i__1, &c_b56, &c__[c_offset], ldc, &v[v_offset], + ldv, &c_b56, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + ctrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b56, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V' * W' */ + + if (*m > *k) { + +/* C1 := C1 - V1' * W' */ + + i__1 = *m - *k; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("Conjugate transpose", "Conjugate transpose", & + i__1, n, k, &q__1, &v[v_offset], ldv, &work[ + work_offset], ldwork, &c_b56, &c__[c_offset], ldc); + } + +/* W := W * V2 */ + + ctrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b56, + &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + +/* C2 := C2 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = *m - *k + j + i__ * c_dim1; + i__4 = *m - *k + j + i__ * c_dim1; + r_cnjg(&q__2, &work[i__ + j * work_dim1]); + q__1.r = c__[i__4].r - q__2.r, q__1.i = c__[i__4].i - + q__2.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L200: */ + } +/* L210: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + W := C * V' = (C1*V1' + C2*V2') (stored in WORK) + + W := C2 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + ccopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ + j * work_dim1 + 1], &c__1); +/* L220: */ + } + +/* W := W * V2' */ + + ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", m, k, + &c_b56, &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + if (*n > *k) { + +/* W := W + C1 * V1' */ + + i__1 = *n - *k; + cgemm_("No transpose", "Conjugate transpose", m, k, &i__1, + &c_b56, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b56, &work[work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + ctrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b56, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V */ + + if (*n > *k) { + +/* C1 := C1 - W * V1 */ + + i__1 = *n - *k; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "No transpose", m, &i__1, k, &q__1, + &work[work_offset], ldwork, &v[v_offset], ldv, & + c_b56, &c__[c_offset], ldc); + } + +/* W := W * V2 */ + + ctrmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b56, + &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + (*n - *k + j) * c_dim1; + i__4 = i__ + (*n - *k + j) * c_dim1; + i__5 = i__ + j * work_dim1; + q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[ + i__4].i - work[i__5].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L230: */ + } +/* L240: */ + } + + } + + } + } + + return 0; + +/* End of CLARFB */ + +} /* clarfb_ */ + +/* Subroutine */ int clarfg_(integer *n, complex *alpha, complex *x, integer * + incx, complex *tau) +{ + /* System generated locals */ + integer i__1; + real r__1, r__2; + complex q__1, q__2; + + /* Builtin functions */ + double r_imag(complex *), r_sign(real *, real *); + + /* Local variables */ + static integer j, knt; + static real beta; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *); + static real alphi, alphr, xnorm; + extern doublereal scnrm2_(integer *, complex *, integer *), slapy3_(real * + , real *, real *); + extern /* Complex */ VOID cladiv_(complex *, complex *, complex *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *); + static real safmin, rsafmn; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CLARFG generates a complex elementary reflector H of order n, such + that + + H' * ( alpha ) = ( beta ), H' * H = I. + ( x ) ( 0 ) + + where alpha and beta are scalars, with beta real, and x is an + (n-1)-element complex vector. H is represented in the form + + H = I - tau * ( 1 ) * ( 1 v' ) , + ( v ) + + where tau is a complex scalar and v is a complex (n-1)-element + vector. Note that H is not hermitian. + + If the elements of x are all zero and alpha is real, then tau = 0 + and H is taken to be the unit matrix. + + Otherwise 1 <= real(tau) <= 2 and abs(tau-1) <= 1 . + + Arguments + ========= + + N (input) INTEGER + The order of the elementary reflector. + + ALPHA (input/output) COMPLEX + On entry, the value alpha. + On exit, it is overwritten with the value beta. + + X (input/output) COMPLEX array, dimension + (1+(N-2)*abs(INCX)) + On entry, the vector x. + On exit, it is overwritten with the vector v. + + INCX (input) INTEGER + The increment between elements of X. INCX > 0. + + TAU (output) COMPLEX + The value tau. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n <= 0) { + tau->r = 0.f, tau->i = 0.f; + return 0; + } + + i__1 = *n - 1; + xnorm = scnrm2_(&i__1, &x[1], incx); + alphr = alpha->r; + alphi = r_imag(alpha); + + if (xnorm == 0.f && alphi == 0.f) { + +/* H = I */ + + tau->r = 0.f, tau->i = 0.f; + } else { + +/* general case */ + + r__1 = slapy3_(&alphr, &alphi, &xnorm); + beta = -r_sign(&r__1, &alphr); + safmin = slamch_("S") / slamch_("E"); + rsafmn = 1.f / safmin; + + if (dabs(beta) < safmin) { + +/* XNORM, BETA may be inaccurate; scale X and recompute them */ + + knt = 0; +L10: + ++knt; + i__1 = *n - 1; + csscal_(&i__1, &rsafmn, &x[1], incx); + beta *= rsafmn; + alphi *= rsafmn; + alphr *= rsafmn; + if (dabs(beta) < safmin) { + goto L10; + } + +/* New BETA is at most 1, at least SAFMIN */ + + i__1 = *n - 1; + xnorm = scnrm2_(&i__1, &x[1], incx); + q__1.r = alphr, q__1.i = alphi; + alpha->r = q__1.r, alpha->i = q__1.i; + r__1 = slapy3_(&alphr, &alphi, &xnorm); + beta = -r_sign(&r__1, &alphr); + r__1 = (beta - alphr) / beta; + r__2 = -alphi / beta; + q__1.r = r__1, q__1.i = r__2; + tau->r = q__1.r, tau->i = q__1.i; + q__2.r = alpha->r - beta, q__2.i = alpha->i; + cladiv_(&q__1, &c_b56, &q__2); + alpha->r = q__1.r, alpha->i = q__1.i; + i__1 = *n - 1; + cscal_(&i__1, alpha, &x[1], incx); + +/* If ALPHA is subnormal, it may lose relative accuracy */ + + alpha->r = beta, alpha->i = 0.f; + i__1 = knt; + for (j = 1; j <= i__1; ++j) { + q__1.r = safmin * alpha->r, q__1.i = safmin * alpha->i; + alpha->r = q__1.r, alpha->i = q__1.i; +/* L20: */ + } + } else { + r__1 = (beta - alphr) / beta; + r__2 = -alphi / beta; + q__1.r = r__1, q__1.i = r__2; + tau->r = q__1.r, tau->i = q__1.i; + q__2.r = alpha->r - beta, q__2.i = alpha->i; + cladiv_(&q__1, &c_b56, &q__2); + alpha->r = q__1.r, alpha->i = q__1.i; + i__1 = *n - 1; + cscal_(&i__1, alpha, &x[1], incx); + alpha->r = beta, alpha->i = 0.f; + } + } + + return 0; + +/* End of CLARFG */ + +} /* clarfg_ */ + +/* Subroutine */ int clarft_(char *direct, char *storev, integer *n, integer * + k, complex *v, integer *ldv, complex *tau, complex *t, integer *ldt) +{ + /* System generated locals */ + integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4; + complex q__1; + + /* Local variables */ + static integer i__, j; + static complex vii; + extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * + , complex *, integer *, complex *, integer *, complex *, complex * + , integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int ctrmv_(char *, char *, char *, integer *, + complex *, integer *, complex *, integer *), clacgv_(integer *, complex *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CLARFT forms the triangular factor T of a complex block reflector H + of order n, which is defined as a product of k elementary reflectors. + + If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; + + If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. + + If STOREV = 'C', the vector which defines the elementary reflector + H(i) is stored in the i-th column of the array V, and + + H = I - V * T * V' + + If STOREV = 'R', the vector which defines the elementary reflector + H(i) is stored in the i-th row of the array V, and + + H = I - V' * T * V + + Arguments + ========= + + DIRECT (input) CHARACTER*1 + Specifies the order in which the elementary reflectors are + multiplied to form the block reflector: + = 'F': H = H(1) H(2) . . . H(k) (Forward) + = 'B': H = H(k) . . . H(2) H(1) (Backward) + + STOREV (input) CHARACTER*1 + Specifies how the vectors which define the elementary + reflectors are stored (see also Further Details): + = 'C': columnwise + = 'R': rowwise + + N (input) INTEGER + The order of the block reflector H. N >= 0. + + K (input) INTEGER + The order of the triangular factor T (= the number of + elementary reflectors). K >= 1. + + V (input/output) COMPLEX array, dimension + (LDV,K) if STOREV = 'C' + (LDV,N) if STOREV = 'R' + The matrix V. See further details. + + LDV (input) INTEGER + The leading dimension of the array V. + If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. + + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i). + + T (output) COMPLEX array, dimension (LDT,K) + The k by k triangular factor T of the block reflector. + If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is + lower triangular. The rest of the array is not used. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= K. + + Further Details + =============== + + The shape of the matrix V and the storage of the vectors which define + the H(i) is best illustrated by the following example with n = 5 and + k = 3. The elements equal to 1 are not stored; the corresponding + array elements are modified but restored on exit. The rest of the + array is not used. + + DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': + + V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) + ( v1 1 ) ( 1 v2 v2 v2 ) + ( v1 v2 1 ) ( 1 v3 v3 ) + ( v1 v2 v3 ) + ( v1 v2 v3 ) + + DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': + + V = ( v1 v2 v3 ) V = ( v1 v1 1 ) + ( v1 v2 v3 ) ( v2 v2 v2 1 ) + ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) + ( 1 v3 ) + ( 1 ) + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + --tau; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + + /* Function Body */ + if (*n == 0) { + return 0; + } + + if (lsame_(direct, "F")) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + if (tau[i__2].r == 0.f && tau[i__2].i == 0.f) { + +/* H(i) = I */ + + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + i__3 = j + i__ * t_dim1; + t[i__3].r = 0.f, t[i__3].i = 0.f; +/* L10: */ + } + } else { + +/* general case */ + + i__2 = i__ + i__ * v_dim1; + vii.r = v[i__2].r, vii.i = v[i__2].i; + i__2 = i__ + i__ * v_dim1; + v[i__2].r = 1.f, v[i__2].i = 0.f; + if (lsame_(storev, "C")) { + +/* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */ + + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + i__4 = i__; + q__1.r = -tau[i__4].r, q__1.i = -tau[i__4].i; + cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &v[i__ + + v_dim1], ldv, &v[i__ + i__ * v_dim1], &c__1, & + c_b55, &t[i__ * t_dim1 + 1], &c__1); + } else { + +/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */ + + if (i__ < *n) { + i__2 = *n - i__; + clacgv_(&i__2, &v[i__ + (i__ + 1) * v_dim1], ldv); + } + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + i__4 = i__; + q__1.r = -tau[i__4].r, q__1.i = -tau[i__4].i; + cgemv_("No transpose", &i__2, &i__3, &q__1, &v[i__ * + v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, & + c_b55, &t[i__ * t_dim1 + 1], &c__1); + if (i__ < *n) { + i__2 = *n - i__; + clacgv_(&i__2, &v[i__ + (i__ + 1) * v_dim1], ldv); + } + } + i__2 = i__ + i__ * v_dim1; + v[i__2].r = vii.r, v[i__2].i = vii.i; + +/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ + + i__2 = i__ - 1; + ctrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ + t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); + i__2 = i__ + i__ * t_dim1; + i__3 = i__; + t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i; + } +/* L20: */ + } + } else { + for (i__ = *k; i__ >= 1; --i__) { + i__1 = i__; + if (tau[i__1].r == 0.f && tau[i__1].i == 0.f) { + +/* H(i) = I */ + + i__1 = *k; + for (j = i__; j <= i__1; ++j) { + i__2 = j + i__ * t_dim1; + t[i__2].r = 0.f, t[i__2].i = 0.f; +/* L30: */ + } + } else { + +/* general case */ + + if (i__ < *k) { + if (lsame_(storev, "C")) { + i__1 = *n - *k + i__ + i__ * v_dim1; + vii.r = v[i__1].r, vii.i = v[i__1].i; + i__1 = *n - *k + i__ + i__ * v_dim1; + v[i__1].r = 1.f, v[i__1].i = 0.f; + +/* + T(i+1:k,i) := + - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) +*/ + + i__1 = *n - *k + i__; + i__2 = *k - i__; + i__3 = i__; + q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i; + cgemv_("Conjugate transpose", &i__1, &i__2, &q__1, &v[ + (i__ + 1) * v_dim1 + 1], ldv, &v[i__ * v_dim1 + + 1], &c__1, &c_b55, &t[i__ + 1 + i__ * + t_dim1], &c__1); + i__1 = *n - *k + i__ + i__ * v_dim1; + v[i__1].r = vii.r, v[i__1].i = vii.i; + } else { + i__1 = i__ + (*n - *k + i__) * v_dim1; + vii.r = v[i__1].r, vii.i = v[i__1].i; + i__1 = i__ + (*n - *k + i__) * v_dim1; + v[i__1].r = 1.f, v[i__1].i = 0.f; + +/* + T(i+1:k,i) := + - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' +*/ + + i__1 = *n - *k + i__ - 1; + clacgv_(&i__1, &v[i__ + v_dim1], ldv); + i__1 = *k - i__; + i__2 = *n - *k + i__; + i__3 = i__; + q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i; + cgemv_("No transpose", &i__1, &i__2, &q__1, &v[i__ + + 1 + v_dim1], ldv, &v[i__ + v_dim1], ldv, & + c_b55, &t[i__ + 1 + i__ * t_dim1], &c__1); + i__1 = *n - *k + i__ - 1; + clacgv_(&i__1, &v[i__ + v_dim1], ldv); + i__1 = i__ + (*n - *k + i__) * v_dim1; + v[i__1].r = vii.r, v[i__1].i = vii.i; + } + +/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ + + i__1 = *k - i__; + ctrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ + + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * + t_dim1], &c__1) + ; + } + i__1 = i__ + i__ * t_dim1; + i__2 = i__; + t[i__1].r = tau[i__2].r, t[i__1].i = tau[i__2].i; + } +/* L40: */ + } + } + return 0; + +/* End of CLARFT */ + +} /* clarft_ */ + +/* Subroutine */ int clarfx_(char *side, integer *m, integer *n, complex *v, + complex *tau, complex *c__, integer *ldc, complex *work) +{ + /* System generated locals */ + integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, + i__9, i__10, i__11; + complex q__1, q__2, q__3, q__4, q__5, q__6, q__7, q__8, q__9, q__10, + q__11, q__12, q__13, q__14, q__15, q__16, q__17, q__18, q__19; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer j; + static complex t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, v5, v6, + v7, v8, v9, t10, v10, sum; + extern /* Subroutine */ int cgerc_(integer *, integer *, complex *, + complex *, integer *, complex *, integer *, complex *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * + , complex *, integer *, complex *, integer *, complex *, complex * + , integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CLARFX applies a complex elementary reflector H to a complex m by n + matrix C, from either the left or the right. H is represented in the + form + + H = I - tau * v * v' + + where tau is a complex scalar and v is a complex vector. + + If tau = 0, then H is taken to be the unit matrix + + This version uses inline code if H has order < 11. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': form H * C + = 'R': form C * H + + M (input) INTEGER + The number of rows of the matrix C. + + N (input) INTEGER + The number of columns of the matrix C. + + V (input) COMPLEX array, dimension (M) if SIDE = 'L' + or (N) if SIDE = 'R' + The vector v in the representation of H. + + TAU (input) COMPLEX + The value tau in the representation of H. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by the matrix H * C if SIDE = 'L', + or C * H if SIDE = 'R'. + + LDC (input) INTEGER + The leading dimension of the array C. LDA >= max(1,M). + + WORK (workspace) COMPLEX array, dimension (N) if SIDE = 'L' + or (M) if SIDE = 'R' + WORK is not referenced if H has order < 11. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + if (tau->r == 0.f && tau->i == 0.f) { + return 0; + } + if (lsame_(side, "L")) { + +/* Form H * C, where H has order m. */ + + switch (*m) { + case 1: goto L10; + case 2: goto L30; + case 3: goto L50; + case 4: goto L70; + case 5: goto L90; + case 6: goto L110; + case 7: goto L130; + case 8: goto L150; + case 9: goto L170; + case 10: goto L190; + } + +/* + Code for general M + + w := C'*v +*/ + + cgemv_("Conjugate transpose", m, n, &c_b56, &c__[c_offset], ldc, &v[1] + , &c__1, &c_b55, &work[1], &c__1); + +/* C := C - tau * v * w' */ + + q__1.r = -tau->r, q__1.i = -tau->i; + cgerc_(m, n, &q__1, &v[1], &c__1, &work[1], &c__1, &c__[c_offset], + ldc); + goto L410; +L10: + +/* Special code for 1 x 1 Householder */ + + q__3.r = tau->r * v[1].r - tau->i * v[1].i, q__3.i = tau->r * v[1].i + + tau->i * v[1].r; + r_cnjg(&q__4, &v[1]); + q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, q__2.i = q__3.r * q__4.i + + q__3.i * q__4.r; + q__1.r = 1.f - q__2.r, q__1.i = 0.f - q__2.i; + t1.r = q__1.r, t1.i = q__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + q__1.r = t1.r * c__[i__3].r - t1.i * c__[i__3].i, q__1.i = t1.r * + c__[i__3].i + t1.i * c__[i__3].r; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L20: */ + } + goto L410; +L30: + +/* Special code for 2 x 2 Householder */ + + r_cnjg(&q__1, &v[1]); + v1.r = q__1.r, v1.i = q__1.i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + r_cnjg(&q__1, &v[2]); + v2.r = q__1.r, v2.i = q__1.i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; + q__2.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__2.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j * c_dim1 + 2; + q__3.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__3.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 2; + i__3 = j * c_dim1 + 2; + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L40: */ + } + goto L410; +L50: + +/* Special code for 3 x 3 Householder */ + + r_cnjg(&q__1, &v[1]); + v1.r = q__1.r, v1.i = q__1.i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + r_cnjg(&q__1, &v[2]); + v2.r = q__1.r, v2.i = q__1.i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + r_cnjg(&q__1, &v[3]); + v3.r = q__1.r, v3.i = q__1.i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; + q__3.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__3.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j * c_dim1 + 2; + q__4.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__4.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + q__4.i; + i__4 = j * c_dim1 + 3; + q__5.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__5.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + q__5.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 2; + i__3 = j * c_dim1 + 2; + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 3; + i__3 = j * c_dim1 + 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L60: */ + } + goto L410; +L70: + +/* Special code for 4 x 4 Householder */ + + r_cnjg(&q__1, &v[1]); + v1.r = q__1.r, v1.i = q__1.i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + r_cnjg(&q__1, &v[2]); + v2.r = q__1.r, v2.i = q__1.i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + r_cnjg(&q__1, &v[3]); + v3.r = q__1.r, v3.i = q__1.i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + r_cnjg(&q__1, &v[4]); + v4.r = q__1.r, v4.i = q__1.i; + r_cnjg(&q__2, &v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = q__1.r, t4.i = q__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; + q__4.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__4.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j * c_dim1 + 2; + q__5.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__5.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__3.r = q__4.r + q__5.r, q__3.i = q__4.i + q__5.i; + i__4 = j * c_dim1 + 3; + q__6.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__6.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + q__2.r = q__3.r + q__6.r, q__2.i = q__3.i + q__6.i; + i__5 = j * c_dim1 + 4; + q__7.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__7.i = v4.r * + c__[i__5].i + v4.i * c__[i__5].r; + q__1.r = q__2.r + q__7.r, q__1.i = q__2.i + q__7.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 2; + i__3 = j * c_dim1 + 2; + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 3; + i__3 = j * c_dim1 + 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 4; + i__3 = j * c_dim1 + 4; + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L80: */ + } + goto L410; +L90: + +/* Special code for 5 x 5 Householder */ + + r_cnjg(&q__1, &v[1]); + v1.r = q__1.r, v1.i = q__1.i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + r_cnjg(&q__1, &v[2]); + v2.r = q__1.r, v2.i = q__1.i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + r_cnjg(&q__1, &v[3]); + v3.r = q__1.r, v3.i = q__1.i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + r_cnjg(&q__1, &v[4]); + v4.r = q__1.r, v4.i = q__1.i; + r_cnjg(&q__2, &v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = q__1.r, t4.i = q__1.i; + r_cnjg(&q__1, &v[5]); + v5.r = q__1.r, v5.i = q__1.i; + r_cnjg(&q__2, &v5); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t5.r = q__1.r, t5.i = q__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; + q__5.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__5.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j * c_dim1 + 2; + q__6.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__6.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__4.r = q__5.r + q__6.r, q__4.i = q__5.i + q__6.i; + i__4 = j * c_dim1 + 3; + q__7.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__7.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + q__3.r = q__4.r + q__7.r, q__3.i = q__4.i + q__7.i; + i__5 = j * c_dim1 + 4; + q__8.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__8.i = v4.r * + c__[i__5].i + v4.i * c__[i__5].r; + q__2.r = q__3.r + q__8.r, q__2.i = q__3.i + q__8.i; + i__6 = j * c_dim1 + 5; + q__9.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__9.i = v5.r * + c__[i__6].i + v5.i * c__[i__6].r; + q__1.r = q__2.r + q__9.r, q__1.i = q__2.i + q__9.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 2; + i__3 = j * c_dim1 + 2; + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 3; + i__3 = j * c_dim1 + 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 4; + i__3 = j * c_dim1 + 4; + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 5; + i__3 = j * c_dim1 + 5; + q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + + sum.i * t5.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L100: */ + } + goto L410; +L110: + +/* Special code for 6 x 6 Householder */ + + r_cnjg(&q__1, &v[1]); + v1.r = q__1.r, v1.i = q__1.i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + r_cnjg(&q__1, &v[2]); + v2.r = q__1.r, v2.i = q__1.i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + r_cnjg(&q__1, &v[3]); + v3.r = q__1.r, v3.i = q__1.i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + r_cnjg(&q__1, &v[4]); + v4.r = q__1.r, v4.i = q__1.i; + r_cnjg(&q__2, &v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = q__1.r, t4.i = q__1.i; + r_cnjg(&q__1, &v[5]); + v5.r = q__1.r, v5.i = q__1.i; + r_cnjg(&q__2, &v5); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t5.r = q__1.r, t5.i = q__1.i; + r_cnjg(&q__1, &v[6]); + v6.r = q__1.r, v6.i = q__1.i; + r_cnjg(&q__2, &v6); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t6.r = q__1.r, t6.i = q__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; + q__6.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__6.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j * c_dim1 + 2; + q__7.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__7.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__5.r = q__6.r + q__7.r, q__5.i = q__6.i + q__7.i; + i__4 = j * c_dim1 + 3; + q__8.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__8.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + q__4.r = q__5.r + q__8.r, q__4.i = q__5.i + q__8.i; + i__5 = j * c_dim1 + 4; + q__9.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__9.i = v4.r * + c__[i__5].i + v4.i * c__[i__5].r; + q__3.r = q__4.r + q__9.r, q__3.i = q__4.i + q__9.i; + i__6 = j * c_dim1 + 5; + q__10.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__10.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + q__2.r = q__3.r + q__10.r, q__2.i = q__3.i + q__10.i; + i__7 = j * c_dim1 + 6; + q__11.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__11.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + q__1.r = q__2.r + q__11.r, q__1.i = q__2.i + q__11.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 2; + i__3 = j * c_dim1 + 2; + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 3; + i__3 = j * c_dim1 + 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 4; + i__3 = j * c_dim1 + 4; + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 5; + i__3 = j * c_dim1 + 5; + q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + + sum.i * t5.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 6; + i__3 = j * c_dim1 + 6; + q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + + sum.i * t6.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L120: */ + } + goto L410; +L130: + +/* Special code for 7 x 7 Householder */ + + r_cnjg(&q__1, &v[1]); + v1.r = q__1.r, v1.i = q__1.i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + r_cnjg(&q__1, &v[2]); + v2.r = q__1.r, v2.i = q__1.i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + r_cnjg(&q__1, &v[3]); + v3.r = q__1.r, v3.i = q__1.i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + r_cnjg(&q__1, &v[4]); + v4.r = q__1.r, v4.i = q__1.i; + r_cnjg(&q__2, &v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = q__1.r, t4.i = q__1.i; + r_cnjg(&q__1, &v[5]); + v5.r = q__1.r, v5.i = q__1.i; + r_cnjg(&q__2, &v5); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t5.r = q__1.r, t5.i = q__1.i; + r_cnjg(&q__1, &v[6]); + v6.r = q__1.r, v6.i = q__1.i; + r_cnjg(&q__2, &v6); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t6.r = q__1.r, t6.i = q__1.i; + r_cnjg(&q__1, &v[7]); + v7.r = q__1.r, v7.i = q__1.i; + r_cnjg(&q__2, &v7); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t7.r = q__1.r, t7.i = q__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; + q__7.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__7.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j * c_dim1 + 2; + q__8.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__8.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__6.r = q__7.r + q__8.r, q__6.i = q__7.i + q__8.i; + i__4 = j * c_dim1 + 3; + q__9.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__9.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + q__5.r = q__6.r + q__9.r, q__5.i = q__6.i + q__9.i; + i__5 = j * c_dim1 + 4; + q__10.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__10.i = v4.r + * c__[i__5].i + v4.i * c__[i__5].r; + q__4.r = q__5.r + q__10.r, q__4.i = q__5.i + q__10.i; + i__6 = j * c_dim1 + 5; + q__11.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__11.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + q__3.r = q__4.r + q__11.r, q__3.i = q__4.i + q__11.i; + i__7 = j * c_dim1 + 6; + q__12.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__12.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + q__2.r = q__3.r + q__12.r, q__2.i = q__3.i + q__12.i; + i__8 = j * c_dim1 + 7; + q__13.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__13.i = v7.r + * c__[i__8].i + v7.i * c__[i__8].r; + q__1.r = q__2.r + q__13.r, q__1.i = q__2.i + q__13.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 2; + i__3 = j * c_dim1 + 2; + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 3; + i__3 = j * c_dim1 + 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 4; + i__3 = j * c_dim1 + 4; + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 5; + i__3 = j * c_dim1 + 5; + q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + + sum.i * t5.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 6; + i__3 = j * c_dim1 + 6; + q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + + sum.i * t6.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 7; + i__3 = j * c_dim1 + 7; + q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + + sum.i * t7.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L140: */ + } + goto L410; +L150: + +/* Special code for 8 x 8 Householder */ + + r_cnjg(&q__1, &v[1]); + v1.r = q__1.r, v1.i = q__1.i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + r_cnjg(&q__1, &v[2]); + v2.r = q__1.r, v2.i = q__1.i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + r_cnjg(&q__1, &v[3]); + v3.r = q__1.r, v3.i = q__1.i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + r_cnjg(&q__1, &v[4]); + v4.r = q__1.r, v4.i = q__1.i; + r_cnjg(&q__2, &v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = q__1.r, t4.i = q__1.i; + r_cnjg(&q__1, &v[5]); + v5.r = q__1.r, v5.i = q__1.i; + r_cnjg(&q__2, &v5); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t5.r = q__1.r, t5.i = q__1.i; + r_cnjg(&q__1, &v[6]); + v6.r = q__1.r, v6.i = q__1.i; + r_cnjg(&q__2, &v6); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t6.r = q__1.r, t6.i = q__1.i; + r_cnjg(&q__1, &v[7]); + v7.r = q__1.r, v7.i = q__1.i; + r_cnjg(&q__2, &v7); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t7.r = q__1.r, t7.i = q__1.i; + r_cnjg(&q__1, &v[8]); + v8.r = q__1.r, v8.i = q__1.i; + r_cnjg(&q__2, &v8); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t8.r = q__1.r, t8.i = q__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; + q__8.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__8.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j * c_dim1 + 2; + q__9.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__9.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__7.r = q__8.r + q__9.r, q__7.i = q__8.i + q__9.i; + i__4 = j * c_dim1 + 3; + q__10.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__10.i = v3.r + * c__[i__4].i + v3.i * c__[i__4].r; + q__6.r = q__7.r + q__10.r, q__6.i = q__7.i + q__10.i; + i__5 = j * c_dim1 + 4; + q__11.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__11.i = v4.r + * c__[i__5].i + v4.i * c__[i__5].r; + q__5.r = q__6.r + q__11.r, q__5.i = q__6.i + q__11.i; + i__6 = j * c_dim1 + 5; + q__12.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__12.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + q__4.r = q__5.r + q__12.r, q__4.i = q__5.i + q__12.i; + i__7 = j * c_dim1 + 6; + q__13.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__13.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + q__3.r = q__4.r + q__13.r, q__3.i = q__4.i + q__13.i; + i__8 = j * c_dim1 + 7; + q__14.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__14.i = v7.r + * c__[i__8].i + v7.i * c__[i__8].r; + q__2.r = q__3.r + q__14.r, q__2.i = q__3.i + q__14.i; + i__9 = j * c_dim1 + 8; + q__15.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__15.i = v8.r + * c__[i__9].i + v8.i * c__[i__9].r; + q__1.r = q__2.r + q__15.r, q__1.i = q__2.i + q__15.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 2; + i__3 = j * c_dim1 + 2; + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 3; + i__3 = j * c_dim1 + 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 4; + i__3 = j * c_dim1 + 4; + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 5; + i__3 = j * c_dim1 + 5; + q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + + sum.i * t5.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 6; + i__3 = j * c_dim1 + 6; + q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + + sum.i * t6.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 7; + i__3 = j * c_dim1 + 7; + q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + + sum.i * t7.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 8; + i__3 = j * c_dim1 + 8; + q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + + sum.i * t8.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L160: */ + } + goto L410; +L170: + +/* Special code for 9 x 9 Householder */ + + r_cnjg(&q__1, &v[1]); + v1.r = q__1.r, v1.i = q__1.i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + r_cnjg(&q__1, &v[2]); + v2.r = q__1.r, v2.i = q__1.i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + r_cnjg(&q__1, &v[3]); + v3.r = q__1.r, v3.i = q__1.i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + r_cnjg(&q__1, &v[4]); + v4.r = q__1.r, v4.i = q__1.i; + r_cnjg(&q__2, &v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = q__1.r, t4.i = q__1.i; + r_cnjg(&q__1, &v[5]); + v5.r = q__1.r, v5.i = q__1.i; + r_cnjg(&q__2, &v5); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t5.r = q__1.r, t5.i = q__1.i; + r_cnjg(&q__1, &v[6]); + v6.r = q__1.r, v6.i = q__1.i; + r_cnjg(&q__2, &v6); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t6.r = q__1.r, t6.i = q__1.i; + r_cnjg(&q__1, &v[7]); + v7.r = q__1.r, v7.i = q__1.i; + r_cnjg(&q__2, &v7); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t7.r = q__1.r, t7.i = q__1.i; + r_cnjg(&q__1, &v[8]); + v8.r = q__1.r, v8.i = q__1.i; + r_cnjg(&q__2, &v8); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t8.r = q__1.r, t8.i = q__1.i; + r_cnjg(&q__1, &v[9]); + v9.r = q__1.r, v9.i = q__1.i; + r_cnjg(&q__2, &v9); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t9.r = q__1.r, t9.i = q__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; + q__9.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__9.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j * c_dim1 + 2; + q__10.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__10.i = v2.r + * c__[i__3].i + v2.i * c__[i__3].r; + q__8.r = q__9.r + q__10.r, q__8.i = q__9.i + q__10.i; + i__4 = j * c_dim1 + 3; + q__11.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__11.i = v3.r + * c__[i__4].i + v3.i * c__[i__4].r; + q__7.r = q__8.r + q__11.r, q__7.i = q__8.i + q__11.i; + i__5 = j * c_dim1 + 4; + q__12.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__12.i = v4.r + * c__[i__5].i + v4.i * c__[i__5].r; + q__6.r = q__7.r + q__12.r, q__6.i = q__7.i + q__12.i; + i__6 = j * c_dim1 + 5; + q__13.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__13.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + q__5.r = q__6.r + q__13.r, q__5.i = q__6.i + q__13.i; + i__7 = j * c_dim1 + 6; + q__14.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__14.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + q__4.r = q__5.r + q__14.r, q__4.i = q__5.i + q__14.i; + i__8 = j * c_dim1 + 7; + q__15.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__15.i = v7.r + * c__[i__8].i + v7.i * c__[i__8].r; + q__3.r = q__4.r + q__15.r, q__3.i = q__4.i + q__15.i; + i__9 = j * c_dim1 + 8; + q__16.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__16.i = v8.r + * c__[i__9].i + v8.i * c__[i__9].r; + q__2.r = q__3.r + q__16.r, q__2.i = q__3.i + q__16.i; + i__10 = j * c_dim1 + 9; + q__17.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, q__17.i = + v9.r * c__[i__10].i + v9.i * c__[i__10].r; + q__1.r = q__2.r + q__17.r, q__1.i = q__2.i + q__17.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 2; + i__3 = j * c_dim1 + 2; + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 3; + i__3 = j * c_dim1 + 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 4; + i__3 = j * c_dim1 + 4; + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 5; + i__3 = j * c_dim1 + 5; + q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + + sum.i * t5.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 6; + i__3 = j * c_dim1 + 6; + q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + + sum.i * t6.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 7; + i__3 = j * c_dim1 + 7; + q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + + sum.i * t7.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 8; + i__3 = j * c_dim1 + 8; + q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + + sum.i * t8.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 9; + i__3 = j * c_dim1 + 9; + q__2.r = sum.r * t9.r - sum.i * t9.i, q__2.i = sum.r * t9.i + + sum.i * t9.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L180: */ + } + goto L410; +L190: + +/* Special code for 10 x 10 Householder */ + + r_cnjg(&q__1, &v[1]); + v1.r = q__1.r, v1.i = q__1.i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + r_cnjg(&q__1, &v[2]); + v2.r = q__1.r, v2.i = q__1.i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + r_cnjg(&q__1, &v[3]); + v3.r = q__1.r, v3.i = q__1.i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + r_cnjg(&q__1, &v[4]); + v4.r = q__1.r, v4.i = q__1.i; + r_cnjg(&q__2, &v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = q__1.r, t4.i = q__1.i; + r_cnjg(&q__1, &v[5]); + v5.r = q__1.r, v5.i = q__1.i; + r_cnjg(&q__2, &v5); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t5.r = q__1.r, t5.i = q__1.i; + r_cnjg(&q__1, &v[6]); + v6.r = q__1.r, v6.i = q__1.i; + r_cnjg(&q__2, &v6); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t6.r = q__1.r, t6.i = q__1.i; + r_cnjg(&q__1, &v[7]); + v7.r = q__1.r, v7.i = q__1.i; + r_cnjg(&q__2, &v7); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t7.r = q__1.r, t7.i = q__1.i; + r_cnjg(&q__1, &v[8]); + v8.r = q__1.r, v8.i = q__1.i; + r_cnjg(&q__2, &v8); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t8.r = q__1.r, t8.i = q__1.i; + r_cnjg(&q__1, &v[9]); + v9.r = q__1.r, v9.i = q__1.i; + r_cnjg(&q__2, &v9); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t9.r = q__1.r, t9.i = q__1.i; + r_cnjg(&q__1, &v[10]); + v10.r = q__1.r, v10.i = q__1.i; + r_cnjg(&q__2, &v10); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t10.r = q__1.r, t10.i = q__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; + q__10.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__10.i = v1.r + * c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j * c_dim1 + 2; + q__11.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__11.i = v2.r + * c__[i__3].i + v2.i * c__[i__3].r; + q__9.r = q__10.r + q__11.r, q__9.i = q__10.i + q__11.i; + i__4 = j * c_dim1 + 3; + q__12.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__12.i = v3.r + * c__[i__4].i + v3.i * c__[i__4].r; + q__8.r = q__9.r + q__12.r, q__8.i = q__9.i + q__12.i; + i__5 = j * c_dim1 + 4; + q__13.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__13.i = v4.r + * c__[i__5].i + v4.i * c__[i__5].r; + q__7.r = q__8.r + q__13.r, q__7.i = q__8.i + q__13.i; + i__6 = j * c_dim1 + 5; + q__14.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__14.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + q__6.r = q__7.r + q__14.r, q__6.i = q__7.i + q__14.i; + i__7 = j * c_dim1 + 6; + q__15.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__15.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + q__5.r = q__6.r + q__15.r, q__5.i = q__6.i + q__15.i; + i__8 = j * c_dim1 + 7; + q__16.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__16.i = v7.r + * c__[i__8].i + v7.i * c__[i__8].r; + q__4.r = q__5.r + q__16.r, q__4.i = q__5.i + q__16.i; + i__9 = j * c_dim1 + 8; + q__17.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__17.i = v8.r + * c__[i__9].i + v8.i * c__[i__9].r; + q__3.r = q__4.r + q__17.r, q__3.i = q__4.i + q__17.i; + i__10 = j * c_dim1 + 9; + q__18.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, q__18.i = + v9.r * c__[i__10].i + v9.i * c__[i__10].r; + q__2.r = q__3.r + q__18.r, q__2.i = q__3.i + q__18.i; + i__11 = j * c_dim1 + 10; + q__19.r = v10.r * c__[i__11].r - v10.i * c__[i__11].i, q__19.i = + v10.r * c__[i__11].i + v10.i * c__[i__11].r; + q__1.r = q__2.r + q__19.r, q__1.i = q__2.i + q__19.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 2; + i__3 = j * c_dim1 + 2; + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 3; + i__3 = j * c_dim1 + 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 4; + i__3 = j * c_dim1 + 4; + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 5; + i__3 = j * c_dim1 + 5; + q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + + sum.i * t5.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 6; + i__3 = j * c_dim1 + 6; + q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + + sum.i * t6.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 7; + i__3 = j * c_dim1 + 7; + q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + + sum.i * t7.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 8; + i__3 = j * c_dim1 + 8; + q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + + sum.i * t8.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 9; + i__3 = j * c_dim1 + 9; + q__2.r = sum.r * t9.r - sum.i * t9.i, q__2.i = sum.r * t9.i + + sum.i * t9.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 10; + i__3 = j * c_dim1 + 10; + q__2.r = sum.r * t10.r - sum.i * t10.i, q__2.i = sum.r * t10.i + + sum.i * t10.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L200: */ + } + goto L410; + } else { + +/* Form C * H, where H has order n. */ + + switch (*n) { + case 1: goto L210; + case 2: goto L230; + case 3: goto L250; + case 4: goto L270; + case 5: goto L290; + case 6: goto L310; + case 7: goto L330; + case 8: goto L350; + case 9: goto L370; + case 10: goto L390; + } + +/* + Code for general N + + w := C * v +*/ + + cgemv_("No transpose", m, n, &c_b56, &c__[c_offset], ldc, &v[1], & + c__1, &c_b55, &work[1], &c__1); + +/* C := C - tau * w * v' */ + + q__1.r = -tau->r, q__1.i = -tau->i; + cgerc_(m, n, &q__1, &work[1], &c__1, &v[1], &c__1, &c__[c_offset], + ldc); + goto L410; +L210: + +/* Special code for 1 x 1 Householder */ + + q__3.r = tau->r * v[1].r - tau->i * v[1].i, q__3.i = tau->r * v[1].i + + tau->i * v[1].r; + r_cnjg(&q__4, &v[1]); + q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, q__2.i = q__3.r * q__4.i + + q__3.i * q__4.r; + q__1.r = 1.f - q__2.r, q__1.i = 0.f - q__2.i; + t1.r = q__1.r, t1.i = q__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + i__3 = j + c_dim1; + q__1.r = t1.r * c__[i__3].r - t1.i * c__[i__3].i, q__1.i = t1.r * + c__[i__3].i + t1.i * c__[i__3].r; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L220: */ + } + goto L410; +L230: + +/* Special code for 2 x 2 Householder */ + + v1.r = v[1].r, v1.i = v[1].i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + v2.r = v[2].r, v2.i = v[2].i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + q__2.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__2.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j + (c_dim1 << 1); + q__3.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__3.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j + c_dim1; + i__3 = j + c_dim1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L240: */ + } + goto L410; +L250: + +/* Special code for 3 x 3 Householder */ + + v1.r = v[1].r, v1.i = v[1].i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + v2.r = v[2].r, v2.i = v[2].i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + v3.r = v[3].r, v3.i = v[3].i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + q__3.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__3.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j + (c_dim1 << 1); + q__4.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__4.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + q__4.i; + i__4 = j + c_dim1 * 3; + q__5.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__5.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + q__5.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j + c_dim1; + i__3 = j + c_dim1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 3; + i__3 = j + c_dim1 * 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L260: */ + } + goto L410; +L270: + +/* Special code for 4 x 4 Householder */ + + v1.r = v[1].r, v1.i = v[1].i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + v2.r = v[2].r, v2.i = v[2].i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + v3.r = v[3].r, v3.i = v[3].i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + v4.r = v[4].r, v4.i = v[4].i; + r_cnjg(&q__2, &v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = q__1.r, t4.i = q__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + q__4.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__4.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j + (c_dim1 << 1); + q__5.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__5.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__3.r = q__4.r + q__5.r, q__3.i = q__4.i + q__5.i; + i__4 = j + c_dim1 * 3; + q__6.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__6.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + q__2.r = q__3.r + q__6.r, q__2.i = q__3.i + q__6.i; + i__5 = j + (c_dim1 << 2); + q__7.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__7.i = v4.r * + c__[i__5].i + v4.i * c__[i__5].r; + q__1.r = q__2.r + q__7.r, q__1.i = q__2.i + q__7.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j + c_dim1; + i__3 = j + c_dim1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 3; + i__3 = j + c_dim1 * 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L280: */ + } + goto L410; +L290: + +/* Special code for 5 x 5 Householder */ + + v1.r = v[1].r, v1.i = v[1].i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + v2.r = v[2].r, v2.i = v[2].i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + v3.r = v[3].r, v3.i = v[3].i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + v4.r = v[4].r, v4.i = v[4].i; + r_cnjg(&q__2, &v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = q__1.r, t4.i = q__1.i; + v5.r = v[5].r, v5.i = v[5].i; + r_cnjg(&q__2, &v5); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t5.r = q__1.r, t5.i = q__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + q__5.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__5.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j + (c_dim1 << 1); + q__6.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__6.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__4.r = q__5.r + q__6.r, q__4.i = q__5.i + q__6.i; + i__4 = j + c_dim1 * 3; + q__7.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__7.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + q__3.r = q__4.r + q__7.r, q__3.i = q__4.i + q__7.i; + i__5 = j + (c_dim1 << 2); + q__8.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__8.i = v4.r * + c__[i__5].i + v4.i * c__[i__5].r; + q__2.r = q__3.r + q__8.r, q__2.i = q__3.i + q__8.i; + i__6 = j + c_dim1 * 5; + q__9.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__9.i = v5.r * + c__[i__6].i + v5.i * c__[i__6].r; + q__1.r = q__2.r + q__9.r, q__1.i = q__2.i + q__9.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j + c_dim1; + i__3 = j + c_dim1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 3; + i__3 = j + c_dim1 * 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 5; + i__3 = j + c_dim1 * 5; + q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + + sum.i * t5.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L300: */ + } + goto L410; +L310: + +/* Special code for 6 x 6 Householder */ + + v1.r = v[1].r, v1.i = v[1].i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + v2.r = v[2].r, v2.i = v[2].i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + v3.r = v[3].r, v3.i = v[3].i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + v4.r = v[4].r, v4.i = v[4].i; + r_cnjg(&q__2, &v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = q__1.r, t4.i = q__1.i; + v5.r = v[5].r, v5.i = v[5].i; + r_cnjg(&q__2, &v5); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t5.r = q__1.r, t5.i = q__1.i; + v6.r = v[6].r, v6.i = v[6].i; + r_cnjg(&q__2, &v6); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t6.r = q__1.r, t6.i = q__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + q__6.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__6.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j + (c_dim1 << 1); + q__7.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__7.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__5.r = q__6.r + q__7.r, q__5.i = q__6.i + q__7.i; + i__4 = j + c_dim1 * 3; + q__8.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__8.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + q__4.r = q__5.r + q__8.r, q__4.i = q__5.i + q__8.i; + i__5 = j + (c_dim1 << 2); + q__9.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__9.i = v4.r * + c__[i__5].i + v4.i * c__[i__5].r; + q__3.r = q__4.r + q__9.r, q__3.i = q__4.i + q__9.i; + i__6 = j + c_dim1 * 5; + q__10.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__10.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + q__2.r = q__3.r + q__10.r, q__2.i = q__3.i + q__10.i; + i__7 = j + c_dim1 * 6; + q__11.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__11.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + q__1.r = q__2.r + q__11.r, q__1.i = q__2.i + q__11.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j + c_dim1; + i__3 = j + c_dim1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 3; + i__3 = j + c_dim1 * 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 5; + i__3 = j + c_dim1 * 5; + q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + + sum.i * t5.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 6; + i__3 = j + c_dim1 * 6; + q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + + sum.i * t6.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L320: */ + } + goto L410; +L330: + +/* Special code for 7 x 7 Householder */ + + v1.r = v[1].r, v1.i = v[1].i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + v2.r = v[2].r, v2.i = v[2].i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + v3.r = v[3].r, v3.i = v[3].i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + v4.r = v[4].r, v4.i = v[4].i; + r_cnjg(&q__2, &v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = q__1.r, t4.i = q__1.i; + v5.r = v[5].r, v5.i = v[5].i; + r_cnjg(&q__2, &v5); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t5.r = q__1.r, t5.i = q__1.i; + v6.r = v[6].r, v6.i = v[6].i; + r_cnjg(&q__2, &v6); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t6.r = q__1.r, t6.i = q__1.i; + v7.r = v[7].r, v7.i = v[7].i; + r_cnjg(&q__2, &v7); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t7.r = q__1.r, t7.i = q__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + q__7.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__7.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j + (c_dim1 << 1); + q__8.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__8.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__6.r = q__7.r + q__8.r, q__6.i = q__7.i + q__8.i; + i__4 = j + c_dim1 * 3; + q__9.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__9.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + q__5.r = q__6.r + q__9.r, q__5.i = q__6.i + q__9.i; + i__5 = j + (c_dim1 << 2); + q__10.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__10.i = v4.r + * c__[i__5].i + v4.i * c__[i__5].r; + q__4.r = q__5.r + q__10.r, q__4.i = q__5.i + q__10.i; + i__6 = j + c_dim1 * 5; + q__11.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__11.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + q__3.r = q__4.r + q__11.r, q__3.i = q__4.i + q__11.i; + i__7 = j + c_dim1 * 6; + q__12.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__12.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + q__2.r = q__3.r + q__12.r, q__2.i = q__3.i + q__12.i; + i__8 = j + c_dim1 * 7; + q__13.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__13.i = v7.r + * c__[i__8].i + v7.i * c__[i__8].r; + q__1.r = q__2.r + q__13.r, q__1.i = q__2.i + q__13.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j + c_dim1; + i__3 = j + c_dim1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 3; + i__3 = j + c_dim1 * 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 5; + i__3 = j + c_dim1 * 5; + q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + + sum.i * t5.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 6; + i__3 = j + c_dim1 * 6; + q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + + sum.i * t6.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 7; + i__3 = j + c_dim1 * 7; + q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + + sum.i * t7.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L340: */ + } + goto L410; +L350: + +/* Special code for 8 x 8 Householder */ + + v1.r = v[1].r, v1.i = v[1].i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + v2.r = v[2].r, v2.i = v[2].i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + v3.r = v[3].r, v3.i = v[3].i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + v4.r = v[4].r, v4.i = v[4].i; + r_cnjg(&q__2, &v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = q__1.r, t4.i = q__1.i; + v5.r = v[5].r, v5.i = v[5].i; + r_cnjg(&q__2, &v5); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t5.r = q__1.r, t5.i = q__1.i; + v6.r = v[6].r, v6.i = v[6].i; + r_cnjg(&q__2, &v6); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t6.r = q__1.r, t6.i = q__1.i; + v7.r = v[7].r, v7.i = v[7].i; + r_cnjg(&q__2, &v7); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t7.r = q__1.r, t7.i = q__1.i; + v8.r = v[8].r, v8.i = v[8].i; + r_cnjg(&q__2, &v8); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t8.r = q__1.r, t8.i = q__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + q__8.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__8.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j + (c_dim1 << 1); + q__9.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__9.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__7.r = q__8.r + q__9.r, q__7.i = q__8.i + q__9.i; + i__4 = j + c_dim1 * 3; + q__10.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__10.i = v3.r + * c__[i__4].i + v3.i * c__[i__4].r; + q__6.r = q__7.r + q__10.r, q__6.i = q__7.i + q__10.i; + i__5 = j + (c_dim1 << 2); + q__11.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__11.i = v4.r + * c__[i__5].i + v4.i * c__[i__5].r; + q__5.r = q__6.r + q__11.r, q__5.i = q__6.i + q__11.i; + i__6 = j + c_dim1 * 5; + q__12.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__12.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + q__4.r = q__5.r + q__12.r, q__4.i = q__5.i + q__12.i; + i__7 = j + c_dim1 * 6; + q__13.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__13.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + q__3.r = q__4.r + q__13.r, q__3.i = q__4.i + q__13.i; + i__8 = j + c_dim1 * 7; + q__14.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__14.i = v7.r + * c__[i__8].i + v7.i * c__[i__8].r; + q__2.r = q__3.r + q__14.r, q__2.i = q__3.i + q__14.i; + i__9 = j + (c_dim1 << 3); + q__15.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__15.i = v8.r + * c__[i__9].i + v8.i * c__[i__9].r; + q__1.r = q__2.r + q__15.r, q__1.i = q__2.i + q__15.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j + c_dim1; + i__3 = j + c_dim1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 3; + i__3 = j + c_dim1 * 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 5; + i__3 = j + c_dim1 * 5; + q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + + sum.i * t5.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 6; + i__3 = j + c_dim1 * 6; + q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + + sum.i * t6.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 7; + i__3 = j + c_dim1 * 7; + q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + + sum.i * t7.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 3); + i__3 = j + (c_dim1 << 3); + q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + + sum.i * t8.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L360: */ + } + goto L410; +L370: + +/* Special code for 9 x 9 Householder */ + + v1.r = v[1].r, v1.i = v[1].i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + v2.r = v[2].r, v2.i = v[2].i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + v3.r = v[3].r, v3.i = v[3].i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + v4.r = v[4].r, v4.i = v[4].i; + r_cnjg(&q__2, &v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = q__1.r, t4.i = q__1.i; + v5.r = v[5].r, v5.i = v[5].i; + r_cnjg(&q__2, &v5); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t5.r = q__1.r, t5.i = q__1.i; + v6.r = v[6].r, v6.i = v[6].i; + r_cnjg(&q__2, &v6); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t6.r = q__1.r, t6.i = q__1.i; + v7.r = v[7].r, v7.i = v[7].i; + r_cnjg(&q__2, &v7); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t7.r = q__1.r, t7.i = q__1.i; + v8.r = v[8].r, v8.i = v[8].i; + r_cnjg(&q__2, &v8); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t8.r = q__1.r, t8.i = q__1.i; + v9.r = v[9].r, v9.i = v[9].i; + r_cnjg(&q__2, &v9); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t9.r = q__1.r, t9.i = q__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + q__9.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__9.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j + (c_dim1 << 1); + q__10.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__10.i = v2.r + * c__[i__3].i + v2.i * c__[i__3].r; + q__8.r = q__9.r + q__10.r, q__8.i = q__9.i + q__10.i; + i__4 = j + c_dim1 * 3; + q__11.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__11.i = v3.r + * c__[i__4].i + v3.i * c__[i__4].r; + q__7.r = q__8.r + q__11.r, q__7.i = q__8.i + q__11.i; + i__5 = j + (c_dim1 << 2); + q__12.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__12.i = v4.r + * c__[i__5].i + v4.i * c__[i__5].r; + q__6.r = q__7.r + q__12.r, q__6.i = q__7.i + q__12.i; + i__6 = j + c_dim1 * 5; + q__13.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__13.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + q__5.r = q__6.r + q__13.r, q__5.i = q__6.i + q__13.i; + i__7 = j + c_dim1 * 6; + q__14.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__14.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + q__4.r = q__5.r + q__14.r, q__4.i = q__5.i + q__14.i; + i__8 = j + c_dim1 * 7; + q__15.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__15.i = v7.r + * c__[i__8].i + v7.i * c__[i__8].r; + q__3.r = q__4.r + q__15.r, q__3.i = q__4.i + q__15.i; + i__9 = j + (c_dim1 << 3); + q__16.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__16.i = v8.r + * c__[i__9].i + v8.i * c__[i__9].r; + q__2.r = q__3.r + q__16.r, q__2.i = q__3.i + q__16.i; + i__10 = j + c_dim1 * 9; + q__17.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, q__17.i = + v9.r * c__[i__10].i + v9.i * c__[i__10].r; + q__1.r = q__2.r + q__17.r, q__1.i = q__2.i + q__17.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j + c_dim1; + i__3 = j + c_dim1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 3; + i__3 = j + c_dim1 * 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 5; + i__3 = j + c_dim1 * 5; + q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + + sum.i * t5.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 6; + i__3 = j + c_dim1 * 6; + q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + + sum.i * t6.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 7; + i__3 = j + c_dim1 * 7; + q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + + sum.i * t7.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 3); + i__3 = j + (c_dim1 << 3); + q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + + sum.i * t8.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 9; + i__3 = j + c_dim1 * 9; + q__2.r = sum.r * t9.r - sum.i * t9.i, q__2.i = sum.r * t9.i + + sum.i * t9.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L380: */ + } + goto L410; +L390: + +/* Special code for 10 x 10 Householder */ + + v1.r = v[1].r, v1.i = v[1].i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + v2.r = v[2].r, v2.i = v[2].i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + v3.r = v[3].r, v3.i = v[3].i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + v4.r = v[4].r, v4.i = v[4].i; + r_cnjg(&q__2, &v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = q__1.r, t4.i = q__1.i; + v5.r = v[5].r, v5.i = v[5].i; + r_cnjg(&q__2, &v5); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t5.r = q__1.r, t5.i = q__1.i; + v6.r = v[6].r, v6.i = v[6].i; + r_cnjg(&q__2, &v6); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t6.r = q__1.r, t6.i = q__1.i; + v7.r = v[7].r, v7.i = v[7].i; + r_cnjg(&q__2, &v7); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t7.r = q__1.r, t7.i = q__1.i; + v8.r = v[8].r, v8.i = v[8].i; + r_cnjg(&q__2, &v8); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t8.r = q__1.r, t8.i = q__1.i; + v9.r = v[9].r, v9.i = v[9].i; + r_cnjg(&q__2, &v9); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t9.r = q__1.r, t9.i = q__1.i; + v10.r = v[10].r, v10.i = v[10].i; + r_cnjg(&q__2, &v10); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t10.r = q__1.r, t10.i = q__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + q__10.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__10.i = v1.r + * c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j + (c_dim1 << 1); + q__11.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__11.i = v2.r + * c__[i__3].i + v2.i * c__[i__3].r; + q__9.r = q__10.r + q__11.r, q__9.i = q__10.i + q__11.i; + i__4 = j + c_dim1 * 3; + q__12.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__12.i = v3.r + * c__[i__4].i + v3.i * c__[i__4].r; + q__8.r = q__9.r + q__12.r, q__8.i = q__9.i + q__12.i; + i__5 = j + (c_dim1 << 2); + q__13.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__13.i = v4.r + * c__[i__5].i + v4.i * c__[i__5].r; + q__7.r = q__8.r + q__13.r, q__7.i = q__8.i + q__13.i; + i__6 = j + c_dim1 * 5; + q__14.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__14.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + q__6.r = q__7.r + q__14.r, q__6.i = q__7.i + q__14.i; + i__7 = j + c_dim1 * 6; + q__15.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__15.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + q__5.r = q__6.r + q__15.r, q__5.i = q__6.i + q__15.i; + i__8 = j + c_dim1 * 7; + q__16.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__16.i = v7.r + * c__[i__8].i + v7.i * c__[i__8].r; + q__4.r = q__5.r + q__16.r, q__4.i = q__5.i + q__16.i; + i__9 = j + (c_dim1 << 3); + q__17.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__17.i = v8.r + * c__[i__9].i + v8.i * c__[i__9].r; + q__3.r = q__4.r + q__17.r, q__3.i = q__4.i + q__17.i; + i__10 = j + c_dim1 * 9; + q__18.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, q__18.i = + v9.r * c__[i__10].i + v9.i * c__[i__10].r; + q__2.r = q__3.r + q__18.r, q__2.i = q__3.i + q__18.i; + i__11 = j + c_dim1 * 10; + q__19.r = v10.r * c__[i__11].r - v10.i * c__[i__11].i, q__19.i = + v10.r * c__[i__11].i + v10.i * c__[i__11].r; + q__1.r = q__2.r + q__19.r, q__1.i = q__2.i + q__19.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j + c_dim1; + i__3 = j + c_dim1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 3; + i__3 = j + c_dim1 * 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 5; + i__3 = j + c_dim1 * 5; + q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + + sum.i * t5.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 6; + i__3 = j + c_dim1 * 6; + q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + + sum.i * t6.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 7; + i__3 = j + c_dim1 * 7; + q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + + sum.i * t7.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 3); + i__3 = j + (c_dim1 << 3); + q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + + sum.i * t8.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 9; + i__3 = j + c_dim1 * 9; + q__2.r = sum.r * t9.r - sum.i * t9.i, q__2.i = sum.r * t9.i + + sum.i * t9.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 10; + i__3 = j + c_dim1 * 10; + q__2.r = sum.r * t10.r - sum.i * t10.i, q__2.i = sum.r * t10.i + + sum.i * t10.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L400: */ + } + goto L410; + } +L410: + return 0; + +/* End of CLARFX */ + +} /* clarfx_ */ + +/* Subroutine */ int clascl_(char *type__, integer *kl, integer *ku, real * + cfrom, real *cto, integer *m, integer *n, complex *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1; + + /* Local variables */ + static integer i__, j, k1, k2, k3, k4; + static real mul, cto1; + static logical done; + static real ctoc; + extern logical lsame_(char *, char *); + static integer itype; + static real cfrom1; + extern doublereal slamch_(char *); + static real cfromc; + extern /* Subroutine */ int xerbla_(char *, integer *); + static real bignum, smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + CLASCL multiplies the M by N complex matrix A by the real scalar + CTO/CFROM. This is done without over/underflow as long as the final + result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that + A may be full, upper triangular, lower triangular, upper Hessenberg, + or banded. + + Arguments + ========= + + TYPE (input) CHARACTER*1 + TYPE indices the storage type of the input matrix. + = 'G': A is a full matrix. + = 'L': A is a lower triangular matrix. + = 'U': A is an upper triangular matrix. + = 'H': A is an upper Hessenberg matrix. + = 'B': A is a symmetric band matrix with lower bandwidth KL + and upper bandwidth KU and with the only the lower + half stored. + = 'Q': A is a symmetric band matrix with lower bandwidth KL + and upper bandwidth KU and with the only the upper + half stored. + = 'Z': A is a band matrix with lower bandwidth KL and upper + bandwidth KU. + + KL (input) INTEGER + The lower bandwidth of A. Referenced only if TYPE = 'B', + 'Q' or 'Z'. + + KU (input) INTEGER + The upper bandwidth of A. Referenced only if TYPE = 'B', + 'Q' or 'Z'. + + CFROM (input) REAL + CTO (input) REAL + The matrix A is multiplied by CTO/CFROM. A(I,J) is computed + without over/underflow if the final result CTO*A(I,J)/CFROM + can be represented without over/underflow. CFROM must be + nonzero. + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,M) + The matrix to be multiplied by CTO/CFROM. See TYPE for the + storage type. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + INFO (output) INTEGER + 0 - successful exit + <0 - if INFO = -i, the i-th argument had an illegal value. + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + + if (lsame_(type__, "G")) { + itype = 0; + } else if (lsame_(type__, "L")) { + itype = 1; + } else if (lsame_(type__, "U")) { + itype = 2; + } else if (lsame_(type__, "H")) { + itype = 3; + } else if (lsame_(type__, "B")) { + itype = 4; + } else if (lsame_(type__, "Q")) { + itype = 5; + } else if (lsame_(type__, "Z")) { + itype = 6; + } else { + itype = -1; + } + + if (itype == -1) { + *info = -1; + } else if (*cfrom == 0.f) { + *info = -4; + } else if (*m < 0) { + *info = -6; + } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { + *info = -7; + } else if (itype <= 3 && *lda < max(1,*m)) { + *info = -9; + } else if (itype >= 4) { +/* Computing MAX */ + i__1 = *m - 1; + if (*kl < 0 || *kl > max(i__1,0)) { + *info = -2; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *n - 1; + if (*ku < 0 || *ku > max(i__1,0) || (itype == 4 || itype == 5) && + *kl != *ku) { + *info = -3; + } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < * + ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) { + *info = -9; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CLASCL", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *m == 0) { + return 0; + } + +/* Get machine parameters */ + + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + + cfromc = *cfrom; + ctoc = *cto; + +L10: + cfrom1 = cfromc * smlnum; + cto1 = ctoc / bignum; + if (dabs(cfrom1) > dabs(ctoc) && ctoc != 0.f) { + mul = smlnum; + done = FALSE_; + cfromc = cfrom1; + } else if (dabs(cto1) > dabs(cfromc)) { + mul = bignum; + done = FALSE_; + ctoc = cto1; + } else { + mul = ctoc / cfromc; + done = TRUE_; + } + + if (itype == 0) { + +/* Full matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L20: */ + } +/* L30: */ + } + + } else if (itype == 1) { + +/* Lower triangular matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L40: */ + } +/* L50: */ + } + + } else if (itype == 2) { + +/* Upper triangular matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L60: */ + } +/* L70: */ + } + + } else if (itype == 3) { + +/* Upper Hessenberg matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j + 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L80: */ + } +/* L90: */ + } + + } else if (itype == 4) { + +/* Lower half of a symmetric band matrix */ + + k3 = *kl + 1; + k4 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = k3, i__4 = k4 - j; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L100: */ + } +/* L110: */ + } + + } else if (itype == 5) { + +/* Upper half of a symmetric band matrix */ + + k1 = *ku + 2; + k3 = *ku + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = k1 - j; + i__3 = k3; + for (i__ = max(i__2,1); i__ <= i__3; ++i__) { + i__2 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* L120: */ + } +/* L130: */ + } + + } else if (itype == 6) { + +/* Band matrix */ + + k1 = *kl + *ku + 2; + k2 = *kl + 1; + k3 = (*kl << 1) + *ku + 1; + k4 = *kl + *ku + 1 + *m; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__3 = k1 - j; +/* Computing MIN */ + i__4 = k3, i__5 = k4 - j; + i__2 = min(i__4,i__5); + for (i__ = max(i__3,k2); i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L140: */ + } +/* L150: */ + } + + } + + if (! done) { + goto L10; + } + + return 0; + +/* End of CLASCL */ + +} /* clascl_ */ + +/* Subroutine */ int claset_(char *uplo, integer *m, integer *n, complex * + alpha, complex *beta, complex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + CLASET initializes a 2-D array A to BETA on the diagonal and + ALPHA on the offdiagonals. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies the part of the matrix A to be set. + = 'U': Upper triangular part is set. The lower triangle + is unchanged. + = 'L': Lower triangular part is set. The upper triangle + is unchanged. + Otherwise: All of the matrix A is set. + + M (input) INTEGER + On entry, M specifies the number of rows of A. + + N (input) INTEGER + On entry, N specifies the number of columns of A. + + ALPHA (input) COMPLEX + All the offdiagonal array elements are set to ALPHA. + + BETA (input) COMPLEX + All the diagonal array elements are set to BETA. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the m by n matrix A. + On exit, A(i,j) = ALPHA, 1 <= i <= m, 1 <= j <= n, i.ne.j; + A(i,i) = BETA , 1 <= i <= min(m,n) + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (lsame_(uplo, "U")) { + +/* + Set the diagonal to BETA and the strictly upper triangular + part of the array to ALPHA. +*/ + + i__1 = *n; + for (j = 2; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j - 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = alpha->r, a[i__3].i = alpha->i; +/* L10: */ + } +/* L20: */ + } + i__1 = min(*n,*m); + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = beta->r, a[i__2].i = beta->i; +/* L30: */ + } + + } else if (lsame_(uplo, "L")) { + +/* + Set the diagonal to BETA and the strictly lower triangular + part of the array to ALPHA. +*/ + + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = alpha->r, a[i__3].i = alpha->i; +/* L40: */ + } +/* L50: */ + } + i__1 = min(*n,*m); + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = beta->r, a[i__2].i = beta->i; +/* L60: */ + } + + } else { + +/* + Set the array to BETA on the diagonal and ALPHA on the + offdiagonal. +*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = alpha->r, a[i__3].i = alpha->i; +/* L70: */ + } +/* L80: */ + } + i__1 = min(*m,*n); + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = beta->r, a[i__2].i = beta->i; +/* L90: */ + } + } + + return 0; + +/* End of CLASET */ + +} /* claset_ */ + +/* Subroutine */ int clasr_(char *side, char *pivot, char *direct, integer *m, + integer *n, real *c__, real *s, complex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + complex q__1, q__2, q__3; + + /* Local variables */ + static integer i__, j, info; + static complex temp; + extern logical lsame_(char *, char *); + static real ctemp, stemp; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + CLASR performs the transformation + + A := P*A, when SIDE = 'L' or 'l' ( Left-hand side ) + + A := A*P', when SIDE = 'R' or 'r' ( Right-hand side ) + + where A is an m by n complex matrix and P is an orthogonal matrix, + consisting of a sequence of plane rotations determined by the + parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l' + and z = n when SIDE = 'R' or 'r' ): + + When DIRECT = 'F' or 'f' ( Forward sequence ) then + + P = P( z - 1 )*...*P( 2 )*P( 1 ), + + and when DIRECT = 'B' or 'b' ( Backward sequence ) then + + P = P( 1 )*P( 2 )*...*P( z - 1 ), + + where P( k ) is a plane rotation matrix for the following planes: + + when PIVOT = 'V' or 'v' ( Variable pivot ), + the plane ( k, k + 1 ) + + when PIVOT = 'T' or 't' ( Top pivot ), + the plane ( 1, k + 1 ) + + when PIVOT = 'B' or 'b' ( Bottom pivot ), + the plane ( k, z ) + + c( k ) and s( k ) must contain the cosine and sine that define the + matrix P( k ). The two by two plane rotation part of the matrix + P( k ), R( k ), is assumed to be of the form + + R( k ) = ( c( k ) s( k ) ). + ( -s( k ) c( k ) ) + + Arguments + ========= + + SIDE (input) CHARACTER*1 + Specifies whether the plane rotation matrix P is applied to + A on the left or the right. + = 'L': Left, compute A := P*A + = 'R': Right, compute A:= A*P' + + DIRECT (input) CHARACTER*1 + Specifies whether P is a forward or backward sequence of + plane rotations. + = 'F': Forward, P = P( z - 1 )*...*P( 2 )*P( 1 ) + = 'B': Backward, P = P( 1 )*P( 2 )*...*P( z - 1 ) + + PIVOT (input) CHARACTER*1 + Specifies the plane for which P(k) is a plane rotation + matrix. + = 'V': Variable pivot, the plane (k,k+1) + = 'T': Top pivot, the plane (1,k+1) + = 'B': Bottom pivot, the plane (k,z) + + M (input) INTEGER + The number of rows of the matrix A. If m <= 1, an immediate + return is effected. + + N (input) INTEGER + The number of columns of the matrix A. If n <= 1, an + immediate return is effected. + + C, S (input) REAL arrays, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + c(k) and s(k) contain the cosine and sine that define the + matrix P(k). The two by two plane rotation part of the + matrix P(k), R(k), is assumed to be of the form + R( k ) = ( c( k ) s( k ) ). + ( -s( k ) c( k ) ) + + A (input/output) COMPLEX array, dimension (LDA,N) + The m by n matrix A. On exit, A is overwritten by P*A if + SIDE = 'R' or by A*P' if SIDE = 'L'. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + --c__; + --s; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (! (lsame_(side, "L") || lsame_(side, "R"))) { + info = 1; + } else if (! (lsame_(pivot, "V") || lsame_(pivot, + "T") || lsame_(pivot, "B"))) { + info = 2; + } else if (! (lsame_(direct, "F") || lsame_(direct, + "B"))) { + info = 3; + } else if (*m < 0) { + info = 4; + } else if (*n < 0) { + info = 5; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_("CLASR ", &info); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + if (lsame_(side, "L")) { + +/* Form P * A */ + + if (lsame_(pivot, "V")) { + if (lsame_(direct, "F")) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + 1 + i__ * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = j + 1 + i__ * a_dim1; + q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; + i__4 = j + i__ * a_dim1; + q__3.r = stemp * a[i__4].r, q__3.i = stemp * a[ + i__4].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; + i__3 = j + i__ * a_dim1; + q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; + i__4 = j + i__ * a_dim1; + q__3.r = ctemp * a[i__4].r, q__3.i = ctemp * a[ + i__4].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L10: */ + } + } +/* L20: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = j + 1 + i__ * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = j + 1 + i__ * a_dim1; + q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; + i__3 = j + i__ * a_dim1; + q__3.r = stemp * a[i__3].r, q__3.i = stemp * a[ + i__3].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = j + i__ * a_dim1; + q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; + i__3 = j + i__ * a_dim1; + q__3.r = ctemp * a[i__3].r, q__3.i = ctemp * a[ + i__3].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* L30: */ + } + } +/* L40: */ + } + } + } else if (lsame_(pivot, "T")) { + if (lsame_(direct, "F")) { + i__1 = *m; + for (j = 2; j <= i__1; ++j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = j + i__ * a_dim1; + q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; + i__4 = i__ * a_dim1 + 1; + q__3.r = stemp * a[i__4].r, q__3.i = stemp * a[ + i__4].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; + i__3 = i__ * a_dim1 + 1; + q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; + i__4 = i__ * a_dim1 + 1; + q__3.r = ctemp * a[i__4].r, q__3.i = ctemp * a[ + i__4].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L50: */ + } + } +/* L60: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m; j >= 2; --j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = j + i__ * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = j + i__ * a_dim1; + q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; + i__3 = i__ * a_dim1 + 1; + q__3.r = stemp * a[i__3].r, q__3.i = stemp * a[ + i__3].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = i__ * a_dim1 + 1; + q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; + i__3 = i__ * a_dim1 + 1; + q__3.r = ctemp * a[i__3].r, q__3.i = ctemp * a[ + i__3].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* L70: */ + } + } +/* L80: */ + } + } + } else if (lsame_(pivot, "B")) { + if (lsame_(direct, "F")) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = j + i__ * a_dim1; + i__4 = *m + i__ * a_dim1; + q__2.r = stemp * a[i__4].r, q__2.i = stemp * a[ + i__4].i; + q__3.r = ctemp * temp.r, q__3.i = ctemp * temp.i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; + i__3 = *m + i__ * a_dim1; + i__4 = *m + i__ * a_dim1; + q__2.r = ctemp * a[i__4].r, q__2.i = ctemp * a[ + i__4].i; + q__3.r = stemp * temp.r, q__3.i = stemp * temp.i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L90: */ + } + } +/* L100: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = j + i__ * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = j + i__ * a_dim1; + i__3 = *m + i__ * a_dim1; + q__2.r = stemp * a[i__3].r, q__2.i = stemp * a[ + i__3].i; + q__3.r = ctemp * temp.r, q__3.i = ctemp * temp.i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = *m + i__ * a_dim1; + i__3 = *m + i__ * a_dim1; + q__2.r = ctemp * a[i__3].r, q__2.i = ctemp * a[ + i__3].i; + q__3.r = stemp * temp.r, q__3.i = stemp * temp.i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* L110: */ + } + } +/* L120: */ + } + } + } + } else if (lsame_(side, "R")) { + +/* Form A * P' */ + + if (lsame_(pivot, "V")) { + if (lsame_(direct, "F")) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + (j + 1) * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = i__ + (j + 1) * a_dim1; + q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; + i__4 = i__ + j * a_dim1; + q__3.r = stemp * a[i__4].r, q__3.i = stemp * a[ + i__4].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; + i__3 = i__ + j * a_dim1; + q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; + i__4 = i__ + j * a_dim1; + q__3.r = ctemp * a[i__4].r, q__3.i = ctemp * a[ + i__4].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L130: */ + } + } +/* L140: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + (j + 1) * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = i__ + (j + 1) * a_dim1; + q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; + i__3 = i__ + j * a_dim1; + q__3.r = stemp * a[i__3].r, q__3.i = stemp * a[ + i__3].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = i__ + j * a_dim1; + q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; + i__3 = i__ + j * a_dim1; + q__3.r = ctemp * a[i__3].r, q__3.i = ctemp * a[ + i__3].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* L150: */ + } + } +/* L160: */ + } + } + } else if (lsame_(pivot, "T")) { + if (lsame_(direct, "F")) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = i__ + j * a_dim1; + q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; + i__4 = i__ + a_dim1; + q__3.r = stemp * a[i__4].r, q__3.i = stemp * a[ + i__4].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; + i__3 = i__ + a_dim1; + q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; + i__4 = i__ + a_dim1; + q__3.r = ctemp * a[i__4].r, q__3.i = ctemp * a[ + i__4].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L170: */ + } + } +/* L180: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n; j >= 2; --j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = i__ + j * a_dim1; + q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; + i__3 = i__ + a_dim1; + q__3.r = stemp * a[i__3].r, q__3.i = stemp * a[ + i__3].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = i__ + a_dim1; + q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; + i__3 = i__ + a_dim1; + q__3.r = ctemp * a[i__3].r, q__3.i = ctemp * a[ + i__3].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* L190: */ + } + } +/* L200: */ + } + } + } else if (lsame_(pivot, "B")) { + if (lsame_(direct, "F")) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = i__ + j * a_dim1; + i__4 = i__ + *n * a_dim1; + q__2.r = stemp * a[i__4].r, q__2.i = stemp * a[ + i__4].i; + q__3.r = ctemp * temp.r, q__3.i = ctemp * temp.i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; + i__3 = i__ + *n * a_dim1; + i__4 = i__ + *n * a_dim1; + q__2.r = ctemp * a[i__4].r, q__2.i = ctemp * a[ + i__4].i; + q__3.r = stemp * temp.r, q__3.i = stemp * temp.i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L210: */ + } + } +/* L220: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = i__ + j * a_dim1; + i__3 = i__ + *n * a_dim1; + q__2.r = stemp * a[i__3].r, q__2.i = stemp * a[ + i__3].i; + q__3.r = ctemp * temp.r, q__3.i = ctemp * temp.i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = i__ + *n * a_dim1; + i__3 = i__ + *n * a_dim1; + q__2.r = ctemp * a[i__3].r, q__2.i = ctemp * a[ + i__3].i; + q__3.r = stemp * temp.r, q__3.i = stemp * temp.i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* L230: */ + } + } +/* L240: */ + } + } + } + } + + return 0; + +/* End of CLASR */ + +} /* clasr_ */ + +/* Subroutine */ int classq_(integer *n, complex *x, integer *incx, real * + scale, real *sumsq) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + real r__1; + + /* Builtin functions */ + double r_imag(complex *); + + /* Local variables */ + static integer ix; + static real temp1; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CLASSQ returns the values scl and ssq such that + + ( scl**2 )*ssq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, + + where x( i ) = abs( X( 1 + ( i - 1 )*INCX ) ). The value of sumsq is + assumed to be at least unity and the value of ssq will then satisfy + + 1.0 .le. ssq .le. ( sumsq + 2*n ). + + scale is assumed to be non-negative and scl returns the value + + scl = max( scale, abs( real( x( i ) ) ), abs( aimag( x( i ) ) ) ), + i + + scale and sumsq must be supplied in SCALE and SUMSQ respectively. + SCALE and SUMSQ are overwritten by scl and ssq respectively. + + The routine makes only one pass through the vector X. + + Arguments + ========= + + N (input) INTEGER + The number of elements to be used from the vector X. + + X (input) COMPLEX array, dimension (N) + The vector x as described above. + x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. + + INCX (input) INTEGER + The increment between successive values of the vector X. + INCX > 0. + + SCALE (input/output) REAL + On entry, the value scale in the equation above. + On exit, SCALE is overwritten with the value scl . + + SUMSQ (input/output) REAL + On entry, the value sumsq in the equation above. + On exit, SUMSQ is overwritten with the value ssq . + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n > 0) { + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + i__3 = ix; + if (x[i__3].r != 0.f) { + i__3 = ix; + temp1 = (r__1 = x[i__3].r, dabs(r__1)); + if (*scale < temp1) { +/* Computing 2nd power */ + r__1 = *scale / temp1; + *sumsq = *sumsq * (r__1 * r__1) + 1; + *scale = temp1; + } else { +/* Computing 2nd power */ + r__1 = temp1 / *scale; + *sumsq += r__1 * r__1; + } + } + if (r_imag(&x[ix]) != 0.f) { + temp1 = (r__1 = r_imag(&x[ix]), dabs(r__1)); + if (*scale < temp1) { +/* Computing 2nd power */ + r__1 = *scale / temp1; + *sumsq = *sumsq * (r__1 * r__1) + 1; + *scale = temp1; + } else { +/* Computing 2nd power */ + r__1 = temp1 / *scale; + *sumsq += r__1 * r__1; + } + } +/* L10: */ + } + } + + return 0; + +/* End of CLASSQ */ + +} /* classq_ */ + +/* Subroutine */ int claswp_(integer *n, complex *a, integer *lda, integer * + k1, integer *k2, integer *ipiv, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + + /* Local variables */ + static integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc; + static complex temp; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CLASWP performs a series of row interchanges on the matrix A. + One row interchange is initiated for each of rows K1 through K2 of A. + + Arguments + ========= + + N (input) INTEGER + The number of columns of the matrix A. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the matrix of column dimension N to which the row + interchanges will be applied. + On exit, the permuted matrix. + + LDA (input) INTEGER + The leading dimension of the array A. + + K1 (input) INTEGER + The first element of IPIV for which a row interchange will + be done. + + K2 (input) INTEGER + The last element of IPIV for which a row interchange will + be done. + + IPIV (input) INTEGER array, dimension (M*abs(INCX)) + The vector of pivot indices. Only the elements in positions + K1 through K2 of IPIV are accessed. + IPIV(K) = L implies rows K and L are to be interchanged. + + INCX (input) INTEGER + The increment between successive values of IPIV. If IPIV + is negative, the pivots are applied in reverse order. + + Further Details + =============== + + Modified by + R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA + + ===================================================================== + + + Interchange row I with row IPIV(I) for each of rows K1 through K2. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + if (*incx > 0) { + ix0 = *k1; + i1 = *k1; + i2 = *k2; + inc = 1; + } else if (*incx < 0) { + ix0 = (1 - *k2) * *incx + 1; + i1 = *k2; + i2 = *k1; + inc = -1; + } else { + return 0; + } + + n32 = *n / 32 << 5; + if (n32 != 0) { + i__1 = n32; + for (j = 1; j <= i__1; j += 32) { + ix = ix0; + i__2 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) + { + ip = ipiv[ix]; + if (ip != i__) { + i__4 = j + 31; + for (k = j; k <= i__4; ++k) { + i__5 = i__ + k * a_dim1; + temp.r = a[i__5].r, temp.i = a[i__5].i; + i__5 = i__ + k * a_dim1; + i__6 = ip + k * a_dim1; + a[i__5].r = a[i__6].r, a[i__5].i = a[i__6].i; + i__5 = ip + k * a_dim1; + a[i__5].r = temp.r, a[i__5].i = temp.i; +/* L10: */ + } + } + ix += *incx; +/* L20: */ + } +/* L30: */ + } + } + if (n32 != *n) { + ++n32; + ix = ix0; + i__1 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__3) { + ip = ipiv[ix]; + if (ip != i__) { + i__2 = *n; + for (k = n32; k <= i__2; ++k) { + i__4 = i__ + k * a_dim1; + temp.r = a[i__4].r, temp.i = a[i__4].i; + i__4 = i__ + k * a_dim1; + i__5 = ip + k * a_dim1; + a[i__4].r = a[i__5].r, a[i__4].i = a[i__5].i; + i__4 = ip + k * a_dim1; + a[i__4].r = temp.r, a[i__4].i = temp.i; +/* L40: */ + } + } + ix += *incx; +/* L50: */ + } + } + + return 0; + +/* End of CLASWP */ + +} /* claswp_ */ + +/* Subroutine */ int clatrd_(char *uplo, integer *n, integer *nb, complex *a, + integer *lda, real *e, complex *tau, complex *w, integer *ldw) +{ + /* System generated locals */ + integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3; + real r__1; + complex q__1, q__2, q__3, q__4; + + /* Local variables */ + static integer i__, iw; + static complex alpha; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *); + extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer + *, complex *, integer *); + extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * + , complex *, integer *, complex *, integer *, complex *, complex * + , integer *), chemv_(char *, integer *, complex *, + complex *, integer *, complex *, integer *, complex *, complex *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, + integer *, complex *, integer *), clarfg_(integer *, complex *, + complex *, integer *, complex *), clacgv_(integer *, complex *, + integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CLATRD reduces NB rows and columns of a complex Hermitian matrix A to + Hermitian tridiagonal form by a unitary similarity + transformation Q' * A * Q, and returns the matrices V and W which are + needed to apply the transformation to the unreduced part of A. + + If UPLO = 'U', CLATRD reduces the last NB rows and columns of a + matrix, of which the upper triangle is supplied; + if UPLO = 'L', CLATRD reduces the first NB rows and columns of a + matrix, of which the lower triangle is supplied. + + This is an auxiliary routine called by CHETRD. + + Arguments + ========= + + UPLO (input) CHARACTER + Specifies whether the upper or lower triangular part of the + Hermitian matrix A is stored: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. + + NB (input) INTEGER + The number of rows and columns to be reduced. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the Hermitian matrix A. If UPLO = 'U', the leading + n-by-n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n-by-n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit: + if UPLO = 'U', the last NB columns have been reduced to + tridiagonal form, with the diagonal elements overwriting + the diagonal elements of A; the elements above the diagonal + with the array TAU, represent the unitary matrix Q as a + product of elementary reflectors; + if UPLO = 'L', the first NB columns have been reduced to + tridiagonal form, with the diagonal elements overwriting + the diagonal elements of A; the elements below the diagonal + with the array TAU, represent the unitary matrix Q as a + product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + E (output) REAL array, dimension (N-1) + If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal + elements of the last NB columns of the reduced matrix; + if UPLO = 'L', E(1:nb) contains the subdiagonal elements of + the first NB columns of the reduced matrix. + + TAU (output) COMPLEX array, dimension (N-1) + The scalar factors of the elementary reflectors, stored in + TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. + See Further Details. + + W (output) COMPLEX array, dimension (LDW,NB) + The n-by-nb matrix W required to update the unreduced part + of A. + + LDW (input) INTEGER + The leading dimension of the array W. LDW >= max(1,N). + + Further Details + =============== + + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors + + Q = H(n) H(n-1) . . . H(n-nb+1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), + and tau in TAU(i-1). + + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors + + Q = H(1) H(2) . . . H(nb). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), + and tau in TAU(i). + + The elements of the vectors v together form the n-by-nb matrix V + which is needed, with W, to apply the transformation to the unreduced + part of the matrix, using a Hermitian rank-2k update of the form: + A := A - V*W' - W*V'. + + The contents of A on exit are illustrated by the following examples + with n = 5 and nb = 2: + + if UPLO = 'U': if UPLO = 'L': + + ( a a a v4 v5 ) ( d ) + ( a a v4 v5 ) ( 1 d ) + ( a 1 v5 ) ( v1 1 a ) + ( d 1 ) ( v1 v2 a a ) + ( d ) ( v1 v2 a a a ) + + where d denotes a diagonal element of the reduced matrix, a denotes + an element of the original matrix that is unchanged, and vi denotes + an element of the vector defining H(i). + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --e; + --tau; + w_dim1 = *ldw; + w_offset = 1 + w_dim1; + w -= w_offset; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + + if (lsame_(uplo, "U")) { + +/* Reduce last NB columns of upper triangle */ + + i__1 = *n - *nb + 1; + for (i__ = *n; i__ >= i__1; --i__) { + iw = i__ - *n + *nb; + if (i__ < *n) { + +/* Update A(1:i,i) */ + + i__2 = i__ + i__ * a_dim1; + i__3 = i__ + i__ * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; + i__2 = *n - i__; + clacgv_(&i__2, &w[i__ + (iw + 1) * w_dim1], ldw); + i__2 = *n - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__, &i__2, &q__1, &a[(i__ + 1) * + a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, & + c_b56, &a[i__ * a_dim1 + 1], &c__1); + i__2 = *n - i__; + clacgv_(&i__2, &w[i__ + (iw + 1) * w_dim1], ldw); + i__2 = *n - i__; + clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = *n - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__, &i__2, &q__1, &w[(iw + 1) * + w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b56, &a[i__ * a_dim1 + 1], &c__1); + i__2 = *n - i__; + clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + i__3 = i__ + i__ * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; + } + if (i__ > 1) { + +/* + Generate elementary reflector H(i) to annihilate + A(1:i-2,i) +*/ + + i__2 = i__ - 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = i__ - 1; + clarfg_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &tau[i__ + - 1]); + i__2 = i__ - 1; + e[i__2] = alpha.r; + i__2 = i__ - 1 + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Compute W(1:i-1,i) */ + + i__2 = i__ - 1; + chemv_("Upper", &i__2, &c_b56, &a[a_offset], lda, &a[i__ * + a_dim1 + 1], &c__1, &c_b55, &w[iw * w_dim1 + 1], & + c__1); + if (i__ < *n) { + i__2 = i__ - 1; + i__3 = *n - i__; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &w[( + iw + 1) * w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], + &c__1, &c_b55, &w[i__ + 1 + iw * w_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &a[(i__ + 1) * + a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b56, &w[iw * w_dim1 + 1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[( + i__ + 1) * a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], + &c__1, &c_b55, &w[i__ + 1 + iw * w_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &w[(iw + 1) * + w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b56, &w[iw * w_dim1 + 1], &c__1); + } + i__2 = i__ - 1; + cscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1); + q__3.r = -.5f, q__3.i = -0.f; + i__2 = i__ - 1; + q__2.r = q__3.r * tau[i__2].r - q__3.i * tau[i__2].i, q__2.i = + q__3.r * tau[i__2].i + q__3.i * tau[i__2].r; + i__3 = i__ - 1; + cdotc_(&q__4, &i__3, &w[iw * w_dim1 + 1], &c__1, &a[i__ * + a_dim1 + 1], &c__1); + q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * + q__4.i + q__2.i * q__4.r; + alpha.r = q__1.r, alpha.i = q__1.i; + i__2 = i__ - 1; + caxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw * + w_dim1 + 1], &c__1); + } + +/* L10: */ + } + } else { + +/* Reduce first NB columns of lower triangle */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i:n,i) */ + + i__2 = i__ + i__ * a_dim1; + i__3 = i__ + i__ * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; + i__2 = i__ - 1; + clacgv_(&i__2, &w[i__ + w_dim1], ldw); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + a_dim1], lda, + &w[i__ + w_dim1], ldw, &c_b56, &a[i__ + i__ * a_dim1], & + c__1); + i__2 = i__ - 1; + clacgv_(&i__2, &w[i__ + w_dim1], ldw); + i__2 = i__ - 1; + clacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &w[i__ + w_dim1], ldw, + &a[i__ + a_dim1], lda, &c_b56, &a[i__ + i__ * a_dim1], & + c__1); + i__2 = i__ - 1; + clacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + i__3 = i__ + i__ * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; + if (i__ < *n) { + +/* + Generate elementary reflector H(i) to annihilate + A(i+2:n,i) +*/ + + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + clarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, + &tau[i__]); + i__2 = i__; + e[i__2] = alpha.r; + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Compute W(i+1:n,i) */ + + i__2 = *n - i__; + chemv_("Lower", &i__2, &c_b56, &a[i__ + 1 + (i__ + 1) * + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b55, &w[i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &w[i__ + + 1 + w_dim1], ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b55, &w[i__ * w_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + 1 + + a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b56, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + + 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b55, &w[i__ * w_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &w[i__ + 1 + + w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b56, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + cscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1); + q__3.r = -.5f, q__3.i = -0.f; + i__2 = i__; + q__2.r = q__3.r * tau[i__2].r - q__3.i * tau[i__2].i, q__2.i = + q__3.r * tau[i__2].i + q__3.i * tau[i__2].r; + i__3 = *n - i__; + cdotc_(&q__4, &i__3, &w[i__ + 1 + i__ * w_dim1], &c__1, &a[ + i__ + 1 + i__ * a_dim1], &c__1); + q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * + q__4.i + q__2.i * q__4.r; + alpha.r = q__1.r, alpha.i = q__1.i; + i__2 = *n - i__; + caxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + } + +/* L20: */ + } + } + + return 0; + +/* End of CLATRD */ + +} /* clatrd_ */ + +/* Subroutine */ int clatrs_(char *uplo, char *trans, char *diag, char * + normin, integer *n, complex *a, integer *lda, complex *x, real *scale, + real *cnorm, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + real r__1, r__2, r__3, r__4; + complex q__1, q__2, q__3, q__4; + + /* Builtin functions */ + double r_imag(complex *); + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, j; + static real xj, rec, tjj; + static integer jinc; + static real xbnd; + static integer imax; + static real tmax; + static complex tjjs; + static real xmax, grow; + extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer + *, complex *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + static real tscal; + static complex uscal; + static integer jlast; + extern /* Complex */ VOID cdotu_(complex *, integer *, complex *, integer + *, complex *, integer *); + static complex csumj; + extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, + integer *, complex *, integer *); + static logical upper; + extern /* Subroutine */ int ctrsv_(char *, char *, char *, integer *, + complex *, integer *, complex *, integer *), slabad_(real *, real *); + extern integer icamax_(integer *, complex *, integer *); + extern /* Complex */ VOID cladiv_(complex *, complex *, complex *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *), xerbla_(char *, integer *); + static real bignum; + extern integer isamax_(integer *, real *, integer *); + extern doublereal scasum_(integer *, complex *, integer *); + static logical notran; + static integer jfirst; + static real smlnum; + static logical nounit; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1992 + + + Purpose + ======= + + CLATRS solves one of the triangular systems + + A * x = s*b, A**T * x = s*b, or A**H * x = s*b, + + with scaling to prevent overflow. Here A is an upper or lower + triangular matrix, A**T denotes the transpose of A, A**H denotes the + conjugate transpose of A, x and b are n-element vectors, and s is a + scaling factor, usually less than or equal to 1, chosen so that the + components of x will be less than the overflow threshold. If the + unscaled problem will not cause overflow, the Level 2 BLAS routine + CTRSV is called. If the matrix A is singular (A(j,j) = 0 for some j), + then s is set to 0 and a non-trivial solution to A*x = 0 is returned. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the matrix A is upper or lower triangular. + = 'U': Upper triangular + = 'L': Lower triangular + + TRANS (input) CHARACTER*1 + Specifies the operation applied to A. + = 'N': Solve A * x = s*b (No transpose) + = 'T': Solve A**T * x = s*b (Transpose) + = 'C': Solve A**H * x = s*b (Conjugate transpose) + + DIAG (input) CHARACTER*1 + Specifies whether or not the matrix A is unit triangular. + = 'N': Non-unit triangular + = 'U': Unit triangular + + NORMIN (input) CHARACTER*1 + Specifies whether CNORM has been set or not. + = 'Y': CNORM contains the column norms on entry + = 'N': CNORM is not set on entry. On exit, the norms will + be computed and stored in CNORM. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input) COMPLEX array, dimension (LDA,N) + The triangular matrix A. If UPLO = 'U', the leading n by n + upper triangular part of the array A contains the upper + triangular matrix, and the strictly lower triangular part of + A is not referenced. If UPLO = 'L', the leading n by n lower + triangular part of the array A contains the lower triangular + matrix, and the strictly upper triangular part of A is not + referenced. If DIAG = 'U', the diagonal elements of A are + also not referenced and are assumed to be 1. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max (1,N). + + X (input/output) COMPLEX array, dimension (N) + On entry, the right hand side b of the triangular system. + On exit, X is overwritten by the solution vector x. + + SCALE (output) REAL + The scaling factor s for the triangular system + A * x = s*b, A**T * x = s*b, or A**H * x = s*b. + If SCALE = 0, the matrix A is singular or badly scaled, and + the vector x is an exact or approximate solution to A*x = 0. + + CNORM (input or output) REAL array, dimension (N) + + If NORMIN = 'Y', CNORM is an input argument and CNORM(j) + contains the norm of the off-diagonal part of the j-th column + of A. If TRANS = 'N', CNORM(j) must be greater than or equal + to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) + must be greater than or equal to the 1-norm. + + If NORMIN = 'N', CNORM is an output argument and CNORM(j) + returns the 1-norm of the offdiagonal part of the j-th column + of A. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + Further Details + ======= ======= + + A rough bound on x is computed; if that is less than overflow, CTRSV + is called, otherwise, specific code is used which checks for possible + overflow or divide-by-zero at every operation. + + A columnwise scheme is used for solving A*x = b. The basic algorithm + if A is lower triangular is + + x[1:n] := b[1:n] + for j = 1, ..., n + x(j) := x(j) / A(j,j) + x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] + end + + Define bounds on the components of x after j iterations of the loop: + M(j) = bound on x[1:j] + G(j) = bound on x[j+1:n] + Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. + + Then for iteration j+1 we have + M(j+1) <= G(j) / | A(j+1,j+1) | + G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | + <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) + + where CNORM(j+1) is greater than or equal to the infinity-norm of + column j+1 of A, not counting the diagonal. Hence + + G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) + 1<=i<=j + and + + |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) + 1<=i< j + + Since |x(j)| <= M(j), we use the Level 2 BLAS routine CTRSV if the + reciprocal of the largest M(j), j=1,..,n, is larger than + max(underflow, 1/overflow). + + The bound on x(j) is also used to determine when a step in the + columnwise method can be performed without fear of overflow. If + the computed bound is greater than a large constant, x is scaled to + prevent overflow, but if the bound overflows, x is set to 0, x(j) to + 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. + + Similarly, a row-wise scheme is used to solve A**T *x = b or + A**H *x = b. The basic algorithm for A upper triangular is + + for j = 1, ..., n + x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) + end + + We simultaneously compute two bounds + G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j + M(j) = bound on x(i), 1<=i<=j + + The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we + add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. + Then the bound on x(j) is + + M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | + + <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) + 1<=i<=j + + and we can safely call CTRSV if 1/M(n) and 1/G(n) are both greater + than max(underflow, 1/overflow). + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --cnorm; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + notran = lsame_(trans, "N"); + nounit = lsame_(diag, "N"); + +/* Test the input parameters. */ + + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T") && ! + lsame_(trans, "C")) { + *info = -2; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -3; + } else if (! lsame_(normin, "Y") && ! lsame_(normin, + "N")) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CLATRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine machine dependent parameters to control overflow. */ + + smlnum = slamch_("Safe minimum"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + smlnum /= slamch_("Precision"); + bignum = 1.f / smlnum; + *scale = 1.f; + + if (lsame_(normin, "N")) { + +/* Compute the 1-norm of each column, not including the diagonal. */ + + if (upper) { + +/* A is upper triangular. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + cnorm[j] = scasum_(&i__2, &a[j * a_dim1 + 1], &c__1); +/* L10: */ + } + } else { + +/* A is lower triangular. */ + + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j; + cnorm[j] = scasum_(&i__2, &a[j + 1 + j * a_dim1], &c__1); +/* L20: */ + } + cnorm[*n] = 0.f; + } + } + +/* + Scale the column norms by TSCAL if the maximum element in CNORM is + greater than BIGNUM/2. +*/ + + imax = isamax_(n, &cnorm[1], &c__1); + tmax = cnorm[imax]; + if (tmax <= bignum * .5f) { + tscal = 1.f; + } else { + tscal = .5f / (smlnum * tmax); + sscal_(n, &tscal, &cnorm[1], &c__1); + } + +/* + Compute a bound on the computed solution vector to see if the + Level 2 BLAS routine CTRSV can be used. +*/ + + xmax = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = j; + r__3 = xmax, r__4 = (r__1 = x[i__2].r / 2.f, dabs(r__1)) + (r__2 = + r_imag(&x[j]) / 2.f, dabs(r__2)); + xmax = dmax(r__3,r__4); +/* L30: */ + } + xbnd = xmax; + + if (notran) { + +/* Compute the growth in A * x = b. */ + + if (upper) { + jfirst = *n; + jlast = 1; + jinc = -1; + } else { + jfirst = 1; + jlast = *n; + jinc = 1; + } + + if (tscal != 1.f) { + grow = 0.f; + goto L60; + } + + if (nounit) { + +/* + A is non-unit triangular. + + Compute GROW = 1/G(j) and XBND = 1/M(j). + Initially, G(0) = max{x(i), i=1,...,n}. +*/ + + grow = .5f / dmax(xbnd,smlnum); + xbnd = grow; + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L60; + } + + i__3 = j + j * a_dim1; + tjjs.r = a[i__3].r, tjjs.i = a[i__3].i; + tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), + dabs(r__2)); + + if (tjj >= smlnum) { + +/* + M(j) = G(j-1) / abs(A(j,j)) + + Computing MIN +*/ + r__1 = xbnd, r__2 = dmin(1.f,tjj) * grow; + xbnd = dmin(r__1,r__2); + } else { + +/* M(j) could overflow, set XBND to 0. */ + + xbnd = 0.f; + } + + if (tjj + cnorm[j] >= smlnum) { + +/* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */ + + grow *= tjj / (tjj + cnorm[j]); + } else { + +/* G(j) could overflow, set GROW to 0. */ + + grow = 0.f; + } +/* L40: */ + } + grow = xbnd; + } else { + +/* + A is unit triangular. + + Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. + + Computing MIN +*/ + r__1 = 1.f, r__2 = .5f / dmax(xbnd,smlnum); + grow = dmin(r__1,r__2); + i__2 = jlast; + i__1 = jinc; + for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L60; + } + +/* G(j) = G(j-1)*( 1 + CNORM(j) ) */ + + grow *= 1.f / (cnorm[j] + 1.f); +/* L50: */ + } + } +L60: + + ; + } else { + +/* Compute the growth in A**T * x = b or A**H * x = b. */ + + if (upper) { + jfirst = 1; + jlast = *n; + jinc = 1; + } else { + jfirst = *n; + jlast = 1; + jinc = -1; + } + + if (tscal != 1.f) { + grow = 0.f; + goto L90; + } + + if (nounit) { + +/* + A is non-unit triangular. + + Compute GROW = 1/G(j) and XBND = 1/M(j). + Initially, M(0) = max{x(i), i=1,...,n}. +*/ + + grow = .5f / dmax(xbnd,smlnum); + xbnd = grow; + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L90; + } + +/* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */ + + xj = cnorm[j] + 1.f; +/* Computing MIN */ + r__1 = grow, r__2 = xbnd / xj; + grow = dmin(r__1,r__2); + + i__3 = j + j * a_dim1; + tjjs.r = a[i__3].r, tjjs.i = a[i__3].i; + tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), + dabs(r__2)); + + if (tjj >= smlnum) { + +/* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */ + + if (xj > tjj) { + xbnd *= tjj / xj; + } + } else { + +/* M(j) could overflow, set XBND to 0. */ + + xbnd = 0.f; + } +/* L70: */ + } + grow = dmin(grow,xbnd); + } else { + +/* + A is unit triangular. + + Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. + + Computing MIN +*/ + r__1 = 1.f, r__2 = .5f / dmax(xbnd,smlnum); + grow = dmin(r__1,r__2); + i__2 = jlast; + i__1 = jinc; + for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L90; + } + +/* G(j) = ( 1 + CNORM(j) )*G(j-1) */ + + xj = cnorm[j] + 1.f; + grow /= xj; +/* L80: */ + } + } +L90: + ; + } + + if (grow * tscal > smlnum) { + +/* + Use the Level 2 BLAS solve if the reciprocal of the bound on + elements of X is not too small. +*/ + + ctrsv_(uplo, trans, diag, n, &a[a_offset], lda, &x[1], &c__1); + } else { + +/* Use a Level 1 BLAS solve, scaling intermediate results. */ + + if (xmax > bignum * .5f) { + +/* + Scale X so that its components are less than or equal to + BIGNUM in absolute value. +*/ + + *scale = bignum * .5f / xmax; + csscal_(n, scale, &x[1], &c__1); + xmax = bignum; + } else { + xmax *= 2.f; + } + + if (notran) { + +/* Solve A * x = b */ + + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* Compute x(j) = b(j) / A(j,j), scaling x if necessary. */ + + i__3 = j; + xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]), + dabs(r__2)); + if (nounit) { + i__3 = j + j * a_dim1; + q__1.r = tscal * a[i__3].r, q__1.i = tscal * a[i__3].i; + tjjs.r = q__1.r, tjjs.i = q__1.i; + } else { + tjjs.r = tscal, tjjs.i = 0.f; + if (tscal == 1.f) { + goto L105; + } + } + tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), + dabs(r__2)); + if (tjj > smlnum) { + +/* abs(A(j,j)) > SMLNUM: */ + + if (tjj < 1.f) { + if (xj > tjj * bignum) { + +/* Scale x by 1/b(j). */ + + rec = 1.f / xj; + csscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + i__3 = j; + cladiv_(&q__1, &x[j], &tjjs); + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + i__3 = j; + xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j] + ), dabs(r__2)); + } else if (tjj > 0.f) { + +/* 0 < abs(A(j,j)) <= SMLNUM: */ + + if (xj > tjj * bignum) { + +/* + Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM + to avoid overflow when dividing by A(j,j). +*/ + + rec = tjj * bignum / xj; + if (cnorm[j] > 1.f) { + +/* + Scale by 1/CNORM(j) to avoid overflow when + multiplying x(j) times column j. +*/ + + rec /= cnorm[j]; + } + csscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + i__3 = j; + cladiv_(&q__1, &x[j], &tjjs); + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + i__3 = j; + xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j] + ), dabs(r__2)); + } else { + +/* + A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and + scale = 0, and compute a solution to A*x = 0. +*/ + + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__; + x[i__4].r = 0.f, x[i__4].i = 0.f; +/* L100: */ + } + i__3 = j; + x[i__3].r = 1.f, x[i__3].i = 0.f; + xj = 1.f; + *scale = 0.f; + xmax = 0.f; + } +L105: + +/* + Scale x if necessary to avoid overflow when adding a + multiple of column j of A. +*/ + + if (xj > 1.f) { + rec = 1.f / xj; + if (cnorm[j] > (bignum - xmax) * rec) { + +/* Scale x by 1/(2*abs(x(j))). */ + + rec *= .5f; + csscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + } + } else if (xj * cnorm[j] > bignum - xmax) { + +/* Scale x by 1/2. */ + + csscal_(n, &c_b1794, &x[1], &c__1); + *scale *= .5f; + } + + if (upper) { + if (j > 1) { + +/* + Compute the update + x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) +*/ + + i__3 = j - 1; + i__4 = j; + q__2.r = -x[i__4].r, q__2.i = -x[i__4].i; + q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i; + caxpy_(&i__3, &q__1, &a[j * a_dim1 + 1], &c__1, &x[1], + &c__1); + i__3 = j - 1; + i__ = icamax_(&i__3, &x[1], &c__1); + i__3 = i__; + xmax = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = + r_imag(&x[i__]), dabs(r__2)); + } + } else { + if (j < *n) { + +/* + Compute the update + x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) +*/ + + i__3 = *n - j; + i__4 = j; + q__2.r = -x[i__4].r, q__2.i = -x[i__4].i; + q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i; + caxpy_(&i__3, &q__1, &a[j + 1 + j * a_dim1], &c__1, & + x[j + 1], &c__1); + i__3 = *n - j; + i__ = j + icamax_(&i__3, &x[j + 1], &c__1); + i__3 = i__; + xmax = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = + r_imag(&x[i__]), dabs(r__2)); + } + } +/* L110: */ + } + + } else if (lsame_(trans, "T")) { + +/* Solve A**T * x = b */ + + i__2 = jlast; + i__1 = jinc; + for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* + Compute x(j) = b(j) - sum A(k,j)*x(k). + k<>j +*/ + + i__3 = j; + xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]), + dabs(r__2)); + uscal.r = tscal, uscal.i = 0.f; + rec = 1.f / dmax(xmax,1.f); + if (cnorm[j] > (bignum - xj) * rec) { + +/* If x(j) could overflow, scale x by 1/(2*XMAX). */ + + rec *= .5f; + if (nounit) { + i__3 = j + j * a_dim1; + q__1.r = tscal * a[i__3].r, q__1.i = tscal * a[i__3] + .i; + tjjs.r = q__1.r, tjjs.i = q__1.i; + } else { + tjjs.r = tscal, tjjs.i = 0.f; + } + tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), + dabs(r__2)); + if (tjj > 1.f) { + +/* + Divide by A(j,j) when scaling x if A(j,j) > 1. + + Computing MIN +*/ + r__1 = 1.f, r__2 = rec * tjj; + rec = dmin(r__1,r__2); + cladiv_(&q__1, &uscal, &tjjs); + uscal.r = q__1.r, uscal.i = q__1.i; + } + if (rec < 1.f) { + csscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + + csumj.r = 0.f, csumj.i = 0.f; + if (uscal.r == 1.f && uscal.i == 0.f) { + +/* + If the scaling needed for A in the dot product is 1, + call CDOTU to perform the dot product. +*/ + + if (upper) { + i__3 = j - 1; + cdotu_(&q__1, &i__3, &a[j * a_dim1 + 1], &c__1, &x[1], + &c__1); + csumj.r = q__1.r, csumj.i = q__1.i; + } else if (j < *n) { + i__3 = *n - j; + cdotu_(&q__1, &i__3, &a[j + 1 + j * a_dim1], &c__1, & + x[j + 1], &c__1); + csumj.r = q__1.r, csumj.i = q__1.i; + } + } else { + +/* Otherwise, use in-line code for the dot product. */ + + if (upper) { + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * a_dim1; + q__3.r = a[i__4].r * uscal.r - a[i__4].i * + uscal.i, q__3.i = a[i__4].r * uscal.i + a[ + i__4].i * uscal.r; + i__5 = i__; + q__2.r = q__3.r * x[i__5].r - q__3.i * x[i__5].i, + q__2.i = q__3.r * x[i__5].i + q__3.i * x[ + i__5].r; + q__1.r = csumj.r + q__2.r, q__1.i = csumj.i + + q__2.i; + csumj.r = q__1.r, csumj.i = q__1.i; +/* L120: */ + } + } else if (j < *n) { + i__3 = *n; + for (i__ = j + 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * a_dim1; + q__3.r = a[i__4].r * uscal.r - a[i__4].i * + uscal.i, q__3.i = a[i__4].r * uscal.i + a[ + i__4].i * uscal.r; + i__5 = i__; + q__2.r = q__3.r * x[i__5].r - q__3.i * x[i__5].i, + q__2.i = q__3.r * x[i__5].i + q__3.i * x[ + i__5].r; + q__1.r = csumj.r + q__2.r, q__1.i = csumj.i + + q__2.i; + csumj.r = q__1.r, csumj.i = q__1.i; +/* L130: */ + } + } + } + + q__1.r = tscal, q__1.i = 0.f; + if (uscal.r == q__1.r && uscal.i == q__1.i) { + +/* + Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) + was not used to scale the dotproduct. +*/ + + i__3 = j; + i__4 = j; + q__1.r = x[i__4].r - csumj.r, q__1.i = x[i__4].i - + csumj.i; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + i__3 = j; + xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j] + ), dabs(r__2)); + if (nounit) { + i__3 = j + j * a_dim1; + q__1.r = tscal * a[i__3].r, q__1.i = tscal * a[i__3] + .i; + tjjs.r = q__1.r, tjjs.i = q__1.i; + } else { + tjjs.r = tscal, tjjs.i = 0.f; + if (tscal == 1.f) { + goto L145; + } + } + +/* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ + + tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), + dabs(r__2)); + if (tjj > smlnum) { + +/* abs(A(j,j)) > SMLNUM: */ + + if (tjj < 1.f) { + if (xj > tjj * bignum) { + +/* Scale X by 1/abs(x(j)). */ + + rec = 1.f / xj; + csscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + i__3 = j; + cladiv_(&q__1, &x[j], &tjjs); + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + } else if (tjj > 0.f) { + +/* 0 < abs(A(j,j)) <= SMLNUM: */ + + if (xj > tjj * bignum) { + +/* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ + + rec = tjj * bignum / xj; + csscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + i__3 = j; + cladiv_(&q__1, &x[j], &tjjs); + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + } else { + +/* + A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and + scale = 0 and compute a solution to A**T *x = 0. +*/ + + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__; + x[i__4].r = 0.f, x[i__4].i = 0.f; +/* L140: */ + } + i__3 = j; + x[i__3].r = 1.f, x[i__3].i = 0.f; + *scale = 0.f; + xmax = 0.f; + } +L145: + ; + } else { + +/* + Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot + product has already been divided by 1/A(j,j). +*/ + + i__3 = j; + cladiv_(&q__2, &x[j], &tjjs); + q__1.r = q__2.r - csumj.r, q__1.i = q__2.i - csumj.i; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + } +/* Computing MAX */ + i__3 = j; + r__3 = xmax, r__4 = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = + r_imag(&x[j]), dabs(r__2)); + xmax = dmax(r__3,r__4); +/* L150: */ + } + + } else { + +/* Solve A**H * x = b */ + + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* + Compute x(j) = b(j) - sum A(k,j)*x(k). + k<>j +*/ + + i__3 = j; + xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]), + dabs(r__2)); + uscal.r = tscal, uscal.i = 0.f; + rec = 1.f / dmax(xmax,1.f); + if (cnorm[j] > (bignum - xj) * rec) { + +/* If x(j) could overflow, scale x by 1/(2*XMAX). */ + + rec *= .5f; + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i; + tjjs.r = q__1.r, tjjs.i = q__1.i; + } else { + tjjs.r = tscal, tjjs.i = 0.f; + } + tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), + dabs(r__2)); + if (tjj > 1.f) { + +/* + Divide by A(j,j) when scaling x if A(j,j) > 1. + + Computing MIN +*/ + r__1 = 1.f, r__2 = rec * tjj; + rec = dmin(r__1,r__2); + cladiv_(&q__1, &uscal, &tjjs); + uscal.r = q__1.r, uscal.i = q__1.i; + } + if (rec < 1.f) { + csscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + + csumj.r = 0.f, csumj.i = 0.f; + if (uscal.r == 1.f && uscal.i == 0.f) { + +/* + If the scaling needed for A in the dot product is 1, + call CDOTC to perform the dot product. +*/ + + if (upper) { + i__3 = j - 1; + cdotc_(&q__1, &i__3, &a[j * a_dim1 + 1], &c__1, &x[1], + &c__1); + csumj.r = q__1.r, csumj.i = q__1.i; + } else if (j < *n) { + i__3 = *n - j; + cdotc_(&q__1, &i__3, &a[j + 1 + j * a_dim1], &c__1, & + x[j + 1], &c__1); + csumj.r = q__1.r, csumj.i = q__1.i; + } + } else { + +/* Otherwise, use in-line code for the dot product. */ + + if (upper) { + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + r_cnjg(&q__4, &a[i__ + j * a_dim1]); + q__3.r = q__4.r * uscal.r - q__4.i * uscal.i, + q__3.i = q__4.r * uscal.i + q__4.i * + uscal.r; + i__4 = i__; + q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, + q__2.i = q__3.r * x[i__4].i + q__3.i * x[ + i__4].r; + q__1.r = csumj.r + q__2.r, q__1.i = csumj.i + + q__2.i; + csumj.r = q__1.r, csumj.i = q__1.i; +/* L160: */ + } + } else if (j < *n) { + i__3 = *n; + for (i__ = j + 1; i__ <= i__3; ++i__) { + r_cnjg(&q__4, &a[i__ + j * a_dim1]); + q__3.r = q__4.r * uscal.r - q__4.i * uscal.i, + q__3.i = q__4.r * uscal.i + q__4.i * + uscal.r; + i__4 = i__; + q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, + q__2.i = q__3.r * x[i__4].i + q__3.i * x[ + i__4].r; + q__1.r = csumj.r + q__2.r, q__1.i = csumj.i + + q__2.i; + csumj.r = q__1.r, csumj.i = q__1.i; +/* L170: */ + } + } + } + + q__1.r = tscal, q__1.i = 0.f; + if (uscal.r == q__1.r && uscal.i == q__1.i) { + +/* + Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) + was not used to scale the dotproduct. +*/ + + i__3 = j; + i__4 = j; + q__1.r = x[i__4].r - csumj.r, q__1.i = x[i__4].i - + csumj.i; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + i__3 = j; + xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j] + ), dabs(r__2)); + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i; + tjjs.r = q__1.r, tjjs.i = q__1.i; + } else { + tjjs.r = tscal, tjjs.i = 0.f; + if (tscal == 1.f) { + goto L185; + } + } + +/* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ + + tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), + dabs(r__2)); + if (tjj > smlnum) { + +/* abs(A(j,j)) > SMLNUM: */ + + if (tjj < 1.f) { + if (xj > tjj * bignum) { + +/* Scale X by 1/abs(x(j)). */ + + rec = 1.f / xj; + csscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + i__3 = j; + cladiv_(&q__1, &x[j], &tjjs); + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + } else if (tjj > 0.f) { + +/* 0 < abs(A(j,j)) <= SMLNUM: */ + + if (xj > tjj * bignum) { + +/* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ + + rec = tjj * bignum / xj; + csscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + i__3 = j; + cladiv_(&q__1, &x[j], &tjjs); + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + } else { + +/* + A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and + scale = 0 and compute a solution to A**H *x = 0. +*/ + + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__; + x[i__4].r = 0.f, x[i__4].i = 0.f; +/* L180: */ + } + i__3 = j; + x[i__3].r = 1.f, x[i__3].i = 0.f; + *scale = 0.f; + xmax = 0.f; + } +L185: + ; + } else { + +/* + Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot + product has already been divided by 1/A(j,j). +*/ + + i__3 = j; + cladiv_(&q__2, &x[j], &tjjs); + q__1.r = q__2.r - csumj.r, q__1.i = q__2.i - csumj.i; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + } +/* Computing MAX */ + i__3 = j; + r__3 = xmax, r__4 = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = + r_imag(&x[j]), dabs(r__2)); + xmax = dmax(r__3,r__4); +/* L190: */ + } + } + *scale /= tscal; + } + +/* Scale the column norms by 1/TSCAL for return. */ + + if (tscal != 1.f) { + r__1 = 1.f / tscal; + sscal_(n, &r__1, &cnorm[1], &c__1); + } + + return 0; + +/* End of CLATRS */ + +} /* clatrs_ */ + +/* Subroutine */ int clauu2_(char *uplo, integer *n, complex *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + real r__1; + complex q__1; + + /* Local variables */ + static integer i__; + static real aii; + extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer + *, complex *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * + , complex *, integer *, complex *, integer *, complex *, complex * + , integer *); + static logical upper; + extern /* Subroutine */ int clacgv_(integer *, complex *, integer *), + csscal_(integer *, real *, complex *, integer *), xerbla_(char *, + integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CLAUU2 computes the product U * U' or L' * L, where the triangular + factor U or L is stored in the upper or lower triangular part of + the array A. + + If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + overwriting the factor U in A. + If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + overwriting the factor L in A. + + This is the unblocked form of the algorithm, calling Level 2 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the triangular factor stored in the array A + is upper or lower triangular: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the triangular factor U or L. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the triangular factor U or L. + On exit, if UPLO = 'U', the upper triangle of A is + overwritten with the upper triangle of the product U * U'; + if UPLO = 'L', the lower triangle of A is overwritten with + the lower triangle of the product L' * L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CLAUU2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (upper) { + +/* Compute the product U * U'. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + aii = a[i__2].r; + if (i__ < *n) { + i__2 = i__ + i__ * a_dim1; + i__3 = *n - i__; + cdotc_(&q__1, &i__3, &a[i__ + (i__ + 1) * a_dim1], lda, &a[ + i__ + (i__ + 1) * a_dim1], lda); + r__1 = aii * aii + q__1.r; + a[i__2].r = r__1, a[i__2].i = 0.f; + i__2 = *n - i__; + clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ - 1; + i__3 = *n - i__; + q__1.r = aii, q__1.i = 0.f; + cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & + q__1, &a[i__ * a_dim1 + 1], &c__1); + i__2 = *n - i__; + clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + } else { + csscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1); + } +/* L10: */ + } + + } else { + +/* Compute the product L' * L. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + aii = a[i__2].r; + if (i__ < *n) { + i__2 = i__ + i__ * a_dim1; + i__3 = *n - i__; + cdotc_(&q__1, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &a[ + i__ + 1 + i__ * a_dim1], &c__1); + r__1 = aii * aii + q__1.r; + a[i__2].r = r__1, a[i__2].i = 0.f; + i__2 = i__ - 1; + clacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = *n - i__; + i__3 = i__ - 1; + q__1.r = aii, q__1.i = 0.f; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + + 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + q__1, &a[i__ + a_dim1], lda); + i__2 = i__ - 1; + clacgv_(&i__2, &a[i__ + a_dim1], lda); + } else { + csscal_(&i__, &aii, &a[i__ + a_dim1], lda); + } +/* L20: */ + } + } + + return 0; + +/* End of CLAUU2 */ + +} /* clauu2_ */ + +/* Subroutine */ int clauum_(char *uplo, integer *n, complex *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, ib, nb; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *), cherk_(char *, + char *, integer *, integer *, real *, complex *, integer *, real * + , complex *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *); + static logical upper; + extern /* Subroutine */ int clauu2_(char *, integer *, complex *, integer + *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CLAUUM computes the product U * U' or L' * L, where the triangular + factor U or L is stored in the upper or lower triangular part of + the array A. + + If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + overwriting the factor U in A. + If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + overwriting the factor L in A. + + This is the blocked form of the algorithm, calling Level 3 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the triangular factor stored in the array A + is upper or lower triangular: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the triangular factor U or L. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the triangular factor U or L. + On exit, if UPLO = 'U', the upper triangle of A is + overwritten with the upper triangle of the product U * U'; + if UPLO = 'L', the lower triangle of A is overwritten with + the lower triangle of the product L' * L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CLAUUM", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "CLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code */ + + clauu2_(uplo, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code */ + + if (upper) { + +/* Compute the product U * U'. */ + + i__1 = *n; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + i__3 = i__ - 1; + ctrmm_("Right", "Upper", "Conjugate transpose", "Non-unit", & + i__3, &ib, &c_b56, &a[i__ + i__ * a_dim1], lda, &a[ + i__ * a_dim1 + 1], lda); + clauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + cgemm_("No transpose", "Conjugate transpose", &i__3, &ib, + &i__4, &c_b56, &a[(i__ + ib) * a_dim1 + 1], lda, & + a[i__ + (i__ + ib) * a_dim1], lda, &c_b56, &a[i__ + * a_dim1 + 1], lda); + i__3 = *n - i__ - ib + 1; + cherk_("Upper", "No transpose", &ib, &i__3, &c_b871, &a[ + i__ + (i__ + ib) * a_dim1], lda, &c_b871, &a[i__ + + i__ * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute the product L' * L. */ + + i__2 = *n; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + i__3 = i__ - 1; + ctrmm_("Left", "Lower", "Conjugate transpose", "Non-unit", & + ib, &i__3, &c_b56, &a[i__ + i__ * a_dim1], lda, &a[ + i__ + a_dim1], lda); + clauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + cgemm_("Conjugate transpose", "No transpose", &ib, &i__3, + &i__4, &c_b56, &a[i__ + ib + i__ * a_dim1], lda, & + a[i__ + ib + a_dim1], lda, &c_b56, &a[i__ + + a_dim1], lda); + i__3 = *n - i__ - ib + 1; + cherk_("Lower", "Conjugate transpose", &ib, &i__3, & + c_b871, &a[i__ + ib + i__ * a_dim1], lda, &c_b871, + &a[i__ + i__ * a_dim1], lda); + } +/* L20: */ + } + } + } + + return 0; + +/* End of CLAUUM */ + +} /* clauum_ */ + +/* Subroutine */ int cpotf2_(char *uplo, integer *n, complex *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + real r__1; + complex q__1, q__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer j; + static real ajj; + extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer + *, complex *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * + , complex *, integer *, complex *, integer *, complex *, complex * + , integer *); + static logical upper; + extern /* Subroutine */ int clacgv_(integer *, complex *, integer *), + csscal_(integer *, real *, complex *, integer *), xerbla_(char *, + integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CPOTF2 computes the Cholesky factorization of a complex Hermitian + positive definite matrix A. + + The factorization has the form + A = U' * U , if UPLO = 'U', or + A = L * L', if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. + + This is the unblocked version of the algorithm, calling Level 2 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + Hermitian matrix A is stored. + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the Hermitian matrix A. If UPLO = 'U', the leading + n by n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n by n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U'*U or A = L*L'. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + > 0: if INFO = k, the leading minor of order k is not + positive definite, and the factorization could not be + completed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CPOTF2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute U(J,J) and test for non-positive-definiteness. */ + + i__2 = j + j * a_dim1; + r__1 = a[i__2].r; + i__3 = j - 1; + cdotc_(&q__2, &i__3, &a[j * a_dim1 + 1], &c__1, &a[j * a_dim1 + 1] + , &c__1); + q__1.r = r__1 - q__2.r, q__1.i = -q__2.i; + ajj = q__1.r; + if (ajj <= 0.f) { + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.f; + goto L30; + } + ajj = sqrt(ajj); + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.f; + +/* Compute elements J+1:N of row J. */ + + if (j < *n) { + i__2 = j - 1; + clacgv_(&i__2, &a[j * a_dim1 + 1], &c__1); + i__2 = j - 1; + i__3 = *n - j; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("Transpose", &i__2, &i__3, &q__1, &a[(j + 1) * a_dim1 + + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b56, &a[j + ( + j + 1) * a_dim1], lda); + i__2 = j - 1; + clacgv_(&i__2, &a[j * a_dim1 + 1], &c__1); + i__2 = *n - j; + r__1 = 1.f / ajj; + csscal_(&i__2, &r__1, &a[j + (j + 1) * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute L(J,J) and test for non-positive-definiteness. */ + + i__2 = j + j * a_dim1; + r__1 = a[i__2].r; + i__3 = j - 1; + cdotc_(&q__2, &i__3, &a[j + a_dim1], lda, &a[j + a_dim1], lda); + q__1.r = r__1 - q__2.r, q__1.i = -q__2.i; + ajj = q__1.r; + if (ajj <= 0.f) { + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.f; + goto L30; + } + ajj = sqrt(ajj); + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.f; + +/* Compute elements J+1:N of column J. */ + + if (j < *n) { + i__2 = j - 1; + clacgv_(&i__2, &a[j + a_dim1], lda); + i__2 = *n - j; + i__3 = j - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &a[j + 1 + a_dim1] + , lda, &a[j + a_dim1], lda, &c_b56, &a[j + 1 + j * + a_dim1], &c__1); + i__2 = j - 1; + clacgv_(&i__2, &a[j + a_dim1], lda); + i__2 = *n - j; + r__1 = 1.f / ajj; + csscal_(&i__2, &r__1, &a[j + 1 + j * a_dim1], &c__1); + } +/* L20: */ + } + } + goto L40; + +L30: + *info = j; + +L40: + return 0; + +/* End of CPOTF2 */ + +} /* cpotf2_ */ + +/* Subroutine */ int cpotrf_(char *uplo, integer *n, complex *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + complex q__1; + + /* Local variables */ + static integer j, jb, nb; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *), cherk_(char *, + char *, integer *, integer *, real *, complex *, integer *, real * + , complex *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *); + static logical upper; + extern /* Subroutine */ int cpotf2_(char *, integer *, complex *, integer + *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CPOTRF computes the Cholesky factorization of a complex Hermitian + positive definite matrix A. + + The factorization has the form + A = U**H * U, if UPLO = 'U', or + A = L * L**H, if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. + + This is the block version of the algorithm, calling Level 3 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the Hermitian matrix A. If UPLO = 'U', the leading + N-by-N upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U**H*U or A = L*L**H. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the leading minor of order i is not + positive definite, and the factorization could not be + completed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CPOTRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "CPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code. */ + + cpotf2_(uplo, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code. */ + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. */ + + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* + Update and factorize the current diagonal block and test + for non-positive-definiteness. + + Computing MIN +*/ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + cherk_("Upper", "Conjugate transpose", &jb, &i__3, &c_b1150, & + a[j * a_dim1 + 1], lda, &c_b871, &a[j + j * a_dim1], + lda); + cpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Compute the current block row. */ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("Conjugate transpose", "No transpose", &jb, &i__3, + &i__4, &q__1, &a[j * a_dim1 + 1], lda, &a[(j + jb) + * a_dim1 + 1], lda, &c_b56, &a[j + (j + jb) * + a_dim1], lda); + i__3 = *n - j - jb + 1; + ctrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", + &jb, &i__3, &c_b56, &a[j + j * a_dim1], lda, &a[ + j + (j + jb) * a_dim1], lda); + } +/* L10: */ + } + + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + i__2 = *n; + i__1 = nb; + for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* + Update and factorize the current diagonal block and test + for non-positive-definiteness. + + Computing MIN +*/ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + cherk_("Lower", "No transpose", &jb, &i__3, &c_b1150, &a[j + + a_dim1], lda, &c_b871, &a[j + j * a_dim1], lda); + cpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Compute the current block column. */ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "Conjugate transpose", &i__3, &jb, + &i__4, &q__1, &a[j + jb + a_dim1], lda, &a[j + + a_dim1], lda, &c_b56, &a[j + jb + j * a_dim1], + lda); + i__3 = *n - j - jb + 1; + ctrsm_("Right", "Lower", "Conjugate transpose", "Non-unit" + , &i__3, &jb, &c_b56, &a[j + j * a_dim1], lda, &a[ + j + jb + j * a_dim1], lda); + } +/* L20: */ + } + } + } + goto L40; + +L30: + *info = *info + j - 1; + +L40: + return 0; + +/* End of CPOTRF */ + +} /* cpotrf_ */ + +/* Subroutine */ int cpotri_(char *uplo, integer *n, complex *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *), clauum_( + char *, integer *, complex *, integer *, integer *), + ctrtri_(char *, char *, integer *, complex *, integer *, integer * + ); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + CPOTRI computes the inverse of a complex Hermitian positive definite + matrix A using the Cholesky factorization A = U**H*U or A = L*L**H + computed by CPOTRF. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the triangular factor U or L from the Cholesky + factorization A = U**H*U or A = L*L**H, as computed by + CPOTRF. + On exit, the upper or lower triangle of the (Hermitian) + inverse of A, overwriting the input factor U or L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the (i,i) element of the factor U or L is + zero, and the inverse could not be computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CPOTRI", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Invert the triangular Cholesky factor U or L. */ + + ctrtri_(uplo, "Non-unit", n, &a[a_offset], lda, info); + if (*info > 0) { + return 0; + } + +/* Form inv(U)*inv(U)' or inv(L)'*inv(L). */ + + clauum_(uplo, n, &a[a_offset], lda, info); + + return 0; + +/* End of CPOTRI */ + +} /* cpotri_ */ + +/* Subroutine */ int cpotrs_(char *uplo, integer *n, integer *nrhs, complex * + a, integer *lda, complex *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *); + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CPOTRS solves a system of linear equations A*X = B with a Hermitian + positive definite matrix A using the Cholesky factorization + A = U**H*U or A = L*L**H computed by CPOTRF. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input) COMPLEX array, dimension (LDA,N) + The triangular factor U or L from the Cholesky factorization + A = U**H*U or A = L*L**H, as computed by CPOTRF. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + B (input/output) COMPLEX array, dimension (LDB,NRHS) + On entry, the right hand side matrix B. + On exit, the solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CPOTRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (upper) { + +/* + Solve A*X = B where A = U'*U. + + Solve U'*X = B, overwriting B with X. +*/ + + ctrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", n, nrhs, & + c_b56, &a[a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. */ + + ctrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b56, & + a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* + Solve A*X = B where A = L*L'. + + Solve L*X = B, overwriting B with X. +*/ + + ctrsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b56, & + a[a_offset], lda, &b[b_offset], ldb); + +/* Solve L'*X = B, overwriting B with X. */ + + ctrsm_("Left", "Lower", "Conjugate transpose", "Non-unit", n, nrhs, & + c_b56, &a[a_offset], lda, &b[b_offset], ldb); + } + + return 0; + +/* End of CPOTRS */ + +} /* cpotrs_ */ + +/* Subroutine */ int csrot_(integer *n, complex *cx, integer *incx, complex * + cy, integer *incy, real *c__, real *s) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4; + complex q__1, q__2, q__3; + + /* Local variables */ + static integer i__, ix, iy; + static complex ctemp; + + +/* + applies a plane rotation, where the cos and sin (c and s) are real + and the vectors cx and cy are complex. + jack dongarra, linpack, 3/11/78. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --cy; + --cx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments not equal + to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ix; + q__2.r = *c__ * cx[i__2].r, q__2.i = *c__ * cx[i__2].i; + i__3 = iy; + q__3.r = *s * cy[i__3].r, q__3.i = *s * cy[i__3].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + ctemp.r = q__1.r, ctemp.i = q__1.i; + i__2 = iy; + i__3 = iy; + q__2.r = *c__ * cy[i__3].r, q__2.i = *c__ * cy[i__3].i; + i__4 = ix; + q__3.r = *s * cx[i__4].r, q__3.i = *s * cx[i__4].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i; + cy[i__2].r = q__1.r, cy[i__2].i = q__1.i; + i__2 = ix; + cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + q__2.r = *c__ * cx[i__2].r, q__2.i = *c__ * cx[i__2].i; + i__3 = i__; + q__3.r = *s * cy[i__3].r, q__3.i = *s * cy[i__3].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + ctemp.r = q__1.r, ctemp.i = q__1.i; + i__2 = i__; + i__3 = i__; + q__2.r = *c__ * cy[i__3].r, q__2.i = *c__ * cy[i__3].i; + i__4 = i__; + q__3.r = *s * cx[i__4].r, q__3.i = *s * cx[i__4].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i; + cy[i__2].r = q__1.r, cy[i__2].i = q__1.i; + i__2 = i__; + cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i; +/* L30: */ + } + return 0; +} /* csrot_ */ + +/* Subroutine */ int cstedc_(char *compz, integer *n, real *d__, real *e, + complex *z__, integer *ldz, complex *work, integer *lwork, real * + rwork, integer *lrwork, integer *iwork, integer *liwork, integer * + info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2, i__3, i__4; + real r__1, r__2; + + /* Builtin functions */ + double log(doublereal); + integer pow_ii(integer *, integer *); + double sqrt(doublereal); + + /* Local variables */ + static integer i__, j, k, m; + static real p; + static integer ii, ll, end, lgn; + static real eps, tiny; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int cswap_(integer *, complex *, integer *, + complex *, integer *); + static integer lwmin; + extern /* Subroutine */ int claed0_(integer *, integer *, real *, real *, + complex *, integer *, complex *, integer *, real *, integer *, + integer *); + static integer start; + extern /* Subroutine */ int clacrm_(integer *, integer *, complex *, + integer *, real *, integer *, complex *, integer *, real *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex + *, integer *, complex *, integer *), xerbla_(char *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *), sstedc_(char *, integer *, real *, real *, real *, + integer *, real *, integer *, integer *, integer *, integer *), slaset_(char *, integer *, integer *, real *, real *, + real *, integer *); + static integer liwmin, icompz; + extern /* Subroutine */ int csteqr_(char *, integer *, real *, real *, + complex *, integer *, real *, integer *); + static real orgnrm; + extern doublereal slanst_(char *, integer *, real *, real *); + extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *); + static integer lrwmin; + static logical lquery; + static integer smlsiz; + extern /* Subroutine */ int ssteqr_(char *, integer *, real *, real *, + real *, integer *, real *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CSTEDC computes all eigenvalues and, optionally, eigenvectors of a + symmetric tridiagonal matrix using the divide and conquer method. + The eigenvectors of a full or band complex Hermitian matrix can also + be found if CHETRD or CHPTRD or CHBTRD has been used to reduce this + matrix to tridiagonal form. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. See SLAED3 for details. + + Arguments + ========= + + COMPZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only. + = 'I': Compute eigenvectors of tridiagonal matrix also. + = 'V': Compute eigenvectors of original Hermitian matrix + also. On entry, Z contains the unitary matrix used + to reduce the original matrix to tridiagonal form. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + D (input/output) REAL array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. + + E (input/output) REAL array, dimension (N-1) + On entry, the subdiagonal elements of the tridiagonal matrix. + On exit, E has been destroyed. + + Z (input/output) COMPLEX array, dimension (LDZ,N) + On entry, if COMPZ = 'V', then Z contains the unitary + matrix used in the reduction to tridiagonal form. + On exit, if INFO = 0, then if COMPZ = 'V', Z contains the + orthonormal eigenvectors of the original Hermitian matrix, + and if COMPZ = 'I', Z contains the orthonormal eigenvectors + of the symmetric tridiagonal matrix. + If COMPZ = 'N', then Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= 1. + If eigenvectors are desired, then LDZ >= max(1,N). + + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If COMPZ = 'N' or 'I', or N <= 1, LWORK must be at least 1. + If COMPZ = 'V' and N > 1, LWORK must be at least N*N. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + RWORK (workspace/output) REAL array, + dimension (LRWORK) + On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. + + LRWORK (input) INTEGER + The dimension of the array RWORK. + If COMPZ = 'N' or N <= 1, LRWORK must be at least 1. + If COMPZ = 'V' and N > 1, LRWORK must be at least + 1 + 3*N + 2*N*lg N + 3*N**2 , + where lg( N ) = smallest integer k such + that 2**k >= N. + If COMPZ = 'I' and N > 1, LRWORK must be at least + 1 + 4*N + 2*N**2 . + + If LRWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal size of the RWORK array, + returns this value as the first entry of the RWORK array, and + no error message related to LRWORK is issued by XERBLA. + + IWORK (workspace/output) INTEGER array, dimension (LIWORK) + On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. + + LIWORK (input) INTEGER + The dimension of the array IWORK. + If COMPZ = 'N' or N <= 1, LIWORK must be at least 1. + If COMPZ = 'V' or N > 1, LIWORK must be at least + 6 + 6*N + 5*N*lg N. + If COMPZ = 'I' or N > 1, LIWORK must be at least + 3 + 5*N . + + If LIWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal size of the IWORK array, + returns this value as the first entry of the IWORK array, and + no error message related to LIWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an eigenvalue while + working on the submatrix lying in rows and columns + INFO/(N+1) through mod(INFO,N+1). + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; + + if (lsame_(compz, "N")) { + icompz = 0; + } else if (lsame_(compz, "V")) { + icompz = 1; + } else if (lsame_(compz, "I")) { + icompz = 2; + } else { + icompz = -1; + } + if (*n <= 1 || icompz <= 0) { + lwmin = 1; + liwmin = 1; + lrwmin = 1; + } else { + lgn = (integer) (log((real) (*n)) / log(2.f)); + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (icompz == 1) { + lwmin = *n * *n; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3; + liwmin = *n * 6 + 6 + *n * 5 * lgn; + } else if (icompz == 2) { + lwmin = 1; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = (*n << 2) + 1 + (i__1 * i__1 << 1); + liwmin = *n * 5 + 3; + } + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } else if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -10; + } else if (*liwork < liwmin && ! lquery) { + *info = -12; + } + + if (*info == 0) { + work[1].r = (real) lwmin, work[1].i = 0.f; + rwork[1] = (real) lrwmin; + iwork[1] = liwmin; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CSTEDC", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*n == 1) { + if (icompz != 0) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1.f, z__[i__1].i = 0.f; + } + return 0; + } + + smlsiz = ilaenv_(&c__9, "CSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + +/* + If the following conditional clause is removed, then the routine + will use the Divide and Conquer routine to compute only the + eigenvalues, which requires (3N + 3N**2) real workspace and + (2 + 5N + 2N lg(N)) integer workspace. + Since on many architectures SSTERF is much faster than any other + algorithm for finding eigenvalues only, it is used here + as the default. + + If COMPZ = 'N', use SSTERF to compute the eigenvalues. +*/ + + if (icompz == 0) { + ssterf_(n, &d__[1], &e[1], info); + return 0; + } + +/* + If N is smaller than the minimum divide size (SMLSIZ+1), then + solve the problem with another solver. +*/ + + if (*n <= smlsiz) { + if (icompz == 0) { + ssterf_(n, &d__[1], &e[1], info); + return 0; + } else if (icompz == 2) { + csteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], + info); + return 0; + } else { + csteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], + info); + return 0; + } + } + +/* If COMPZ = 'I', we simply call SSTEDC instead. */ + + if (icompz == 2) { + slaset_("Full", n, n, &c_b1101, &c_b871, &rwork[1], n); + ll = *n * *n + 1; + i__1 = *lrwork - ll + 1; + sstedc_("I", n, &d__[1], &e[1], &rwork[1], n, &rwork[ll], &i__1, & + iwork[1], liwork, info); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * z_dim1; + i__4 = (j - 1) * *n + i__; + z__[i__3].r = rwork[i__4], z__[i__3].i = 0.f; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* + From now on, only option left to be handled is COMPZ = 'V', + i.e. ICOMPZ = 1. + + Scale. +*/ + + orgnrm = slanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.f) { + return 0; + } + + eps = slamch_("Epsilon"); + + start = 1; + +/* while ( START <= N ) */ + +L30: + if (start <= *n) { + +/* + Let END be the position of the next subdiagonal entry such that + E( END ) <= TINY or END = N if no such subdiagonal exists. The + matrix identified by the elements between START and END + constitutes an independent sub-problem. +*/ + + end = start; +L40: + if (end < *n) { + tiny = eps * sqrt((r__1 = d__[end], dabs(r__1))) * sqrt((r__2 = + d__[end + 1], dabs(r__2))); + if ((r__1 = e[end], dabs(r__1)) > tiny) { + ++end; + goto L40; + } + } + +/* (Sub) Problem determined. Compute its size and solve it. */ + + m = end - start + 1; + if (m > smlsiz) { + *info = smlsiz; + +/* Scale. */ + + orgnrm = slanst_("M", &m, &d__[start], &e[start]); + slascl_("G", &c__0, &c__0, &orgnrm, &c_b871, &m, &c__1, &d__[ + start], &m, info); + i__1 = m - 1; + i__2 = m - 1; + slascl_("G", &c__0, &c__0, &orgnrm, &c_b871, &i__1, &c__1, &e[ + start], &i__2, info); + + claed0_(n, &m, &d__[start], &e[start], &z__[start * z_dim1 + 1], + ldz, &work[1], n, &rwork[1], &iwork[1], info); + if (*info > 0) { + *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m + + 1) + start - 1; + return 0; + } + +/* Scale back. */ + + slascl_("G", &c__0, &c__0, &c_b871, &orgnrm, &m, &c__1, &d__[ + start], &m, info); + + } else { + ssteqr_("I", &m, &d__[start], &e[start], &rwork[1], &m, &rwork[m * + m + 1], info); + clacrm_(n, &m, &z__[start * z_dim1 + 1], ldz, &rwork[1], &m, & + work[1], n, &rwork[m * m + 1]); + clacpy_("A", n, &m, &work[1], n, &z__[start * z_dim1 + 1], ldz); + if (*info > 0) { + *info = start * (*n + 1) + end; + return 0; + } + } + + start = end + 1; + goto L30; + } + +/* + endwhile + + If the problem split any number of times, then the eigenvalues + will not be properly ordered. Here we permute the eigenvalues + (and the associated eigenvectors) into ascending order. +*/ + + if (m != *n) { + +/* Use Selection Sort to minimize swaps of eigenvectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L50: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + cswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], + &c__1); + } +/* L60: */ + } + } + + work[1].r = (real) lwmin, work[1].i = 0.f; + rwork[1] = (real) lrwmin; + iwork[1] = liwmin; + + return 0; + +/* End of CSTEDC */ + +} /* cstedc_ */ + +/* Subroutine */ int csteqr_(char *compz, integer *n, real *d__, real *e, + complex *z__, integer *ldz, real *work, integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2; + real r__1, r__2; + + /* Builtin functions */ + double sqrt(doublereal), r_sign(real *, real *); + + /* Local variables */ + static real b, c__, f, g; + static integer i__, j, k, l, m; + static real p, r__, s; + static integer l1, ii, mm, lm1, mm1, nm1; + static real rt1, rt2, eps; + static integer lsv; + static real tst, eps2; + static integer lend, jtot; + extern /* Subroutine */ int slae2_(real *, real *, real *, real *, real *) + ; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int clasr_(char *, char *, char *, integer *, + integer *, real *, real *, complex *, integer *); + static real anorm; + extern /* Subroutine */ int cswap_(integer *, complex *, integer *, + complex *, integer *); + static integer lendm1, lendp1; + extern /* Subroutine */ int slaev2_(real *, real *, real *, real *, real * + , real *, real *); + extern doublereal slapy2_(real *, real *); + static integer iscale; + extern doublereal slamch_(char *); + extern /* Subroutine */ int claset_(char *, integer *, integer *, complex + *, complex *, complex *, integer *); + static real safmin; + extern /* Subroutine */ int xerbla_(char *, integer *); + static real safmax; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); + static integer lendsv; + extern /* Subroutine */ int slartg_(real *, real *, real *, real *, real * + ); + static real ssfmin; + static integer nmaxit, icompz; + static real ssfmax; + extern doublereal slanst_(char *, integer *, real *, real *); + extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CSTEQR computes all eigenvalues and, optionally, eigenvectors of a + symmetric tridiagonal matrix using the implicit QL or QR method. + The eigenvectors of a full or band complex Hermitian matrix can also + be found if CHETRD or CHPTRD or CHBTRD has been used to reduce this + matrix to tridiagonal form. + + Arguments + ========= + + COMPZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only. + = 'V': Compute eigenvalues and eigenvectors of the original + Hermitian matrix. On entry, Z must contain the + unitary matrix used to reduce the original matrix + to tridiagonal form. + = 'I': Compute eigenvalues and eigenvectors of the + tridiagonal matrix. Z is initialized to the identity + matrix. + + N (input) INTEGER + The order of the matrix. N >= 0. + + D (input/output) REAL array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. + + E (input/output) REAL array, dimension (N-1) + On entry, the (n-1) subdiagonal elements of the tridiagonal + matrix. + On exit, E has been destroyed. + + Z (input/output) COMPLEX array, dimension (LDZ, N) + On entry, if COMPZ = 'V', then Z contains the unitary + matrix used in the reduction to tridiagonal form. + On exit, if INFO = 0, then if COMPZ = 'V', Z contains the + orthonormal eigenvectors of the original Hermitian matrix, + and if COMPZ = 'I', Z contains the orthonormal eigenvectors + of the symmetric tridiagonal matrix. + If COMPZ = 'N', then Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= 1, and if + eigenvectors are desired, then LDZ >= max(1,N). + + WORK (workspace) REAL array, dimension (max(1,2*N-2)) + If COMPZ = 'N', then WORK is not referenced. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: the algorithm has failed to find all the eigenvalues in + a total of 30*N iterations; if INFO = i, then i + elements of E have not converged to zero; on exit, D + and E contain the elements of a symmetric tridiagonal + matrix which is unitarily similar to the original + matrix. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + + if (lsame_(compz, "N")) { + icompz = 0; + } else if (lsame_(compz, "V")) { + icompz = 1; + } else if (lsame_(compz, "I")) { + icompz = 2; + } else { + icompz = -1; + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CSTEQR", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (icompz == 2) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1.f, z__[i__1].i = 0.f; + } + return 0; + } + +/* Determine the unit roundoff and over/underflow thresholds. */ + + eps = slamch_("E"); +/* Computing 2nd power */ + r__1 = eps; + eps2 = r__1 * r__1; + safmin = slamch_("S"); + safmax = 1.f / safmin; + ssfmax = sqrt(safmax) / 3.f; + ssfmin = sqrt(safmin) / eps2; + +/* + Compute the eigenvalues and eigenvectors of the tridiagonal + matrix. +*/ + + if (icompz == 2) { + claset_("Full", n, n, &c_b55, &c_b56, &z__[z_offset], ldz); + } + + nmaxit = *n * 30; + jtot = 0; + +/* + Determine where the matrix splits and choose QL or QR iteration + for each block, according to whether top or bottom diagonal + element is smaller. +*/ + + l1 = 1; + nm1 = *n - 1; + +L10: + if (l1 > *n) { + goto L160; + } + if (l1 > 1) { + e[l1 - 1] = 0.f; + } + if (l1 <= nm1) { + i__1 = nm1; + for (m = l1; m <= i__1; ++m) { + tst = (r__1 = e[m], dabs(r__1)); + if (tst == 0.f) { + goto L30; + } + if (tst <= sqrt((r__1 = d__[m], dabs(r__1))) * sqrt((r__2 = d__[m + + 1], dabs(r__2))) * eps) { + e[m] = 0.f; + goto L30; + } +/* L20: */ + } + } + m = *n; + +L30: + l = l1; + lsv = l; + lend = m; + lendsv = lend; + l1 = m + 1; + if (lend == l) { + goto L10; + } + +/* Scale submatrix in rows and columns L to LEND */ + + i__1 = lend - l + 1; + anorm = slanst_("I", &i__1, &d__[l], &e[l]); + iscale = 0; + if (anorm == 0.f) { + goto L10; + } + if (anorm > ssfmax) { + iscale = 1; + i__1 = lend - l + 1; + slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, + info); + } else if (anorm < ssfmin) { + iscale = 2; + i__1 = lend - l + 1; + slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, + info); + } + +/* Choose between QL and QR iteration */ + + if ((r__1 = d__[lend], dabs(r__1)) < (r__2 = d__[l], dabs(r__2))) { + lend = lsv; + l = lendsv; + } + + if (lend > l) { + +/* + QL Iteration + + Look for small subdiagonal element. +*/ + +L40: + if (l != lend) { + lendm1 = lend - 1; + i__1 = lendm1; + for (m = l; m <= i__1; ++m) { +/* Computing 2nd power */ + r__2 = (r__1 = e[m], dabs(r__1)); + tst = r__2 * r__2; + if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m + + 1], dabs(r__2)) + safmin) { + goto L60; + } +/* L50: */ + } + } + + m = lend; + +L60: + if (m < lend) { + e[m] = 0.f; + } + p = d__[l]; + if (m == l) { + goto L80; + } + +/* + If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 + to compute its eigensystem. +*/ + + if (m == l + 1) { + if (icompz > 0) { + slaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); + work[l] = c__; + work[*n - 1 + l] = s; + clasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & + z__[l * z_dim1 + 1], ldz); + } else { + slae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); + } + d__[l] = rt1; + d__[l + 1] = rt2; + e[l] = 0.f; + l += 2; + if (l <= lend) { + goto L40; + } + goto L140; + } + + if (jtot == nmaxit) { + goto L140; + } + ++jtot; + +/* Form shift. */ + + g = (d__[l + 1] - p) / (e[l] * 2.f); + r__ = slapy2_(&g, &c_b871); + g = d__[m] - p + e[l] / (g + r_sign(&r__, &g)); + + s = 1.f; + c__ = 1.f; + p = 0.f; + +/* Inner loop */ + + mm1 = m - 1; + i__1 = l; + for (i__ = mm1; i__ >= i__1; --i__) { + f = s * e[i__]; + b = c__ * e[i__]; + slartg_(&g, &f, &c__, &s, &r__); + if (i__ != m - 1) { + e[i__ + 1] = r__; + } + g = d__[i__ + 1] - p; + r__ = (d__[i__] - g) * s + c__ * 2.f * b; + p = s * r__; + d__[i__ + 1] = g + p; + g = c__ * r__ - b; + +/* If eigenvectors are desired, then save rotations. */ + + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = -s; + } + +/* L70: */ + } + +/* If eigenvectors are desired, then apply saved rotations. */ + + if (icompz > 0) { + mm = m - l + 1; + clasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l + * z_dim1 + 1], ldz); + } + + d__[l] -= p; + e[l] = g; + goto L40; + +/* Eigenvalue found. */ + +L80: + d__[l] = p; + + ++l; + if (l <= lend) { + goto L40; + } + goto L140; + + } else { + +/* + QR Iteration + + Look for small superdiagonal element. +*/ + +L90: + if (l != lend) { + lendp1 = lend + 1; + i__1 = lendp1; + for (m = l; m >= i__1; --m) { +/* Computing 2nd power */ + r__2 = (r__1 = e[m - 1], dabs(r__1)); + tst = r__2 * r__2; + if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m + - 1], dabs(r__2)) + safmin) { + goto L110; + } +/* L100: */ + } + } + + m = lend; + +L110: + if (m > lend) { + e[m - 1] = 0.f; + } + p = d__[l]; + if (m == l) { + goto L130; + } + +/* + If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 + to compute its eigensystem. +*/ + + if (m == l - 1) { + if (icompz > 0) { + slaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) + ; + work[m] = c__; + work[*n - 1 + m] = s; + clasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & + z__[(l - 1) * z_dim1 + 1], ldz); + } else { + slae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); + } + d__[l - 1] = rt1; + d__[l] = rt2; + e[l - 1] = 0.f; + l += -2; + if (l >= lend) { + goto L90; + } + goto L140; + } + + if (jtot == nmaxit) { + goto L140; + } + ++jtot; + +/* Form shift. */ + + g = (d__[l - 1] - p) / (e[l - 1] * 2.f); + r__ = slapy2_(&g, &c_b871); + g = d__[m] - p + e[l - 1] / (g + r_sign(&r__, &g)); + + s = 1.f; + c__ = 1.f; + p = 0.f; + +/* Inner loop */ + + lm1 = l - 1; + i__1 = lm1; + for (i__ = m; i__ <= i__1; ++i__) { + f = s * e[i__]; + b = c__ * e[i__]; + slartg_(&g, &f, &c__, &s, &r__); + if (i__ != m) { + e[i__ - 1] = r__; + } + g = d__[i__] - p; + r__ = (d__[i__ + 1] - g) * s + c__ * 2.f * b; + p = s * r__; + d__[i__] = g + p; + g = c__ * r__ - b; + +/* If eigenvectors are desired, then save rotations. */ + + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = s; + } + +/* L120: */ + } + +/* If eigenvectors are desired, then apply saved rotations. */ + + if (icompz > 0) { + mm = l - m + 1; + clasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m + * z_dim1 + 1], ldz); + } + + d__[l] -= p; + e[lm1] = g; + goto L90; + +/* Eigenvalue found. */ + +L130: + d__[l] = p; + + --l; + if (l >= lend) { + goto L90; + } + goto L140; + + } + +/* Undo scaling if necessary */ + +L140: + if (iscale == 1) { + i__1 = lendsv - lsv + 1; + slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + i__1 = lendsv - lsv; + slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, + info); + } else if (iscale == 2) { + i__1 = lendsv - lsv + 1; + slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + i__1 = lendsv - lsv; + slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, + info); + } + +/* + Check for no convergence to an eigenvalue after a total + of N*MAXIT iterations. +*/ + + if (jtot == nmaxit) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.f) { + ++(*info); + } +/* L150: */ + } + return 0; + } + goto L10; + +/* Order eigenvalues and eigenvectors. */ + +L160: + if (icompz == 0) { + +/* Use Quick Sort */ + + slasrt_("I", n, &d__[1], info); + + } else { + +/* Use Selection Sort to minimize swaps of eigenvectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L170: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + cswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], + &c__1); + } +/* L180: */ + } + } + return 0; + +/* End of CSTEQR */ + +} /* csteqr_ */ + +/* Subroutine */ int ctrevc_(char *side, char *howmny, logical *select, + integer *n, complex *t, integer *ldt, complex *vl, integer *ldvl, + complex *vr, integer *ldvr, integer *mm, integer *m, complex *work, + real *rwork, integer *info) +{ + /* System generated locals */ + integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3, i__4, i__5; + real r__1, r__2, r__3; + complex q__1, q__2; + + /* Builtin functions */ + double r_imag(complex *); + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, j, k, ii, ki, is; + static real ulp; + static logical allv; + static real unfl, ovfl, smin; + static logical over; + static real scale; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * + , complex *, integer *, complex *, integer *, complex *, complex * + , integer *); + static real remax; + extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, + complex *, integer *); + static logical leftv, bothv, somev; + extern /* Subroutine */ int slabad_(real *, real *); + extern integer icamax_(integer *, complex *, integer *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *), xerbla_(char *, integer *), clatrs_(char *, char *, + char *, char *, integer *, complex *, integer *, complex *, real * + , real *, integer *); + extern doublereal scasum_(integer *, complex *, integer *); + static logical rightv; + static real smlnum; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CTREVC computes some or all of the right and/or left eigenvectors of + a complex upper triangular matrix T. + + The right eigenvector x and the left eigenvector y of T corresponding + to an eigenvalue w are defined by: + + T*x = w*x, y'*T = w*y' + + where y' denotes the conjugate transpose of the vector y. + + If all eigenvectors are requested, the routine may either return the + matrices X and/or Y of right or left eigenvectors of T, or the + products Q*X and/or Q*Y, where Q is an input unitary + matrix. If T was obtained from the Schur factorization of an + original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of + right or left eigenvectors of A. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'R': compute right eigenvectors only; + = 'L': compute left eigenvectors only; + = 'B': compute both right and left eigenvectors. + + HOWMNY (input) CHARACTER*1 + = 'A': compute all right and/or left eigenvectors; + = 'B': compute all right and/or left eigenvectors, + and backtransform them using the input matrices + supplied in VR and/or VL; + = 'S': compute selected right and/or left eigenvectors, + specified by the logical array SELECT. + + SELECT (input) LOGICAL array, dimension (N) + If HOWMNY = 'S', SELECT specifies the eigenvectors to be + computed. + If HOWMNY = 'A' or 'B', SELECT is not referenced. + To select the eigenvector corresponding to the j-th + eigenvalue, SELECT(j) must be set to .TRUE.. + + N (input) INTEGER + The order of the matrix T. N >= 0. + + T (input/output) COMPLEX array, dimension (LDT,N) + The upper triangular matrix T. T is modified, but restored + on exit. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= max(1,N). + + VL (input/output) COMPLEX array, dimension (LDVL,MM) + On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must + contain an N-by-N matrix Q (usually the unitary matrix Q of + Schur vectors returned by CHSEQR). + On exit, if SIDE = 'L' or 'B', VL contains: + if HOWMNY = 'A', the matrix Y of left eigenvectors of T; + VL is lower triangular. The i-th column + VL(i) of VL is the eigenvector corresponding + to T(i,i). + if HOWMNY = 'B', the matrix Q*Y; + if HOWMNY = 'S', the left eigenvectors of T specified by + SELECT, stored consecutively in the columns + of VL, in the same order as their + eigenvalues. + If SIDE = 'R', VL is not referenced. + + LDVL (input) INTEGER + The leading dimension of the array VL. LDVL >= max(1,N) if + SIDE = 'L' or 'B'; LDVL >= 1 otherwise. + + VR (input/output) COMPLEX array, dimension (LDVR,MM) + On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must + contain an N-by-N matrix Q (usually the unitary matrix Q of + Schur vectors returned by CHSEQR). + On exit, if SIDE = 'R' or 'B', VR contains: + if HOWMNY = 'A', the matrix X of right eigenvectors of T; + VR is upper triangular. The i-th column + VR(i) of VR is the eigenvector corresponding + to T(i,i). + if HOWMNY = 'B', the matrix Q*X; + if HOWMNY = 'S', the right eigenvectors of T specified by + SELECT, stored consecutively in the columns + of VR, in the same order as their + eigenvalues. + If SIDE = 'L', VR is not referenced. + + LDVR (input) INTEGER + The leading dimension of the array VR. LDVR >= max(1,N) if + SIDE = 'R' or 'B'; LDVR >= 1 otherwise. + + MM (input) INTEGER + The number of columns in the arrays VL and/or VR. MM >= M. + + M (output) INTEGER + The number of columns in the arrays VL and/or VR actually + used to store the eigenvectors. If HOWMNY = 'A' or 'B', M + is set to N. Each selected eigenvector occupies one + column. + + WORK (workspace) COMPLEX array, dimension (2*N) + + RWORK (workspace) REAL array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The algorithm used in this program is basically backward (forward) + substitution, with scaling to make the the code robust against + possible overflow. + + Each eigenvector is normalized so that the element of largest + magnitude has magnitude 1; here the magnitude of a complex number + (x,y) is taken to be |x| + |y|. + + ===================================================================== + + + Decode and test the input parameters +*/ + + /* Parameter adjustments */ + --select; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + --rwork; + + /* Function Body */ + bothv = lsame_(side, "B"); + rightv = lsame_(side, "R") || bothv; + leftv = lsame_(side, "L") || bothv; + + allv = lsame_(howmny, "A"); + over = lsame_(howmny, "B"); + somev = lsame_(howmny, "S"); + +/* + Set M to the number of columns required to store the selected + eigenvectors. +*/ + + if (somev) { + *m = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (select[j]) { + ++(*m); + } +/* L10: */ + } + } else { + *m = *n; + } + + *info = 0; + if (! rightv && ! leftv) { + *info = -1; + } else if (! allv && ! over && ! somev) { + *info = -2; + } else if (*n < 0) { + *info = -4; + } else if (*ldt < max(1,*n)) { + *info = -6; + } else if (*ldvl < 1 || leftv && *ldvl < *n) { + *info = -8; + } else if (*ldvr < 1 || rightv && *ldvr < *n) { + *info = -10; + } else if (*mm < *m) { + *info = -11; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CTREVC", &i__1); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + +/* Set the constants to control overflow. */ + + unfl = slamch_("Safe minimum"); + ovfl = 1.f / unfl; + slabad_(&unfl, &ovfl); + ulp = slamch_("Precision"); + smlnum = unfl * (*n / ulp); + +/* Store the diagonal elements of T in working array WORK. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + *n; + i__3 = i__ + i__ * t_dim1; + work[i__2].r = t[i__3].r, work[i__2].i = t[i__3].i; +/* L20: */ + } + +/* + Compute 1-norm of each column of strictly upper triangular + part of T to control overflow in triangular solver. +*/ + + rwork[1] = 0.f; + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + i__2 = j - 1; + rwork[j] = scasum_(&i__2, &t[j * t_dim1 + 1], &c__1); +/* L30: */ + } + + if (rightv) { + +/* Compute right eigenvectors. */ + + is = *m; + for (ki = *n; ki >= 1; --ki) { + + if (somev) { + if (! select[ki]) { + goto L80; + } + } +/* Computing MAX */ + i__1 = ki + ki * t_dim1; + r__3 = ulp * ((r__1 = t[i__1].r, dabs(r__1)) + (r__2 = r_imag(&t[ + ki + ki * t_dim1]), dabs(r__2))); + smin = dmax(r__3,smlnum); + + work[1].r = 1.f, work[1].i = 0.f; + +/* Form right-hand side. */ + + i__1 = ki - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k; + i__3 = k + ki * t_dim1; + q__1.r = -t[i__3].r, q__1.i = -t[i__3].i; + work[i__2].r = q__1.r, work[i__2].i = q__1.i; +/* L40: */ + } + +/* + Solve the triangular system: + (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK. +*/ + + i__1 = ki - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k + k * t_dim1; + i__3 = k + k * t_dim1; + i__4 = ki + ki * t_dim1; + q__1.r = t[i__3].r - t[i__4].r, q__1.i = t[i__3].i - t[i__4] + .i; + t[i__2].r = q__1.r, t[i__2].i = q__1.i; + i__2 = k + k * t_dim1; + if ((r__1 = t[i__2].r, dabs(r__1)) + (r__2 = r_imag(&t[k + k * + t_dim1]), dabs(r__2)) < smin) { + i__3 = k + k * t_dim1; + t[i__3].r = smin, t[i__3].i = 0.f; + } +/* L50: */ + } + + if (ki > 1) { + i__1 = ki - 1; + clatrs_("Upper", "No transpose", "Non-unit", "Y", &i__1, &t[ + t_offset], ldt, &work[1], &scale, &rwork[1], info); + i__1 = ki; + work[i__1].r = scale, work[i__1].i = 0.f; + } + +/* Copy the vector x or Q*x to VR and normalize. */ + + if (! over) { + ccopy_(&ki, &work[1], &c__1, &vr[is * vr_dim1 + 1], &c__1); + + ii = icamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); + i__1 = ii + is * vr_dim1; + remax = 1.f / ((r__1 = vr[i__1].r, dabs(r__1)) + (r__2 = + r_imag(&vr[ii + is * vr_dim1]), dabs(r__2))); + csscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); + + i__1 = *n; + for (k = ki + 1; k <= i__1; ++k) { + i__2 = k + is * vr_dim1; + vr[i__2].r = 0.f, vr[i__2].i = 0.f; +/* L60: */ + } + } else { + if (ki > 1) { + i__1 = ki - 1; + q__1.r = scale, q__1.i = 0.f; + cgemv_("N", n, &i__1, &c_b56, &vr[vr_offset], ldvr, &work[ + 1], &c__1, &q__1, &vr[ki * vr_dim1 + 1], &c__1); + } + + ii = icamax_(n, &vr[ki * vr_dim1 + 1], &c__1); + i__1 = ii + ki * vr_dim1; + remax = 1.f / ((r__1 = vr[i__1].r, dabs(r__1)) + (r__2 = + r_imag(&vr[ii + ki * vr_dim1]), dabs(r__2))); + csscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); + } + +/* Set back the original diagonal elements of T. */ + + i__1 = ki - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k + k * t_dim1; + i__3 = k + *n; + t[i__2].r = work[i__3].r, t[i__2].i = work[i__3].i; +/* L70: */ + } + + --is; +L80: + ; + } + } + + if (leftv) { + +/* Compute left eigenvectors. */ + + is = 1; + i__1 = *n; + for (ki = 1; ki <= i__1; ++ki) { + + if (somev) { + if (! select[ki]) { + goto L130; + } + } +/* Computing MAX */ + i__2 = ki + ki * t_dim1; + r__3 = ulp * ((r__1 = t[i__2].r, dabs(r__1)) + (r__2 = r_imag(&t[ + ki + ki * t_dim1]), dabs(r__2))); + smin = dmax(r__3,smlnum); + + i__2 = *n; + work[i__2].r = 1.f, work[i__2].i = 0.f; + +/* Form right-hand side. */ + + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + i__3 = k; + r_cnjg(&q__2, &t[ki + k * t_dim1]); + q__1.r = -q__2.r, q__1.i = -q__2.i; + work[i__3].r = q__1.r, work[i__3].i = q__1.i; +/* L90: */ + } + +/* + Solve the triangular system: + (T(KI+1:N,KI+1:N) - T(KI,KI))'*X = SCALE*WORK. +*/ + + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + i__3 = k + k * t_dim1; + i__4 = k + k * t_dim1; + i__5 = ki + ki * t_dim1; + q__1.r = t[i__4].r - t[i__5].r, q__1.i = t[i__4].i - t[i__5] + .i; + t[i__3].r = q__1.r, t[i__3].i = q__1.i; + i__3 = k + k * t_dim1; + if ((r__1 = t[i__3].r, dabs(r__1)) + (r__2 = r_imag(&t[k + k * + t_dim1]), dabs(r__2)) < smin) { + i__4 = k + k * t_dim1; + t[i__4].r = smin, t[i__4].i = 0.f; + } +/* L100: */ + } + + if (ki < *n) { + i__2 = *n - ki; + clatrs_("Upper", "Conjugate transpose", "Non-unit", "Y", & + i__2, &t[ki + 1 + (ki + 1) * t_dim1], ldt, &work[ki + + 1], &scale, &rwork[1], info); + i__2 = ki; + work[i__2].r = scale, work[i__2].i = 0.f; + } + +/* Copy the vector x or Q*x to VL and normalize. */ + + if (! over) { + i__2 = *n - ki + 1; + ccopy_(&i__2, &work[ki], &c__1, &vl[ki + is * vl_dim1], &c__1) + ; + + i__2 = *n - ki + 1; + ii = icamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - 1; + i__2 = ii + is * vl_dim1; + remax = 1.f / ((r__1 = vl[i__2].r, dabs(r__1)) + (r__2 = + r_imag(&vl[ii + is * vl_dim1]), dabs(r__2))); + i__2 = *n - ki + 1; + csscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); + + i__2 = ki - 1; + for (k = 1; k <= i__2; ++k) { + i__3 = k + is * vl_dim1; + vl[i__3].r = 0.f, vl[i__3].i = 0.f; +/* L110: */ + } + } else { + if (ki < *n) { + i__2 = *n - ki; + q__1.r = scale, q__1.i = 0.f; + cgemv_("N", n, &i__2, &c_b56, &vl[(ki + 1) * vl_dim1 + 1], + ldvl, &work[ki + 1], &c__1, &q__1, &vl[ki * + vl_dim1 + 1], &c__1); + } + + ii = icamax_(n, &vl[ki * vl_dim1 + 1], &c__1); + i__2 = ii + ki * vl_dim1; + remax = 1.f / ((r__1 = vl[i__2].r, dabs(r__1)) + (r__2 = + r_imag(&vl[ii + ki * vl_dim1]), dabs(r__2))); + csscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + } + +/* Set back the original diagonal elements of T. */ + + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + i__3 = k + k * t_dim1; + i__4 = k + *n; + t[i__3].r = work[i__4].r, t[i__3].i = work[i__4].i; +/* L120: */ + } + + ++is; +L130: + ; + } + } + + return 0; + +/* End of CTREVC */ + +} /* ctrevc_ */ + +/* Subroutine */ int ctrti2_(char *uplo, char *diag, integer *n, complex *a, + integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + complex q__1; + + /* Builtin functions */ + void c_div(complex *, complex *, complex *); + + /* Local variables */ + static integer j; + static complex ajj; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *); + extern logical lsame_(char *, char *); + static logical upper; + extern /* Subroutine */ int ctrmv_(char *, char *, char *, integer *, + complex *, integer *, complex *, integer *), xerbla_(char *, integer *); + static logical nounit; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CTRTI2 computes the inverse of a complex upper or lower triangular + matrix. + + This is the Level 2 BLAS version of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the matrix A is upper or lower triangular. + = 'U': Upper triangular + = 'L': Lower triangular + + DIAG (input) CHARACTER*1 + Specifies whether or not the matrix A is unit triangular. + = 'N': Non-unit triangular + = 'U': Unit triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the triangular matrix A. If UPLO = 'U', the + leading n by n upper triangular part of the array A contains + the upper triangular matrix, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n by n lower triangular part of the array A contains + the lower triangular matrix, and the strictly upper + triangular part of A is not referenced. If DIAG = 'U', the + diagonal elements of A are also not referenced and are + assumed to be 1. + + On exit, the (triangular) inverse of the original matrix, in + the same storage format. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + nounit = lsame_(diag, "N"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CTRTI2", &i__1); + return 0; + } + + if (upper) { + +/* Compute inverse of upper triangular matrix. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (nounit) { + i__2 = j + j * a_dim1; + c_div(&q__1, &c_b56, &a[j + j * a_dim1]); + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = j + j * a_dim1; + q__1.r = -a[i__2].r, q__1.i = -a[i__2].i; + ajj.r = q__1.r, ajj.i = q__1.i; + } else { + q__1.r = -1.f, q__1.i = -0.f; + ajj.r = q__1.r, ajj.i = q__1.i; + } + +/* Compute elements 1:j-1 of j-th column. */ + + i__2 = j - 1; + ctrmv_("Upper", "No transpose", diag, &i__2, &a[a_offset], lda, & + a[j * a_dim1 + 1], &c__1); + i__2 = j - 1; + cscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1); +/* L10: */ + } + } else { + +/* Compute inverse of lower triangular matrix. */ + + for (j = *n; j >= 1; --j) { + if (nounit) { + i__1 = j + j * a_dim1; + c_div(&q__1, &c_b56, &a[j + j * a_dim1]); + a[i__1].r = q__1.r, a[i__1].i = q__1.i; + i__1 = j + j * a_dim1; + q__1.r = -a[i__1].r, q__1.i = -a[i__1].i; + ajj.r = q__1.r, ajj.i = q__1.i; + } else { + q__1.r = -1.f, q__1.i = -0.f; + ajj.r = q__1.r, ajj.i = q__1.i; + } + if (j < *n) { + +/* Compute elements j+1:n of j-th column. */ + + i__1 = *n - j; + ctrmv_("Lower", "No transpose", diag, &i__1, &a[j + 1 + (j + + 1) * a_dim1], lda, &a[j + 1 + j * a_dim1], &c__1); + i__1 = *n - j; + cscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1); + } +/* L20: */ + } + } + + return 0; + +/* End of CTRTI2 */ + +} /* ctrti2_ */ + +/* Subroutine */ int ctrtri_(char *uplo, char *diag, integer *n, complex *a, + integer *lda, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, i__1, i__2, i__3[2], i__4, i__5; + complex q__1; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer j, jb, nb, nn; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *), ctrsm_(char *, char *, + char *, char *, integer *, integer *, complex *, complex *, + integer *, complex *, integer *); + static logical upper; + extern /* Subroutine */ int ctrti2_(char *, char *, integer *, complex *, + integer *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical nounit; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CTRTRI computes the inverse of a complex upper or lower triangular + matrix A. + + This is the Level 3 BLAS version of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': A is upper triangular; + = 'L': A is lower triangular. + + DIAG (input) CHARACTER*1 + = 'N': A is non-unit triangular; + = 'U': A is unit triangular. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the triangular matrix A. If UPLO = 'U', the + leading N-by-N upper triangular part of the array A contains + the upper triangular matrix, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of the array A contains + the lower triangular matrix, and the strictly upper + triangular part of A is not referenced. If DIAG = 'U', the + diagonal elements of A are also not referenced and are + assumed to be 1. + On exit, the (triangular) inverse of the original matrix, in + the same storage format. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, A(i,i) is exactly zero. The triangular + matrix is singular and its inverse can not be computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + nounit = lsame_(diag, "N"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CTRTRI", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Check for singularity if non-unit. */ + + if (nounit) { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + i__2 = *info + *info * a_dim1; + if (a[i__2].r == 0.f && a[i__2].i == 0.f) { + return 0; + } +/* L10: */ + } + *info = 0; + } + +/* + Determine the block size for this environment. + + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = uplo; + i__3[1] = 1, a__1[1] = diag; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "CTRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)2); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code */ + + ctrti2_(uplo, diag, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code */ + + if (upper) { + +/* Compute inverse of upper triangular matrix */ + + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *n - j + 1; + jb = min(i__4,i__5); + +/* Compute rows 1:j-1 of current block column */ + + i__4 = j - 1; + ctrmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, & + c_b56, &a[a_offset], lda, &a[j * a_dim1 + 1], lda); + i__4 = j - 1; + q__1.r = -1.f, q__1.i = -0.f; + ctrsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, & + q__1, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], + lda); + +/* Compute inverse of current diagonal block */ + + ctrti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info); +/* L20: */ + } + } else { + +/* Compute inverse of lower triangular matrix */ + + nn = (*n - 1) / nb * nb + 1; + i__2 = -nb; + for (j = nn; i__2 < 0 ? j >= 1 : j <= 1; j += i__2) { +/* Computing MIN */ + i__1 = nb, i__4 = *n - j + 1; + jb = min(i__1,i__4); + if (j + jb <= *n) { + +/* Compute rows j+jb:n of current block column */ + + i__1 = *n - j - jb + 1; + ctrmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, + &c_b56, &a[j + jb + (j + jb) * a_dim1], lda, &a[j + + jb + j * a_dim1], lda); + i__1 = *n - j - jb + 1; + q__1.r = -1.f, q__1.i = -0.f; + ctrsm_("Right", "Lower", "No transpose", diag, &i__1, &jb, + &q__1, &a[j + j * a_dim1], lda, &a[j + jb + j * + a_dim1], lda); + } + +/* Compute inverse of current diagonal block */ + + ctrti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info); +/* L30: */ + } + } + } + + return 0; + +/* End of CTRTRI */ + +} /* ctrtri_ */ + +/* Subroutine */ int cung2r_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + complex q__1; + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *), clarf_(char *, integer *, integer *, complex *, + integer *, complex *, complex *, integer *, complex *), + xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CUNG2R generates an m by n complex matrix Q with orthonormal columns, + which is defined as the first n columns of a product of k elementary + reflectors of order m + + Q = H(1) H(2) . . . H(k) + + as returned by CGEQRF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. M >= N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. N >= K >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the i-th column must contain the vector which + defines the elementary reflector H(i), for i = 1,2,...,k, as + returned by CGEQRF in the first k columns of its array + argument A. + On exit, the m by n matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGEQRF. + + WORK (workspace) COMPLEX array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNG2R", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + +/* Initialise columns k+1:n to columns of the unit matrix */ + + i__1 = *n; + for (j = *k + 1; j <= i__1; ++j) { + i__2 = *m; + for (l = 1; l <= i__2; ++l) { + i__3 = l + j * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; +/* L10: */ + } + i__2 = j + j * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; +/* L20: */ + } + + for (i__ = *k; i__ >= 1; --i__) { + +/* Apply H(i) to A(i:m,i:n) from the left */ + + if (i__ < *n) { + i__1 = i__ + i__ * a_dim1; + a[i__1].r = 1.f, a[i__1].i = 0.f; + i__1 = *m - i__ + 1; + i__2 = *n - i__; + clarf_("Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + } + if (i__ < *m) { + i__1 = *m - i__; + i__2 = i__; + q__1.r = -tau[i__2].r, q__1.i = -tau[i__2].i; + cscal_(&i__1, &q__1, &a[i__ + 1 + i__ * a_dim1], &c__1); + } + i__1 = i__ + i__ * a_dim1; + i__2 = i__; + q__1.r = 1.f - tau[i__2].r, q__1.i = 0.f - tau[i__2].i; + a[i__1].r = q__1.r, a[i__1].i = q__1.i; + +/* Set A(1:i-1,i) to zero */ + + i__1 = i__ - 1; + for (l = 1; l <= i__1; ++l) { + i__2 = l + i__ * a_dim1; + a[i__2].r = 0.f, a[i__2].i = 0.f; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of CUNG2R */ + +} /* cung2r_ */ + +/* Subroutine */ int cungbr_(char *vect, integer *m, integer *n, integer *k, + complex *a, integer *lda, complex *tau, complex *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, nb, mn; + extern logical lsame_(char *, char *); + static integer iinfo; + static logical wantq; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int cunglq_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *, integer *), + cungqr_(integer *, integer *, integer *, complex *, integer *, + complex *, complex *, integer *, integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CUNGBR generates one of the complex unitary matrices Q or P**H + determined by CGEBRD when reducing a complex matrix A to bidiagonal + form: A = Q * B * P**H. Q and P**H are defined as products of + elementary reflectors H(i) or G(i) respectively. + + If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q + is of order M: + if m >= k, Q = H(1) H(2) . . . H(k) and CUNGBR returns the first n + columns of Q, where m >= n >= k; + if m < k, Q = H(1) H(2) . . . H(m-1) and CUNGBR returns Q as an + M-by-M matrix. + + If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H + is of order N: + if k < n, P**H = G(k) . . . G(2) G(1) and CUNGBR returns the first m + rows of P**H, where n >= m >= k; + if k >= n, P**H = G(n-1) . . . G(2) G(1) and CUNGBR returns P**H as + an N-by-N matrix. + + Arguments + ========= + + VECT (input) CHARACTER*1 + Specifies whether the matrix Q or the matrix P**H is + required, as defined in the transformation applied by CGEBRD: + = 'Q': generate Q; + = 'P': generate P**H. + + M (input) INTEGER + The number of rows of the matrix Q or P**H to be returned. + M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q or P**H to be returned. + N >= 0. + If VECT = 'Q', M >= N >= min(M,K); + if VECT = 'P', N >= M >= min(N,K). + + K (input) INTEGER + If VECT = 'Q', the number of columns in the original M-by-K + matrix reduced by CGEBRD. + If VECT = 'P', the number of rows in the original K-by-N + matrix reduced by CGEBRD. + K >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the vectors which define the elementary reflectors, + as returned by CGEBRD. + On exit, the M-by-N matrix Q or P**H. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= M. + + TAU (input) COMPLEX array, dimension + (min(M,K)) if VECT = 'Q' + (min(N,K)) if VECT = 'P' + TAU(i) must contain the scalar factor of the elementary + reflector H(i) or G(i), which determines Q or P**H, as + returned by CGEBRD in its array argument TAUQ or TAUP. + + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,min(M,N)). + For optimum performance LWORK >= min(M,N)*NB, where NB + is the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + wantq = lsame_(vect, "Q"); + mn = min(*m,*n); + lquery = *lwork == -1; + if (! wantq && ! lsame_(vect, "P")) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0 || wantq && (*n > *m || *n < min(*m,*k)) || ! wantq && ( + *m > *n || *m < min(*n,*k))) { + *info = -3; + } else if (*k < 0) { + *info = -4; + } else if (*lda < max(1,*m)) { + *info = -6; + } else if (*lwork < max(1,mn) && ! lquery) { + *info = -9; + } + + if (*info == 0) { + if (wantq) { + nb = ilaenv_(&c__1, "CUNGQR", " ", m, n, k, &c_n1, (ftnlen)6, ( + ftnlen)1); + } else { + nb = ilaenv_(&c__1, "CUNGLQ", " ", m, n, k, &c_n1, (ftnlen)6, ( + ftnlen)1); + } + lwkopt = max(1,mn) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNGBR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + if (wantq) { + +/* + Form Q, determined by a call to CGEBRD to reduce an m-by-k + matrix +*/ + + if (*m >= *k) { + +/* If m >= k, assume m >= n >= k */ + + cungqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + iinfo); + + } else { + +/* + If m < k, assume m = n + + Shift the vectors which define the elementary reflectors one + column to the right, and set the first row and column of Q + to those of the unit matrix +*/ + + for (j = *m; j >= 2; --j) { + i__1 = j * a_dim1 + 1; + a[i__1].r = 0.f, a[i__1].i = 0.f; + i__1 = *m; + for (i__ = j + 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * a_dim1; + i__3 = i__ + (j - 1) * a_dim1; + a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i; +/* L10: */ + } +/* L20: */ + } + i__1 = a_dim1 + 1; + a[i__1].r = 1.f, a[i__1].i = 0.f; + i__1 = *m; + for (i__ = 2; i__ <= i__1; ++i__) { + i__2 = i__ + a_dim1; + a[i__2].r = 0.f, a[i__2].i = 0.f; +/* L30: */ + } + if (*m > 1) { + +/* Form Q(2:m,2:m) */ + + i__1 = *m - 1; + i__2 = *m - 1; + i__3 = *m - 1; + cungqr_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); + } + } + } else { + +/* + Form P', determined by a call to CGEBRD to reduce a k-by-n + matrix +*/ + + if (*k < *n) { + +/* If k < n, assume k <= m <= n */ + + cunglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + iinfo); + + } else { + +/* + If k >= n, assume m = n + + Shift the vectors which define the elementary reflectors one + row downward, and set the first row and column of P' to + those of the unit matrix +*/ + + i__1 = a_dim1 + 1; + a[i__1].r = 1.f, a[i__1].i = 0.f; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + i__2 = i__ + a_dim1; + a[i__2].r = 0.f, a[i__2].i = 0.f; +/* L40: */ + } + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + for (i__ = j - 1; i__ >= 2; --i__) { + i__2 = i__ + j * a_dim1; + i__3 = i__ - 1 + j * a_dim1; + a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i; +/* L50: */ + } + i__2 = j * a_dim1 + 1; + a[i__2].r = 0.f, a[i__2].i = 0.f; +/* L60: */ + } + if (*n > 1) { + +/* Form P'(2:n,2:n) */ + + i__1 = *n - 1; + i__2 = *n - 1; + i__3 = *n - 1; + cunglq_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); + } + } + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + return 0; + +/* End of CUNGBR */ + +} /* cungbr_ */ + +/* Subroutine */ int cunghr_(integer *n, integer *ilo, integer *ihi, complex * + a, integer *lda, complex *tau, complex *work, integer *lwork, integer + *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j, nb, nh, iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int cungqr_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *, integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CUNGHR generates a complex unitary matrix Q which is defined as the + product of IHI-ILO elementary reflectors of order N, as returned by + CGEHRD: + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Arguments + ========= + + N (input) INTEGER + The order of the matrix Q. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + ILO and IHI must have the same values as in the previous call + of CGEHRD. Q is equal to the unit matrix except in the + submatrix Q(ilo+1:ihi,ilo+1:ihi). + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the vectors which define the elementary reflectors, + as returned by CGEHRD. + On exit, the N-by-N unitary matrix Q. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (input) COMPLEX array, dimension (N-1) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGEHRD. + + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= IHI-ILO. + For optimum performance LWORK >= (IHI-ILO)*NB, where NB is + the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nh = *ihi - *ilo; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < max(1,nh) && ! lquery) { + *info = -8; + } + + if (*info == 0) { + nb = ilaenv_(&c__1, "CUNGQR", " ", &nh, &nh, &nh, &c_n1, (ftnlen)6, ( + ftnlen)1); + lwkopt = max(1,nh) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNGHR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + +/* + Shift the vectors which define the elementary reflectors one + column to the right, and set the first ilo and the last n-ihi + rows and columns to those of the unit matrix +*/ + + i__1 = *ilo + 1; + for (j = *ihi; j >= i__1; --j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; +/* L10: */ + } + i__2 = *ihi; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + (j - 1) * a_dim1; + a[i__3].r = a[i__4].r, a[i__3].i = a[i__4].i; +/* L20: */ + } + i__2 = *n; + for (i__ = *ihi + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; +/* L30: */ + } +/* L40: */ + } + i__1 = *ilo; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; +/* L50: */ + } + i__2 = j + j * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; +/* L60: */ + } + i__1 = *n; + for (j = *ihi + 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; +/* L70: */ + } + i__2 = j + j * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; +/* L80: */ + } + + if (nh > 0) { + +/* Generate Q(ilo+1:ihi,ilo+1:ihi) */ + + cungqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[* + ilo], &work[1], lwork, &iinfo); + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + return 0; + +/* End of CUNGHR */ + +} /* cunghr_ */ + +/* Subroutine */ int cungl2_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + complex q__1, q__2; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *), clarf_(char *, integer *, integer *, complex *, + integer *, complex *, complex *, integer *, complex *), + clacgv_(integer *, complex *, integer *), xerbla_(char *, integer + *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CUNGL2 generates an m-by-n complex matrix Q with orthonormal rows, + which is defined as the first m rows of a product of k elementary + reflectors of order n + + Q = H(k)' . . . H(2)' H(1)' + + as returned by CGELQF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. N >= M. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. M >= K >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the i-th row must contain the vector which defines + the elementary reflector H(i), for i = 1,2,...,k, as returned + by CGELQF in the first k rows of its array argument A. + On exit, the m by n matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGELQF. + + WORK (workspace) COMPLEX array, dimension (M) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*k < 0 || *k > *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNGL2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m <= 0) { + return 0; + } + + if (*k < *m) { + +/* Initialise rows k+1:m to rows of the unit matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (l = *k + 1; l <= i__2; ++l) { + i__3 = l + j * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; +/* L10: */ + } + if (j > *k && j <= *m) { + i__2 = j + j * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + } +/* L20: */ + } + } + + for (i__ = *k; i__ >= 1; --i__) { + +/* Apply H(i)' to A(i:m,i:n) from the right */ + + if (i__ < *n) { + i__1 = *n - i__; + clacgv_(&i__1, &a[i__ + (i__ + 1) * a_dim1], lda); + if (i__ < *m) { + i__1 = i__ + i__ * a_dim1; + a[i__1].r = 1.f, a[i__1].i = 0.f; + i__1 = *m - i__; + i__2 = *n - i__ + 1; + r_cnjg(&q__1, &tau[i__]); + clarf_("Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, & + q__1, &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + } + i__1 = *n - i__; + i__2 = i__; + q__1.r = -tau[i__2].r, q__1.i = -tau[i__2].i; + cscal_(&i__1, &q__1, &a[i__ + (i__ + 1) * a_dim1], lda); + i__1 = *n - i__; + clacgv_(&i__1, &a[i__ + (i__ + 1) * a_dim1], lda); + } + i__1 = i__ + i__ * a_dim1; + r_cnjg(&q__2, &tau[i__]); + q__1.r = 1.f - q__2.r, q__1.i = 0.f - q__2.i; + a[i__1].r = q__1.r, a[i__1].i = q__1.i; + +/* Set A(i,1:i-1,i) to zero */ + + i__1 = i__ - 1; + for (l = 1; l <= i__1; ++l) { + i__2 = i__ + l * a_dim1; + a[i__2].r = 0.f, a[i__2].i = 0.f; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of CUNGL2 */ + +} /* cungl2_ */ + +/* Subroutine */ int cunglq_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *lwork, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int cungl2_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *), clarfb_( + char *, char *, char *, char *, integer *, integer *, integer *, + complex *, integer *, complex *, integer *, complex *, integer *, + complex *, integer *), clarft_( + char *, char *, integer *, integer *, complex *, integer *, + complex *, complex *, integer *), xerbla_(char *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CUNGLQ generates an M-by-N complex matrix Q with orthonormal rows, + which is defined as the first M rows of a product of K elementary + reflectors of order N + + Q = H(k)' . . . H(2)' H(1)' + + as returned by CGELQF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. N >= M. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. M >= K >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the i-th row must contain the vector which defines + the elementary reflector H(i), for i = 1,2,...,k, as returned + by CGELQF in the first k rows of its array argument A. + On exit, the M-by-N matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGELQF. + + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,M). + For optimum performance LWORK >= M*NB, where NB is + the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit; + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "CUNGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = max(1,*m) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*k < 0 || *k > *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*lwork < max(1,*m) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNGLQ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m <= 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < *k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "CUNGLQ", " ", m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "CUNGLQ", " ", m, n, k, &c_n1, + (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* + Use blocked code after the last block. + The first kk rows are handled by the block method. +*/ + + ki = (*k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = *k, i__2 = ki + nb; + kk = min(i__1,i__2); + +/* Set A(kk+1:m,1:kk) to zero. */ + + i__1 = kk; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = kk + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + kk = 0; + } + +/* Use unblocked code for the last or only block. */ + + if (kk < *m) { + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + cungl2_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + tau[kk + 1], &work[1], &iinfo); + } + + if (kk > 0) { + +/* Use blocked code */ + + i__1 = -nb; + for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *k - i__ + 1; + ib = min(i__2,i__3); + if (i__ + ib <= *m) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__2 = *n - i__ + 1; + clarft_("Forward", "Rowwise", &i__2, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H' to A(i+ib:m,i:n) from the right */ + + i__2 = *m - i__ - ib + 1; + i__3 = *n - i__ + 1; + clarfb_("Right", "Conjugate transpose", "Forward", "Rowwise", + &i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ + 1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ + ib + 1], &ldwork); + } + +/* Apply H' to columns i:n of current block */ + + i__2 = *n - i__ + 1; + cungl2_(&ib, &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); + +/* Set columns 1:i-1 of current block to zero */ + + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + i__3 = i__ + ib - 1; + for (l = i__; l <= i__3; ++l) { + i__4 = l + j * a_dim1; + a[i__4].r = 0.f, a[i__4].i = 0.f; +/* L30: */ + } +/* L40: */ + } +/* L50: */ + } + } + + work[1].r = (real) iws, work[1].i = 0.f; + return 0; + +/* End of CUNGLQ */ + +} /* cunglq_ */ + +/* Subroutine */ int cungqr_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *lwork, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int cung2r_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *), clarfb_( + char *, char *, char *, char *, integer *, integer *, integer *, + complex *, integer *, complex *, integer *, complex *, integer *, + complex *, integer *), clarft_( + char *, char *, integer *, integer *, complex *, integer *, + complex *, complex *, integer *), xerbla_(char *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CUNGQR generates an M-by-N complex matrix Q with orthonormal columns, + which is defined as the first N columns of a product of K elementary + reflectors of order M + + Q = H(1) H(2) . . . H(k) + + as returned by CGEQRF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. M >= N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. N >= K >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the i-th column must contain the vector which + defines the elementary reflector H(i), for i = 1,2,...,k, as + returned by CGEQRF in the first k columns of its array + argument A. + On exit, the M-by-N matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGEQRF. + + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "CUNGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = max(1,*n) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNGQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < *k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "CUNGQR", " ", m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "CUNGQR", " ", m, n, k, &c_n1, + (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* + Use blocked code after the last block. + The first kk columns are handled by the block method. +*/ + + ki = (*k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = *k, i__2 = ki + nb; + kk = min(i__1,i__2); + +/* Set A(1:kk,kk+1:n) to zero. */ + + i__1 = *n; + for (j = kk + 1; j <= i__1; ++j) { + i__2 = kk; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + kk = 0; + } + +/* Use unblocked code for the last or only block. */ + + if (kk < *n) { + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + cung2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + tau[kk + 1], &work[1], &iinfo); + } + + if (kk > 0) { + +/* Use blocked code */ + + i__1 = -nb; + for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *k - i__ + 1; + ib = min(i__2,i__3); + if (i__ + ib <= *n) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__2 = *m - i__ + 1; + clarft_("Forward", "Columnwise", &i__2, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(i:m,i+ib:n) from the left */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__ - ib + 1; + clarfb_("Left", "No transpose", "Forward", "Columnwise", & + i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ + 1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, & + work[ib + 1], &ldwork); + } + +/* Apply H to rows i:m of current block */ + + i__2 = *m - i__ + 1; + cung2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); + +/* Set rows 1:i-1 of current block to zero */ + + i__2 = i__ + ib - 1; + for (j = i__; j <= i__2; ++j) { + i__3 = i__ - 1; + for (l = 1; l <= i__3; ++l) { + i__4 = l + j * a_dim1; + a[i__4].r = 0.f, a[i__4].i = 0.f; +/* L30: */ + } +/* L40: */ + } +/* L50: */ + } + } + + work[1].r = (real) iws, work[1].i = 0.f; + return 0; + +/* End of CUNGQR */ + +} /* cungqr_ */ + +/* Subroutine */ int cunm2l_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + complex q__1; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, i1, i2, i3, mi, ni, nq; + static complex aii; + static logical left; + static complex taui; + extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * + , integer *, complex *, complex *, integer *, complex *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CUNM2L overwrites the general complex m-by-n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'C', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'C', + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by CGEQLF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'C': apply Q' (Conjugate transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) COMPLEX array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + CGEQLF in the last k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGEQLF. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the m-by-n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) COMPLEX array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNM2L", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) or H(i)' is applied to C(1:m-k+i,1:n) */ + + mi = *m - *k + i__; + } else { + +/* H(i) or H(i)' is applied to C(1:m,1:n-k+i) */ + + ni = *n - *k + i__; + } + +/* Apply H(i) or H(i)' */ + + if (notran) { + i__3 = i__; + taui.r = tau[i__3].r, taui.i = tau[i__3].i; + } else { + r_cnjg(&q__1, &tau[i__]); + taui.r = q__1.r, taui.i = q__1.i; + } + i__3 = nq - *k + i__ + i__ * a_dim1; + aii.r = a[i__3].r, aii.i = a[i__3].i; + i__3 = nq - *k + i__ + i__ * a_dim1; + a[i__3].r = 1.f, a[i__3].i = 0.f; + clarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &taui, &c__[ + c_offset], ldc, &work[1]); + i__3 = nq - *k + i__ + i__ * a_dim1; + a[i__3].r = aii.r, a[i__3].i = aii.i; +/* L10: */ + } + return 0; + +/* End of CUNM2L */ + +} /* cunm2l_ */ + +/* Subroutine */ int cunm2r_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + complex q__1; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; + static complex aii; + static logical left; + static complex taui; + extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * + , integer *, complex *, complex *, integer *, complex *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CUNM2R overwrites the general complex m-by-n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'C', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'C', + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(1) H(2) . . . H(k) + + as returned by CGEQRF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'C': apply Q' (Conjugate transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) COMPLEX array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + CGEQRF in the first k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGEQRF. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the m-by-n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) COMPLEX array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNM2R", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) or H(i)' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) or H(i)' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i) or H(i)' */ + + if (notran) { + i__3 = i__; + taui.r = tau[i__3].r, taui.i = tau[i__3].i; + } else { + r_cnjg(&q__1, &tau[i__]); + taui.r = q__1.r, taui.i = q__1.i; + } + i__3 = i__ + i__ * a_dim1; + aii.r = a[i__3].r, aii.i = a[i__3].i; + i__3 = i__ + i__ * a_dim1; + a[i__3].r = 1.f, a[i__3].i = 0.f; + clarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &taui, &c__[ic + + jc * c_dim1], ldc, &work[1]); + i__3 = i__ + i__ * a_dim1; + a[i__3].r = aii.r, a[i__3].i = aii.i; +/* L10: */ + } + return 0; + +/* End of CUNM2R */ + +} /* cunm2r_ */ + +/* Subroutine */ int cunmbr_(char *vect, char *side, char *trans, integer *m, + integer *n, integer *k, complex *a, integer *lda, complex *tau, + complex *c__, integer *ldc, complex *work, integer *lwork, integer * + info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2]; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i1, i2, nb, mi, ni, nq, nw; + static logical left; + extern logical lsame_(char *, char *); + static integer iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int cunmlq_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *); + static logical notran; + extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *); + static logical applyq; + static char transt[1]; + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + If VECT = 'Q', CUNMBR overwrites the general complex M-by-N matrix C + with + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + If VECT = 'P', CUNMBR overwrites the general complex M-by-N matrix C + with + SIDE = 'L' SIDE = 'R' + TRANS = 'N': P * C C * P + TRANS = 'C': P**H * C C * P**H + + Here Q and P**H are the unitary matrices determined by CGEBRD when + reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q + and P**H are defined as products of elementary reflectors H(i) and + G(i) respectively. + + Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the + order of the unitary matrix Q or P**H that is applied. + + If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: + if nq >= k, Q = H(1) H(2) . . . H(k); + if nq < k, Q = H(1) H(2) . . . H(nq-1). + + If VECT = 'P', A is assumed to have been a K-by-NQ matrix: + if k < nq, P = G(1) G(2) . . . G(k); + if k >= nq, P = G(1) G(2) . . . G(nq-1). + + Arguments + ========= + + VECT (input) CHARACTER*1 + = 'Q': apply Q or Q**H; + = 'P': apply P or P**H. + + SIDE (input) CHARACTER*1 + = 'L': apply Q, Q**H, P or P**H from the Left; + = 'R': apply Q, Q**H, P or P**H from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q or P; + = 'C': Conjugate transpose, apply Q**H or P**H. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + If VECT = 'Q', the number of columns in the original + matrix reduced by CGEBRD. + If VECT = 'P', the number of rows in the original + matrix reduced by CGEBRD. + K >= 0. + + A (input) COMPLEX array, dimension + (LDA,min(nq,K)) if VECT = 'Q' + (LDA,nq) if VECT = 'P' + The vectors which define the elementary reflectors H(i) and + G(i), whose products determine the matrices Q and P, as + returned by CGEBRD. + + LDA (input) INTEGER + The leading dimension of the array A. + If VECT = 'Q', LDA >= max(1,nq); + if VECT = 'P', LDA >= max(1,min(nq,K)). + + TAU (input) COMPLEX array, dimension (min(nq,K)) + TAU(i) must contain the scalar factor of the elementary + reflector H(i) or G(i) which determines Q or P, as returned + by CGEBRD in the array argument TAUQ or TAUP. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q + or P*C or P**H*C or C*P or C*P**H. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + applyq = lsame_(vect, "Q"); + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q or P and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! applyq && ! lsame_(vect, "P")) { + *info = -1; + } else if (! left && ! lsame_(side, "R")) { + *info = -2; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*k < 0) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = min(nq,*k); + if (applyq && *lda < max(1,nq) || ! applyq && *lda < max(i__1,i__2)) { + *info = -8; + } else if (*ldc < max(1,*m)) { + *info = -11; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -13; + } + } + + if (*info == 0) { + if (applyq) { + if (left) { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = ilaenv_(&c__1, "CUNMQR", ch__1, &i__1, n, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = ilaenv_(&c__1, "CUNMQR", ch__1, m, &i__1, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } else { + if (left) { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = ilaenv_(&c__1, "CUNMLQ", ch__1, &i__1, n, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = ilaenv_(&c__1, "CUNMLQ", ch__1, m, &i__1, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } + lwkopt = max(1,nw) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNMBR", &i__1); + return 0; + } else if (lquery) { + } + +/* Quick return if possible */ + + work[1].r = 1.f, work[1].i = 0.f; + if (*m == 0 || *n == 0) { + return 0; + } + + if (applyq) { + +/* Apply Q */ + + if (nq >= *k) { + +/* Q was determined by a call to CGEBRD with nq >= k */ + + cunmqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], lwork, &iinfo); + } else if (nq > 1) { + +/* Q was determined by a call to CGEBRD with nq < k */ + + if (left) { + mi = *m - 1; + ni = *n; + i1 = 2; + i2 = 1; + } else { + mi = *m; + ni = *n - 1; + i1 = 1; + i2 = 2; + } + i__1 = nq - 1; + cunmqr_(side, trans, &mi, &ni, &i__1, &a[a_dim1 + 2], lda, &tau[1] + , &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + } + } else { + +/* Apply P */ + + if (notran) { + *(unsigned char *)transt = 'C'; + } else { + *(unsigned char *)transt = 'N'; + } + if (nq > *k) { + +/* P was determined by a call to CGEBRD with nq > k */ + + cunmlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], lwork, &iinfo); + } else if (nq > 1) { + +/* P was determined by a call to CGEBRD with nq <= k */ + + if (left) { + mi = *m - 1; + ni = *n; + i1 = 2; + i2 = 1; + } else { + mi = *m; + ni = *n - 1; + i1 = 1; + i2 = 2; + } + i__1 = nq - 1; + cunmlq_(side, transt, &mi, &ni, &i__1, &a[(a_dim1 << 1) + 1], lda, + &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, & + iinfo); + } + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + return 0; + +/* End of CUNMBR */ + +} /* cunmbr_ */ + +/* Subroutine */ int cunml2_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + complex q__1; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; + static complex aii; + static logical left; + static complex taui; + extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * + , integer *, complex *, complex *, integer *, complex *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int clacgv_(integer *, complex *, integer *), + xerbla_(char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CUNML2 overwrites the general complex m-by-n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'C', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'C', + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(k)' . . . H(2)' H(1)' + + as returned by CGELQF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'C': apply Q' (Conjugate transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) COMPLEX array, dimension + (LDA,M) if SIDE = 'L', + (LDA,N) if SIDE = 'R' + The i-th row must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + CGELQF in the first k rows of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,K). + + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGELQF. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the m-by-n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) COMPLEX array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNML2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) or H(i)' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) or H(i)' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i) or H(i)' */ + + if (notran) { + r_cnjg(&q__1, &tau[i__]); + taui.r = q__1.r, taui.i = q__1.i; + } else { + i__3 = i__; + taui.r = tau[i__3].r, taui.i = tau[i__3].i; + } + if (i__ < nq) { + i__3 = nq - i__; + clacgv_(&i__3, &a[i__ + (i__ + 1) * a_dim1], lda); + } + i__3 = i__ + i__ * a_dim1; + aii.r = a[i__3].r, aii.i = a[i__3].i; + i__3 = i__ + i__ * a_dim1; + a[i__3].r = 1.f, a[i__3].i = 0.f; + clarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &taui, &c__[ic + + jc * c_dim1], ldc, &work[1]); + i__3 = i__ + i__ * a_dim1; + a[i__3].r = aii.r, a[i__3].i = aii.i; + if (i__ < nq) { + i__3 = nq - i__; + clacgv_(&i__3, &a[i__ + (i__ + 1) * a_dim1], lda); + } +/* L10: */ + } + return 0; + +/* End of CUNML2 */ + +} /* cunml2_ */ + +/* Subroutine */ int cunmlq_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i__; + static complex t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int cunml2_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *), clarfb_(char *, char *, + char *, char *, integer *, integer *, integer *, complex *, + integer *, complex *, integer *, complex *, integer *, complex *, + integer *), clarft_(char *, char * + , integer *, integer *, complex *, integer *, complex *, complex * + , integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical notran; + static integer ldwork; + static char transt[1]; + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CUNMLQ overwrites the general complex M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(k)' . . . H(2)' H(1)' + + as returned by CGELQF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**H from the Left; + = 'R': apply Q or Q**H from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'C': Conjugate transpose, apply Q**H. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) COMPLEX array, dimension + (LDA,M) if SIDE = 'L', + (LDA,N) if SIDE = 'R' + The i-th row must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + CGELQF in the first k rows of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,K). + + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGELQF. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* + Determine the block size. NB may be at most NBMAX, where NBMAX + is used to define the local array T. + + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "CUNMLQ", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNMLQ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "CUNMLQ", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + cunml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + if (notran) { + *(unsigned char *)transt = 'C'; + } else { + *(unsigned char *)transt = 'N'; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__4 = nq - i__ + 1; + clarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1], + lda, &tau[i__], t, &c__65); + if (left) { + +/* H or H' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H' */ + + clarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__ + + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1], + ldc, &work[1], &ldwork); +/* L10: */ + } + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + return 0; + +/* End of CUNMLQ */ + +} /* cunmlq_ */ + +/* Subroutine */ int cunmql_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i__; + static complex t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int cunm2l_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *), clarfb_(char *, char *, + char *, char *, integer *, integer *, integer *, complex *, + integer *, complex *, integer *, complex *, integer *, complex *, + integer *), clarft_(char *, char * + , integer *, integer *, complex *, integer *, complex *, complex * + , integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical notran; + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CUNMQL overwrites the general complex M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by CGEQLF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**H from the Left; + = 'R': apply Q or Q**H from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'C': Transpose, apply Q**H. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) COMPLEX array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + CGEQLF in the last k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGEQLF. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* + Determine the block size. NB may be at most NBMAX, where NBMAX + is used to define the local array T. + + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "CUNMQL", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNMQL", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "CUNMQL", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + cunm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i+ib-1) . . . H(i+1) H(i) +*/ + + i__4 = nq - *k + i__ + ib - 1; + clarft_("Backward", "Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1] + , lda, &tau[i__], t, &c__65); + if (left) { + +/* H or H' is applied to C(1:m-k+i+ib-1,1:n) */ + + mi = *m - *k + i__ + ib - 1; + } else { + +/* H or H' is applied to C(1:m,1:n-k+i+ib-1) */ + + ni = *n - *k + i__ + ib - 1; + } + +/* Apply H or H' */ + + clarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, &a[ + i__ * a_dim1 + 1], lda, t, &c__65, &c__[c_offset], ldc, & + work[1], &ldwork); +/* L10: */ + } + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + return 0; + +/* End of CUNMQL */ + +} /* cunmql_ */ + +/* Subroutine */ int cunmqr_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i__; + static complex t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int cunm2r_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *), clarfb_(char *, char *, + char *, char *, integer *, integer *, integer *, complex *, + integer *, complex *, integer *, complex *, integer *, complex *, + integer *), clarft_(char *, char * + , integer *, integer *, complex *, integer *, complex *, complex * + , integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical notran; + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CUNMQR overwrites the general complex M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(1) H(2) . . . H(k) + + as returned by CGEQRF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**H from the Left; + = 'R': apply Q or Q**H from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'C': Conjugate transpose, apply Q**H. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) COMPLEX array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + CGEQRF in the first k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGEQRF. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* + Determine the block size. NB may be at most NBMAX, where NBMAX + is used to define the local array T. + + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "CUNMQR", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNMQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "CUNMQR", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + cunm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__4 = nq - i__ + 1; + clarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], t, &c__65) + ; + if (left) { + +/* H or H' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H' */ + + clarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[ + i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * + c_dim1], ldc, &work[1], &ldwork); +/* L10: */ + } + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + return 0; + +/* End of CUNMQR */ + +} /* cunmqr_ */ + +/* Subroutine */ int cunmtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2, i__3; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i1, i2, nb, mi, ni, nq, nw; + static logical left; + extern logical lsame_(char *, char *); + static integer iinfo; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int cunmql_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *), cunmqr_(char *, + char *, integer *, integer *, integer *, complex *, integer *, + complex *, complex *, integer *, complex *, integer *, integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CUNMTR overwrites the general complex M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + where Q is a complex unitary matrix of order nq, with nq = m if + SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + nq-1 elementary reflectors, as returned by CHETRD: + + if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); + + if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**H from the Left; + = 'R': apply Q or Q**H from the Right. + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A contains elementary reflectors + from CHETRD; + = 'L': Lower triangle of A contains elementary reflectors + from CHETRD. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'C': Conjugate transpose, apply Q**H. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + A (input) COMPLEX array, dimension + (LDA,M) if SIDE = 'L' + (LDA,N) if SIDE = 'R' + The vectors which define the elementary reflectors, as + returned by CHETRD. + + LDA (input) INTEGER + The leading dimension of the array A. + LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. + + TAU (input) COMPLEX array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CHETRD. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >=M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! upper && ! lsame_(uplo, "L")) { + *info = -2; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "C")) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + if (upper) { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *m - 1; + i__3 = *m - 1; + nb = ilaenv_(&c__1, "CUNMQL", ch__1, &i__2, n, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, "CUNMQL", ch__1, m, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } else { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *m - 1; + i__3 = *m - 1; + nb = ilaenv_(&c__1, "CUNMQR", ch__1, &i__2, n, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, "CUNMQR", ch__1, m, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } + lwkopt = max(1,nw) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__2 = -(*info); + xerbla_("CUNMTR", &i__2); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || nq == 1) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + if (left) { + mi = *m - 1; + ni = *n; + } else { + mi = *m; + ni = *n - 1; + } + + if (upper) { + +/* Q was determined by a call to CHETRD with UPLO = 'U' */ + + i__2 = nq - 1; + cunmql_(side, trans, &mi, &ni, &i__2, &a[(a_dim1 << 1) + 1], lda, & + tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo); + } else { + +/* Q was determined by a call to CHETRD with UPLO = 'L' */ + + if (left) { + i1 = 2; + i2 = 1; + } else { + i1 = 1; + i2 = 2; + } + i__2 = nq - 1; + cunmqr_(side, trans, &mi, &ni, &i__2, &a[a_dim1 + 2], lda, &tau[1], & + c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + return 0; + +/* End of CUNMTR */ + +} /* cunmtr_ */ + diff --git a/numpy/linalg/lapack_lite/f2c_d_lapack.c b/numpy/linalg/lapack_lite/f2c_d_lapack.c new file mode 100644 index 000000000..cb28b686f --- /dev/null +++ b/numpy/linalg/lapack_lite/f2c_d_lapack.c @@ -0,0 +1,36180 @@ +/* +NOTE: This is generated code. Look in Misc/lapack_lite for information on + remaking this file. +*/ +#include "f2c.h" + +#ifdef HAVE_CONFIG +#include "config.h" +#else +extern doublereal dlamch_(char *); +#define EPSILON dlamch_("Epsilon") +#define SAFEMINIMUM dlamch_("Safe minimum") +#define PRECISION dlamch_("Precision") +#define BASE dlamch_("Base") +#endif + +extern doublereal dlapy2_(doublereal *x, doublereal *y); + +/* +f2c knows the exact rules for precedence, and so omits parentheses where not +strictly necessary. Since this is generated code, we don't really care if +it's readable, and we know what is written is correct. So don't warn about +them. +*/ +#if defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wparentheses" +#endif + + +/* Table of constant values */ + +static integer c__9 = 9; +static integer c__0 = 0; +static doublereal c_b15 = 1.; +static integer c__1 = 1; +static doublereal c_b29 = 0.; +static doublereal c_b94 = -.125; +static doublereal c_b151 = -1.; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; +static integer c__8 = 8; +static integer c__4 = 4; +static integer c__65 = 65; +static integer c__6 = 6; +static integer c__15 = 15; +static logical c_false = FALSE_; +static integer c__10 = 10; +static integer c__11 = 11; +static doublereal c_b2804 = 2.; +static logical c_true = TRUE_; + +/* Subroutine */ int dbdsdc_(char *uplo, char *compq, integer *n, doublereal * + d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, + integer *ldvt, doublereal *q, integer *iq, doublereal *work, integer * + iwork, integer *info) +{ + /* System generated locals */ + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double d_sign(doublereal *, doublereal *), log(doublereal); + + /* Local variables */ + static integer i__, j, k; + static doublereal p, r__; + static integer z__, ic, ii, kk; + static doublereal cs; + static integer is, iu; + static doublereal sn; + static integer nm1; + static doublereal eps; + static integer ivt, difl, difr, ierr, perm, mlvl, sqre; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *), dcopy_(integer *, doublereal *, integer * + , doublereal *, integer *), dswap_(integer *, doublereal *, + integer *, doublereal *, integer *); + static integer poles, iuplo, nsize, start; + extern /* Subroutine */ int dlasd0_(integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + integer *, integer *, doublereal *, integer *); + + extern /* Subroutine */ int dlasda_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *), dlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *), dlasdq_(char *, integer *, integer *, integer + *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *), dlaset_(char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *), dlartg_(doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *); + static integer givcol; + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + static integer icompq; + static doublereal orgnrm; + static integer givnum, givptr, qstart, smlsiz, wstart, smlszp; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + December 1, 1999 + + + Purpose + ======= + + DBDSDC computes the singular value decomposition (SVD) of a real + N-by-N (upper or lower) bidiagonal matrix B: B = U * S * VT, + using a divide and conquer method, where S is a diagonal matrix + with non-negative diagonal elements (the singular values of B), and + U and VT are orthogonal matrices of left and right singular vectors, + respectively. DBDSDC can be used to compute all singular values, + and optionally, singular vectors or singular vectors in compact form. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. See DLASD3 for details. + + The code currently call DLASDQ if singular values only are desired. + However, it can be slightly modified to compute singular values + using the divide and conquer method. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': B is upper bidiagonal. + = 'L': B is lower bidiagonal. + + COMPQ (input) CHARACTER*1 + Specifies whether singular vectors are to be computed + as follows: + = 'N': Compute singular values only; + = 'P': Compute singular values and compute singular + vectors in compact form; + = 'I': Compute singular values and singular vectors. + + N (input) INTEGER + The order of the matrix B. N >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the n diagonal elements of the bidiagonal matrix B. + On exit, if INFO=0, the singular values of B. + + E (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the elements of E contain the offdiagonal + elements of the bidiagonal matrix whose SVD is desired. + On exit, E has been destroyed. + + U (output) DOUBLE PRECISION array, dimension (LDU,N) + If COMPQ = 'I', then: + On exit, if INFO = 0, U contains the left singular vectors + of the bidiagonal matrix. + For other values of COMPQ, U is not referenced. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= 1. + If singular vectors are desired, then LDU >= max( 1, N ). + + VT (output) DOUBLE PRECISION array, dimension (LDVT,N) + If COMPQ = 'I', then: + On exit, if INFO = 0, VT' contains the right singular + vectors of the bidiagonal matrix. + For other values of COMPQ, VT is not referenced. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= 1. + If singular vectors are desired, then LDVT >= max( 1, N ). + + Q (output) DOUBLE PRECISION array, dimension (LDQ) + If COMPQ = 'P', then: + On exit, if INFO = 0, Q and IQ contain the left + and right singular vectors in a compact form, + requiring O(N log N) space instead of 2*N**2. + In particular, Q contains all the DOUBLE PRECISION data in + LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1)))) + words of memory, where SMLSIZ is returned by ILAENV and + is equal to the maximum size of the subproblems at the + bottom of the computation tree (usually about 25). + For other values of COMPQ, Q is not referenced. + + IQ (output) INTEGER array, dimension (LDIQ) + If COMPQ = 'P', then: + On exit, if INFO = 0, Q and IQ contain the left + and right singular vectors in a compact form, + requiring O(N log N) space instead of 2*N**2. + In particular, IQ contains all INTEGER data in + LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1)))) + words of memory, where SMLSIZ is returned by ILAENV and + is equal to the maximum size of the subproblems at the + bottom of the computation tree (usually about 25). + For other values of COMPQ, IQ is not referenced. + + WORK (workspace) DOUBLE PRECISION array, dimension (LWORK) + If COMPQ = 'N' then LWORK >= (4 * N). + If COMPQ = 'P' then LWORK >= (6 * N). + If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N). + + IWORK (workspace) INTEGER array, dimension (8*N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an singular value. + The update process of divide and conquer failed. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --q; + --iq; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + iuplo = 0; + if (lsame_(uplo, "U")) { + iuplo = 1; + } + if (lsame_(uplo, "L")) { + iuplo = 2; + } + if (lsame_(compq, "N")) { + icompq = 0; + } else if (lsame_(compq, "P")) { + icompq = 1; + } else if (lsame_(compq, "I")) { + icompq = 2; + } else { + icompq = -1; + } + if (iuplo == 0) { + *info = -1; + } else if (icompq < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ldu < 1 || icompq == 2 && *ldu < *n) { + *info = -7; + } else if (*ldvt < 1 || icompq == 2 && *ldvt < *n) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DBDSDC", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + smlsiz = ilaenv_(&c__9, "DBDSDC", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + if (*n == 1) { + if (icompq == 1) { + q[1] = d_sign(&c_b15, &d__[1]); + q[smlsiz * *n + 1] = 1.; + } else if (icompq == 2) { + u[u_dim1 + 1] = d_sign(&c_b15, &d__[1]); + vt[vt_dim1 + 1] = 1.; + } + d__[1] = abs(d__[1]); + return 0; + } + nm1 = *n - 1; + +/* + If matrix lower bidiagonal, rotate to be upper bidiagonal + by applying Givens rotations on the left +*/ + + wstart = 1; + qstart = 3; + if (icompq == 1) { + dcopy_(n, &d__[1], &c__1, &q[1], &c__1); + i__1 = *n - 1; + dcopy_(&i__1, &e[1], &c__1, &q[*n + 1], &c__1); + } + if (iuplo == 2) { + qstart = 5; + wstart = (*n << 1) - 1; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (icompq == 1) { + q[i__ + (*n << 1)] = cs; + q[i__ + *n * 3] = sn; + } else if (icompq == 2) { + work[i__] = cs; + work[nm1 + i__] = -sn; + } +/* L10: */ + } + } + +/* If ICOMPQ = 0, use DLASDQ to compute the singular values. */ + + if (icompq == 0) { + dlasdq_("U", &c__0, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ + wstart], info); + goto L40; + } + +/* + If N is smaller than the minimum divide size SMLSIZ, then solve + the problem with another solver. +*/ + + if (*n <= smlsiz) { + if (icompq == 2) { + dlaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu); + dlaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt); + dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &vt[vt_offset] + , ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ + wstart], info); + } else if (icompq == 1) { + iu = 1; + ivt = iu + *n; + dlaset_("A", n, n, &c_b29, &c_b15, &q[iu + (qstart - 1) * *n], n); + dlaset_("A", n, n, &c_b29, &c_b15, &q[ivt + (qstart - 1) * *n], n); + dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &q[ivt + ( + qstart - 1) * *n], n, &q[iu + (qstart - 1) * *n], n, &q[ + iu + (qstart - 1) * *n], n, &work[wstart], info); + } + goto L40; + } + + if (icompq == 2) { + dlaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu); + dlaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt); + } + +/* Scale. */ + + orgnrm = dlanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.) { + return 0; + } + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, &c__1, &d__[1], n, &ierr); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &nm1, &c__1, &e[1], &nm1, & + ierr); + + eps = EPSILON; + + mlvl = (integer) (log((doublereal) (*n) / (doublereal) (smlsiz + 1)) / + log(2.)) + 1; + smlszp = smlsiz + 1; + + if (icompq == 1) { + iu = 1; + ivt = smlsiz + 1; + difl = ivt + smlszp; + difr = difl + mlvl; + z__ = difr + (mlvl << 1); + ic = z__ + mlvl; + is = ic + 1; + poles = is + 1; + givnum = poles + (mlvl << 1); + + k = 1; + givptr = 2; + perm = 3; + givcol = perm + mlvl; + } + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) < eps) { + d__[i__] = d_sign(&eps, &d__[i__]); + } +/* L20: */ + } + + start = 1; + sqre = 0; + + i__1 = nm1; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) { + +/* + Subproblem found. First determine its size and then + apply divide and conquer on it. +*/ + + if (i__ < nm1) { + +/* A subproblem with E(I) small for I < NM1. */ + + nsize = i__ - start + 1; + } else if ((d__1 = e[i__], abs(d__1)) >= eps) { + +/* A subproblem with E(NM1) not too small but I = NM1. */ + + nsize = *n - start + 1; + } else { + +/* + A subproblem with E(NM1) small. This implies an + 1-by-1 subproblem at D(N). Solve this 1-by-1 problem + first. +*/ + + nsize = i__ - start + 1; + if (icompq == 2) { + u[*n + *n * u_dim1] = d_sign(&c_b15, &d__[*n]); + vt[*n + *n * vt_dim1] = 1.; + } else if (icompq == 1) { + q[*n + (qstart - 1) * *n] = d_sign(&c_b15, &d__[*n]); + q[*n + (smlsiz + qstart - 1) * *n] = 1.; + } + d__[*n] = (d__1 = d__[*n], abs(d__1)); + } + if (icompq == 2) { + dlasd0_(&nsize, &sqre, &d__[start], &e[start], &u[start + + start * u_dim1], ldu, &vt[start + start * vt_dim1], + ldvt, &smlsiz, &iwork[1], &work[wstart], info); + } else { + dlasda_(&icompq, &smlsiz, &nsize, &sqre, &d__[start], &e[ + start], &q[start + (iu + qstart - 2) * *n], n, &q[ + start + (ivt + qstart - 2) * *n], &iq[start + k * *n], + &q[start + (difl + qstart - 2) * *n], &q[start + ( + difr + qstart - 2) * *n], &q[start + (z__ + qstart - + 2) * *n], &q[start + (poles + qstart - 2) * *n], &iq[ + start + givptr * *n], &iq[start + givcol * *n], n, & + iq[start + perm * *n], &q[start + (givnum + qstart - + 2) * *n], &q[start + (ic + qstart - 2) * *n], &q[ + start + (is + qstart - 2) * *n], &work[wstart], & + iwork[1], info); + if (*info != 0) { + return 0; + } + } + start = i__ + 1; + } +/* L30: */ + } + +/* Unscale */ + + dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, n, &c__1, &d__[1], n, &ierr); +L40: + +/* Use Selection Sort to minimize swaps of singular vectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + kk = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] > p) { + kk = j; + p = d__[j]; + } +/* L50: */ + } + if (kk != i__) { + d__[kk] = d__[i__]; + d__[i__] = p; + if (icompq == 1) { + iq[i__] = kk; + } else if (icompq == 2) { + dswap_(n, &u[i__ * u_dim1 + 1], &c__1, &u[kk * u_dim1 + 1], & + c__1); + dswap_(n, &vt[i__ + vt_dim1], ldvt, &vt[kk + vt_dim1], ldvt); + } + } else if (icompq == 1) { + iq[i__] = i__; + } +/* L60: */ + } + +/* If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO */ + + if (icompq == 1) { + if (iuplo == 1) { + iq[*n] = 1; + } else { + iq[*n] = 0; + } + } + +/* + If B is lower bidiagonal, update U by those Givens rotations + which rotated B to be upper bidiagonal +*/ + + if (iuplo == 2 && icompq == 2) { + dlasr_("L", "V", "B", n, n, &work[1], &work[*n], &u[u_offset], ldu); + } + + return 0; + +/* End of DBDSDC */ + +} /* dbdsdc_ */ + +/* Subroutine */ int dbdsqr_(char *uplo, integer *n, integer *ncvt, integer * + nru, integer *ncc, doublereal *d__, doublereal *e, doublereal *vt, + integer *ldvt, doublereal *u, integer *ldu, doublereal *c__, integer * + ldc, doublereal *work, integer *info) +{ + /* System generated locals */ + integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2; + doublereal d__1, d__2, d__3, d__4; + + /* Builtin functions */ + double pow_dd(doublereal *, doublereal *), sqrt(doublereal), d_sign( + doublereal *, doublereal *); + + /* Local variables */ + static doublereal f, g, h__; + static integer i__, j, m; + static doublereal r__, cs; + static integer ll; + static doublereal sn, mu; + static integer nm1, nm12, nm13, lll; + static doublereal eps, sll, tol, abse; + static integer idir; + static doublereal abss; + static integer oldm; + static doublereal cosl; + static integer isub, iter; + static doublereal unfl, sinl, cosr, smin, smax, sinr; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *), dlas2_( + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *), dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + static doublereal oldcs; + extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *); + static integer oldll; + static doublereal shift, sigmn, oldsn; + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer maxit; + static doublereal sminl, sigmx; + static logical lower; + extern /* Subroutine */ int dlasq1_(integer *, doublereal *, doublereal *, + doublereal *, integer *), dlasv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + + extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *), xerbla_(char *, + integer *); + static doublereal sminoa, thresh; + static logical rotate; + static doublereal sminlo, tolmul; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + DBDSQR computes the singular value decomposition (SVD) of a real + N-by-N (upper or lower) bidiagonal matrix B: B = Q * S * P' (P' + denotes the transpose of P), where S is a diagonal matrix with + non-negative diagonal elements (the singular values of B), and Q + and P are orthogonal matrices. + + The routine computes S, and optionally computes U * Q, P' * VT, + or Q' * C, for given real input matrices U, VT, and C. + + See "Computing Small Singular Values of Bidiagonal Matrices With + Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, + LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, + no. 5, pp. 873-912, Sept 1990) and + "Accurate singular values and differential qd algorithms," by + B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics + Department, University of California at Berkeley, July 1992 + for a detailed description of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': B is upper bidiagonal; + = 'L': B is lower bidiagonal. + + N (input) INTEGER + The order of the matrix B. N >= 0. + + NCVT (input) INTEGER + The number of columns of the matrix VT. NCVT >= 0. + + NRU (input) INTEGER + The number of rows of the matrix U. NRU >= 0. + + NCC (input) INTEGER + The number of columns of the matrix C. NCC >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the n diagonal elements of the bidiagonal matrix B. + On exit, if INFO=0, the singular values of B in decreasing + order. + + E (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the elements of E contain the + offdiagonal elements of the bidiagonal matrix whose SVD + is desired. On normal exit (INFO = 0), E is destroyed. + If the algorithm does not converge (INFO > 0), D and E + will contain the diagonal and superdiagonal elements of a + bidiagonal matrix orthogonally equivalent to the one given + as input. E(N) is used for workspace. + + VT (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT) + On entry, an N-by-NCVT matrix VT. + On exit, VT is overwritten by P' * VT. + VT is not referenced if NCVT = 0. + + LDVT (input) INTEGER + The leading dimension of the array VT. + LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. + + U (input/output) DOUBLE PRECISION array, dimension (LDU, N) + On entry, an NRU-by-N matrix U. + On exit, U is overwritten by U * Q. + U is not referenced if NRU = 0. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= max(1,NRU). + + C (input/output) DOUBLE PRECISION array, dimension (LDC, NCC) + On entry, an N-by-NCC matrix C. + On exit, C is overwritten by Q' * C. + C is not referenced if NCC = 0. + + LDC (input) INTEGER + The leading dimension of the array C. + LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. + + WORK (workspace) DOUBLE PRECISION array, dimension (4*N) + + INFO (output) INTEGER + = 0: successful exit + < 0: If INFO = -i, the i-th argument had an illegal value + > 0: the algorithm did not converge; D and E contain the + elements of a bidiagonal matrix which is orthogonally + similar to the input matrix B; if INFO = i, i + elements of E have not converged to zero. + + Internal Parameters + =================== + + TOLMUL DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8))) + TOLMUL controls the convergence criterion of the QR loop. + If it is positive, TOLMUL*EPS is the desired relative + precision in the computed singular values. + If it is negative, abs(TOLMUL*EPS*sigma_max) is the + desired absolute accuracy in the computed singular + values (corresponds to relative accuracy + abs(TOLMUL*EPS) in the largest singular value. + abs(TOLMUL) should be between 1 and 1/EPS, and preferably + between 10 (for fast convergence) and .1/EPS + (for there to be some accuracy in the results). + Default is to lose at either one eighth or 2 of the + available decimal digits in each computed singular value + (whichever is smaller). + + MAXITR INTEGER, default = 6 + MAXITR controls the maximum number of passes of the + algorithm through its inner loop. The algorithms stops + (and so fails to converge) if the number of passes + through the inner loop exceeds MAXITR*N**2. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + lower = lsame_(uplo, "L"); + if (! lsame_(uplo, "U") && ! lower) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ncvt < 0) { + *info = -3; + } else if (*nru < 0) { + *info = -4; + } else if (*ncc < 0) { + *info = -5; + } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { + *info = -9; + } else if (*ldu < max(1,*nru)) { + *info = -11; + } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DBDSQR", &i__1); + return 0; + } + if (*n == 0) { + return 0; + } + if (*n == 1) { + goto L160; + } + +/* ROTATE is true if any singular vectors desired, false otherwise */ + + rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; + +/* If no singular vectors desired, use qd algorithm */ + + if (! rotate) { + dlasq1_(n, &d__[1], &e[1], &work[1], info); + return 0; + } + + nm1 = *n - 1; + nm12 = nm1 + nm1; + nm13 = nm12 + nm1; + idir = 0; + +/* Get machine constants */ + + eps = EPSILON; + unfl = SAFEMINIMUM; + +/* + If matrix lower bidiagonal, rotate to be upper bidiagonal + by applying Givens rotations on the left +*/ + + if (lower) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + work[i__] = cs; + work[nm1 + i__] = sn; +/* L10: */ + } + +/* Update singular vectors if desired */ + + if (*nru > 0) { + dlasr_("R", "V", "F", nru, n, &work[1], &work[*n], &u[u_offset], + ldu); + } + if (*ncc > 0) { + dlasr_("L", "V", "F", n, ncc, &work[1], &work[*n], &c__[c_offset], + ldc); + } + } + +/* + Compute singular values to relative accuracy TOL + (By setting TOL to be negative, algorithm will compute + singular values to absolute accuracy ABS(TOL)*norm(input matrix)) + + Computing MAX + Computing MIN +*/ + d__3 = 100., d__4 = pow_dd(&eps, &c_b94); + d__1 = 10., d__2 = min(d__3,d__4); + tolmul = max(d__1,d__2); + tol = tolmul * eps; + +/* Compute approximate maximum, minimum singular values */ + + smax = 0.; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__2 = smax, d__3 = (d__1 = d__[i__], abs(d__1)); + smax = max(d__2,d__3); +/* L20: */ + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__2 = smax, d__3 = (d__1 = e[i__], abs(d__1)); + smax = max(d__2,d__3); +/* L30: */ + } + sminl = 0.; + if (tol >= 0.) { + +/* Relative accuracy desired */ + + sminoa = abs(d__[1]); + if (sminoa == 0.) { + goto L50; + } + mu = sminoa; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + mu = (d__2 = d__[i__], abs(d__2)) * (mu / (mu + (d__1 = e[i__ - 1] + , abs(d__1)))); + sminoa = min(sminoa,mu); + if (sminoa == 0.) { + goto L50; + } +/* L40: */ + } +L50: + sminoa /= sqrt((doublereal) (*n)); +/* Computing MAX */ + d__1 = tol * sminoa, d__2 = *n * 6 * *n * unfl; + thresh = max(d__1,d__2); + } else { + +/* + Absolute accuracy desired + + Computing MAX +*/ + d__1 = abs(tol) * smax, d__2 = *n * 6 * *n * unfl; + thresh = max(d__1,d__2); + } + +/* + Prepare for main iteration loop for the singular values + (MAXIT is the maximum number of passes through the inner + loop permitted before nonconvergence signalled.) +*/ + + maxit = *n * 6 * *n; + iter = 0; + oldll = -1; + oldm = -1; + +/* M points to last element of unconverged part of matrix */ + + m = *n; + +/* Begin main iteration loop */ + +L60: + +/* Check for convergence or exceeding iteration count */ + + if (m <= 1) { + goto L160; + } + if (iter > maxit) { + goto L200; + } + +/* Find diagonal block of matrix to work on */ + + if (tol < 0. && (d__1 = d__[m], abs(d__1)) <= thresh) { + d__[m] = 0.; + } + smax = (d__1 = d__[m], abs(d__1)); + smin = smax; + i__1 = m - 1; + for (lll = 1; lll <= i__1; ++lll) { + ll = m - lll; + abss = (d__1 = d__[ll], abs(d__1)); + abse = (d__1 = e[ll], abs(d__1)); + if (tol < 0. && abss <= thresh) { + d__[ll] = 0.; + } + if (abse <= thresh) { + goto L80; + } + smin = min(smin,abss); +/* Computing MAX */ + d__1 = max(smax,abss); + smax = max(d__1,abse); +/* L70: */ + } + ll = 0; + goto L90; +L80: + e[ll] = 0.; + +/* Matrix splits since E(LL) = 0 */ + + if (ll == m - 1) { + +/* Convergence of bottom singular value, return to top of loop */ + + --m; + goto L60; + } +L90: + ++ll; + +/* E(LL) through E(M-1) are nonzero, E(LL-1) is zero */ + + if (ll == m - 1) { + +/* 2 by 2 block, handle separately */ + + dlasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr, + &sinl, &cosl); + d__[m - 1] = sigmx; + e[m - 1] = 0.; + d__[m] = sigmn; + +/* Compute singular vectors, if desired */ + + if (*ncvt > 0) { + drot_(ncvt, &vt[m - 1 + vt_dim1], ldvt, &vt[m + vt_dim1], ldvt, & + cosr, &sinr); + } + if (*nru > 0) { + drot_(nru, &u[(m - 1) * u_dim1 + 1], &c__1, &u[m * u_dim1 + 1], & + c__1, &cosl, &sinl); + } + if (*ncc > 0) { + drot_(ncc, &c__[m - 1 + c_dim1], ldc, &c__[m + c_dim1], ldc, & + cosl, &sinl); + } + m += -2; + goto L60; + } + +/* + If working on new submatrix, choose shift direction + (from larger end diagonal element towards smaller) +*/ + + if (ll > oldm || m < oldll) { + if ((d__1 = d__[ll], abs(d__1)) >= (d__2 = d__[m], abs(d__2))) { + +/* Chase bulge from top (big end) to bottom (small end) */ + + idir = 1; + } else { + +/* Chase bulge from bottom (big end) to top (small end) */ + + idir = 2; + } + } + +/* Apply convergence tests */ + + if (idir == 1) { + +/* + Run convergence test in forward direction + First apply standard test to bottom of matrix +*/ + + if ((d__2 = e[m - 1], abs(d__2)) <= abs(tol) * (d__1 = d__[m], abs( + d__1)) || tol < 0. && (d__3 = e[m - 1], abs(d__3)) <= thresh) + { + e[m - 1] = 0.; + goto L60; + } + + if (tol >= 0.) { + +/* + If relative accuracy desired, + apply convergence criterion forward +*/ + + mu = (d__1 = d__[ll], abs(d__1)); + sminl = mu; + i__1 = m - 1; + for (lll = ll; lll <= i__1; ++lll) { + if ((d__1 = e[lll], abs(d__1)) <= tol * mu) { + e[lll] = 0.; + goto L60; + } + sminlo = sminl; + mu = (d__2 = d__[lll + 1], abs(d__2)) * (mu / (mu + (d__1 = e[ + lll], abs(d__1)))); + sminl = min(sminl,mu); +/* L100: */ + } + } + + } else { + +/* + Run convergence test in backward direction + First apply standard test to top of matrix +*/ + + if ((d__2 = e[ll], abs(d__2)) <= abs(tol) * (d__1 = d__[ll], abs(d__1) + ) || tol < 0. && (d__3 = e[ll], abs(d__3)) <= thresh) { + e[ll] = 0.; + goto L60; + } + + if (tol >= 0.) { + +/* + If relative accuracy desired, + apply convergence criterion backward +*/ + + mu = (d__1 = d__[m], abs(d__1)); + sminl = mu; + i__1 = ll; + for (lll = m - 1; lll >= i__1; --lll) { + if ((d__1 = e[lll], abs(d__1)) <= tol * mu) { + e[lll] = 0.; + goto L60; + } + sminlo = sminl; + mu = (d__2 = d__[lll], abs(d__2)) * (mu / (mu + (d__1 = e[lll] + , abs(d__1)))); + sminl = min(sminl,mu); +/* L110: */ + } + } + } + oldll = ll; + oldm = m; + +/* + Compute shift. First, test if shifting would ruin relative + accuracy, and if so set the shift to zero. + + Computing MAX +*/ + d__1 = eps, d__2 = tol * .01; + if (tol >= 0. && *n * tol * (sminl / smax) <= max(d__1,d__2)) { + +/* Use a zero shift to avoid loss of relative accuracy */ + + shift = 0.; + } else { + +/* Compute the shift from 2-by-2 block at end of matrix */ + + if (idir == 1) { + sll = (d__1 = d__[ll], abs(d__1)); + dlas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__); + } else { + sll = (d__1 = d__[m], abs(d__1)); + dlas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__); + } + +/* Test if shift negligible, and if so set to zero */ + + if (sll > 0.) { +/* Computing 2nd power */ + d__1 = shift / sll; + if (d__1 * d__1 < eps) { + shift = 0.; + } + } + } + +/* Increment iteration count */ + + iter = iter + m - ll; + +/* If SHIFT = 0, do simplified QR iteration */ + + if (shift == 0.) { + if (idir == 1) { + +/* + Chase bulge from top to bottom + Save cosines and sines for later singular vector updates +*/ + + cs = 1.; + oldcs = 1.; + i__1 = m - 1; + for (i__ = ll; i__ <= i__1; ++i__) { + d__1 = d__[i__] * cs; + dlartg_(&d__1, &e[i__], &cs, &sn, &r__); + if (i__ > ll) { + e[i__ - 1] = oldsn * r__; + } + d__1 = oldcs * r__; + d__2 = d__[i__ + 1] * sn; + dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]); + work[i__ - ll + 1] = cs; + work[i__ - ll + 1 + nm1] = sn; + work[i__ - ll + 1 + nm12] = oldcs; + work[i__ - ll + 1 + nm13] = oldsn; +/* L120: */ + } + h__ = d__[m] * cs; + d__[m] = h__ * oldcs; + e[m - 1] = h__ * oldsn; + +/* Update singular vectors */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ + ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + dlasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 + + 1], &u[ll * u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + + 1], &c__[ll + c_dim1], ldc); + } + +/* Test convergence */ + + if ((d__1 = e[m - 1], abs(d__1)) <= thresh) { + e[m - 1] = 0.; + } + + } else { + +/* + Chase bulge from bottom to top + Save cosines and sines for later singular vector updates +*/ + + cs = 1.; + oldcs = 1.; + i__1 = ll + 1; + for (i__ = m; i__ >= i__1; --i__) { + d__1 = d__[i__] * cs; + dlartg_(&d__1, &e[i__ - 1], &cs, &sn, &r__); + if (i__ < m) { + e[i__] = oldsn * r__; + } + d__1 = oldcs * r__; + d__2 = d__[i__ - 1] * sn; + dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]); + work[i__ - ll] = cs; + work[i__ - ll + nm1] = -sn; + work[i__ - ll + nm12] = oldcs; + work[i__ - ll + nm13] = -oldsn; +/* L130: */ + } + h__ = d__[ll] * cs; + d__[ll] = h__ * oldcs; + e[ll] = h__ * oldsn; + +/* Update singular vectors */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ + nm13 + 1], &vt[ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + dlasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * + u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ + ll + c_dim1], ldc); + } + +/* Test convergence */ + + if ((d__1 = e[ll], abs(d__1)) <= thresh) { + e[ll] = 0.; + } + } + } else { + +/* Use nonzero shift */ + + if (idir == 1) { + +/* + Chase bulge from top to bottom + Save cosines and sines for later singular vector updates +*/ + + f = ((d__1 = d__[ll], abs(d__1)) - shift) * (d_sign(&c_b15, &d__[ + ll]) + shift / d__[ll]); + g = e[ll]; + i__1 = m - 1; + for (i__ = ll; i__ <= i__1; ++i__) { + dlartg_(&f, &g, &cosr, &sinr, &r__); + if (i__ > ll) { + e[i__ - 1] = r__; + } + f = cosr * d__[i__] + sinr * e[i__]; + e[i__] = cosr * e[i__] - sinr * d__[i__]; + g = sinr * d__[i__ + 1]; + d__[i__ + 1] = cosr * d__[i__ + 1]; + dlartg_(&f, &g, &cosl, &sinl, &r__); + d__[i__] = r__; + f = cosl * e[i__] + sinl * d__[i__ + 1]; + d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__]; + if (i__ < m - 1) { + g = sinl * e[i__ + 1]; + e[i__ + 1] = cosl * e[i__ + 1]; + } + work[i__ - ll + 1] = cosr; + work[i__ - ll + 1 + nm1] = sinr; + work[i__ - ll + 1 + nm12] = cosl; + work[i__ - ll + 1 + nm13] = sinl; +/* L140: */ + } + e[m - 1] = f; + +/* Update singular vectors */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ + ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + dlasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 + + 1], &u[ll * u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + + 1], &c__[ll + c_dim1], ldc); + } + +/* Test convergence */ + + if ((d__1 = e[m - 1], abs(d__1)) <= thresh) { + e[m - 1] = 0.; + } + + } else { + +/* + Chase bulge from bottom to top + Save cosines and sines for later singular vector updates +*/ + + f = ((d__1 = d__[m], abs(d__1)) - shift) * (d_sign(&c_b15, &d__[m] + ) + shift / d__[m]); + g = e[m - 1]; + i__1 = ll + 1; + for (i__ = m; i__ >= i__1; --i__) { + dlartg_(&f, &g, &cosr, &sinr, &r__); + if (i__ < m) { + e[i__] = r__; + } + f = cosr * d__[i__] + sinr * e[i__ - 1]; + e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__]; + g = sinr * d__[i__ - 1]; + d__[i__ - 1] = cosr * d__[i__ - 1]; + dlartg_(&f, &g, &cosl, &sinl, &r__); + d__[i__] = r__; + f = cosl * e[i__ - 1] + sinl * d__[i__ - 1]; + d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1]; + if (i__ > ll + 1) { + g = sinl * e[i__ - 2]; + e[i__ - 2] = cosl * e[i__ - 2]; + } + work[i__ - ll] = cosr; + work[i__ - ll + nm1] = -sinr; + work[i__ - ll + nm12] = cosl; + work[i__ - ll + nm13] = -sinl; +/* L150: */ + } + e[ll] = f; + +/* Test convergence */ + + if ((d__1 = e[ll], abs(d__1)) <= thresh) { + e[ll] = 0.; + } + +/* Update singular vectors if desired */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ + nm13 + 1], &vt[ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + dlasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * + u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ + ll + c_dim1], ldc); + } + } + } + +/* QR iteration finished, go back and check convergence */ + + goto L60; + +/* All singular values converged, so make them positive */ + +L160: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (d__[i__] < 0.) { + d__[i__] = -d__[i__]; + +/* Change sign of singular vectors, if desired */ + + if (*ncvt > 0) { + dscal_(ncvt, &c_b151, &vt[i__ + vt_dim1], ldvt); + } + } +/* L170: */ + } + +/* + Sort the singular values into decreasing order (insertion sort on + singular values, but only one transposition per singular vector) +*/ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Scan for smallest D(I) */ + + isub = 1; + smin = d__[1]; + i__2 = *n + 1 - i__; + for (j = 2; j <= i__2; ++j) { + if (d__[j] <= smin) { + isub = j; + smin = d__[j]; + } +/* L180: */ + } + if (isub != *n + 1 - i__) { + +/* Swap singular values and vectors */ + + d__[isub] = d__[*n + 1 - i__]; + d__[*n + 1 - i__] = smin; + if (*ncvt > 0) { + dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[*n + 1 - i__ + + vt_dim1], ldvt); + } + if (*nru > 0) { + dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[(*n + 1 - i__) * + u_dim1 + 1], &c__1); + } + if (*ncc > 0) { + dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[*n + 1 - i__ + + c_dim1], ldc); + } + } +/* L190: */ + } + goto L220; + +/* Maximum number of iterations exceeded, failure to converge */ + +L200: + *info = 0; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.) { + ++(*info); + } +/* L210: */ + } +L220: + return 0; + +/* End of DBDSQR */ + +} /* dbdsqr_ */ + +/* Subroutine */ int dgebak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, doublereal *scale, integer *m, doublereal *v, integer * + ldv, integer *info) +{ + /* System generated locals */ + integer v_dim1, v_offset, i__1; + + /* Local variables */ + static integer i__, k; + static doublereal s; + static integer ii; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + static logical leftv; + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical rightv; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + DGEBAK forms the right or left eigenvectors of a real general matrix + by backward transformation on the computed eigenvectors of the + balanced matrix output by DGEBAL. + + Arguments + ========= + + JOB (input) CHARACTER*1 + Specifies the type of backward transformation required: + = 'N', do nothing, return immediately; + = 'P', do backward transformation for permutation only; + = 'S', do backward transformation for scaling only; + = 'B', do backward transformations for both permutation and + scaling. + JOB must be the same as the argument JOB supplied to DGEBAL. + + SIDE (input) CHARACTER*1 + = 'R': V contains right eigenvectors; + = 'L': V contains left eigenvectors. + + N (input) INTEGER + The number of rows of the matrix V. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + The integers ILO and IHI determined by DGEBAL. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + SCALE (input) DOUBLE PRECISION array, dimension (N) + Details of the permutation and scaling factors, as returned + by DGEBAL. + + M (input) INTEGER + The number of columns of the matrix V. M >= 0. + + V (input/output) DOUBLE PRECISION array, dimension (LDV,M) + On entry, the matrix of right or left eigenvectors to be + transformed, as returned by DHSEIN or DTREVC. + On exit, V is overwritten by the transformed eigenvectors. + + LDV (input) INTEGER + The leading dimension of the array V. LDV >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + ===================================================================== + + + Decode and Test the input parameters +*/ + + /* Parameter adjustments */ + --scale; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + + /* Function Body */ + rightv = lsame_(side, "R"); + leftv = lsame_(side, "L"); + + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (! rightv && ! leftv) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -4; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -5; + } else if (*m < 0) { + *info = -7; + } else if (*ldv < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEBAK", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*m == 0) { + return 0; + } + if (lsame_(job, "N")) { + return 0; + } + + if (*ilo == *ihi) { + goto L30; + } + +/* Backward balance */ + + if (lsame_(job, "S") || lsame_(job, "B")) { + + if (rightv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = scale[i__]; + dscal_(m, &s, &v[i__ + v_dim1], ldv); +/* L10: */ + } + } + + if (leftv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = 1. / scale[i__]; + dscal_(m, &s, &v[i__ + v_dim1], ldv); +/* L20: */ + } + } + + } + +/* + Backward permutation + + For I = ILO-1 step -1 until 1, + IHI+1 step 1 until N do -- +*/ + +L30: + if (lsame_(job, "P") || lsame_(job, "B")) { + if (rightv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L40; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = (integer) scale[i__]; + if (k == i__) { + goto L40; + } + dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L40: + ; + } + } + + if (leftv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L50; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = (integer) scale[i__]; + if (k == i__) { + goto L50; + } + dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L50: + ; + } + } + } + + return 0; + +/* End of DGEBAK */ + +} /* dgebak_ */ + +/* Subroutine */ int dgebal_(char *job, integer *n, doublereal *a, integer * + lda, integer *ilo, integer *ihi, doublereal *scale, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Local variables */ + static doublereal c__, f, g; + static integer i__, j, k, l, m; + static doublereal r__, s, ca, ra; + static integer ica, ira, iexc; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + static doublereal sfmin1, sfmin2, sfmax1, sfmax2; + + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical noconv; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DGEBAL balances a general real matrix A. This involves, first, + permuting A by a similarity transformation to isolate eigenvalues + in the first 1 to ILO-1 and last IHI+1 to N elements on the + diagonal; and second, applying a diagonal similarity transformation + to rows and columns ILO to IHI to make the rows and columns as + close in norm as possible. Both steps are optional. + + Balancing may reduce the 1-norm of the matrix, and improve the + accuracy of the computed eigenvalues and/or eigenvectors. + + Arguments + ========= + + JOB (input) CHARACTER*1 + Specifies the operations to be performed on A: + = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 + for i = 1,...,N; + = 'P': permute only; + = 'S': scale only; + = 'B': both permute and scale. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the input matrix A. + On exit, A is overwritten by the balanced matrix. + If JOB = 'N', A is not referenced. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + ILO (output) INTEGER + IHI (output) INTEGER + ILO and IHI are set to integers such that on exit + A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. + If JOB = 'N' or 'S', ILO = 1 and IHI = N. + + SCALE (output) DOUBLE PRECISION array, dimension (N) + Details of the permutations and scaling factors applied to + A. If P(j) is the index of the row and column interchanged + with row and column j and D(j) is the scaling factor + applied to row and column j, then + SCALE(j) = P(j) for j = 1,...,ILO-1 + = D(j) for j = ILO,...,IHI + = P(j) for j = IHI+1,...,N. + The order in which the interchanges are made is N to IHI+1, + then 1 to ILO-1. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The permutations consist of row and column interchanges which put + the matrix in the form + + ( T1 X Y ) + P A P = ( 0 B Z ) + ( 0 0 T2 ) + + where T1 and T2 are upper triangular matrices whose eigenvalues lie + along the diagonal. The column indices ILO and IHI mark the starting + and ending columns of the submatrix B. Balancing consists of applying + a diagonal similarity transformation inv(D) * B * D to make the + 1-norms of each row of B and its corresponding column nearly equal. + The output matrix is + + ( T1 X*D Y ) + ( 0 inv(D)*B*D inv(D)*Z ). + ( 0 0 T2 ) + + Information about the permutations P and the diagonal matrix D is + returned in the vector SCALE. + + This subroutine is based on the EISPACK routine BALANC. + + Modified by Tzu-Yi Chen, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --scale; + + /* Function Body */ + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEBAL", &i__1); + return 0; + } + + k = 1; + l = *n; + + if (*n == 0) { + goto L210; + } + + if (lsame_(job, "N")) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + scale[i__] = 1.; +/* L10: */ + } + goto L210; + } + + if (lsame_(job, "S")) { + goto L120; + } + +/* Permutation to isolate eigenvalues if possible */ + + goto L50; + +/* Row and column exchange. */ + +L20: + scale[m] = (doublereal) j; + if (j == m) { + goto L30; + } + + dswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); + i__1 = *n - k + 1; + dswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); + +L30: + switch (iexc) { + case 1: goto L40; + case 2: goto L80; + } + +/* Search for rows isolating an eigenvalue and push them down. */ + +L40: + if (l == 1) { + goto L210; + } + --l; + +L50: + for (j = l; j >= 1; --j) { + + i__1 = l; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ == j) { + goto L60; + } + if (a[j + i__ * a_dim1] != 0.) { + goto L70; + } +L60: + ; + } + + m = l; + iexc = 1; + goto L20; +L70: + ; + } + + goto L90; + +/* Search for columns isolating an eigenvalue and push them left. */ + +L80: + ++k; + +L90: + i__1 = l; + for (j = k; j <= i__1; ++j) { + + i__2 = l; + for (i__ = k; i__ <= i__2; ++i__) { + if (i__ == j) { + goto L100; + } + if (a[i__ + j * a_dim1] != 0.) { + goto L110; + } +L100: + ; + } + + m = k; + iexc = 2; + goto L20; +L110: + ; + } + +L120: + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + scale[i__] = 1.; +/* L130: */ + } + + if (lsame_(job, "P")) { + goto L210; + } + +/* + Balance the submatrix in rows K to L. + + Iterative loop for norm reduction +*/ + + sfmin1 = SAFEMINIMUM / PRECISION; + sfmax1 = 1. / sfmin1; + sfmin2 = sfmin1 * 8.; + sfmax2 = 1. / sfmin2; +L140: + noconv = FALSE_; + + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + c__ = 0.; + r__ = 0.; + + i__2 = l; + for (j = k; j <= i__2; ++j) { + if (j == i__) { + goto L150; + } + c__ += (d__1 = a[j + i__ * a_dim1], abs(d__1)); + r__ += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +L150: + ; + } + ica = idamax_(&l, &a[i__ * a_dim1 + 1], &c__1); + ca = (d__1 = a[ica + i__ * a_dim1], abs(d__1)); + i__2 = *n - k + 1; + ira = idamax_(&i__2, &a[i__ + k * a_dim1], lda); + ra = (d__1 = a[i__ + (ira + k - 1) * a_dim1], abs(d__1)); + +/* Guard against zero C or R due to underflow. */ + + if (c__ == 0. || r__ == 0.) { + goto L200; + } + g = r__ / 8.; + f = 1.; + s = c__ + r__; +L160: +/* Computing MAX */ + d__1 = max(f,c__); +/* Computing MIN */ + d__2 = min(r__,g); + if (c__ >= g || max(d__1,ca) >= sfmax2 || min(d__2,ra) <= sfmin2) { + goto L170; + } + f *= 8.; + c__ *= 8.; + ca *= 8.; + r__ /= 8.; + g /= 8.; + ra /= 8.; + goto L160; + +L170: + g = c__ / 8.; +L180: +/* Computing MIN */ + d__1 = min(f,c__), d__1 = min(d__1,g); + if (g < r__ || max(r__,ra) >= sfmax2 || min(d__1,ca) <= sfmin2) { + goto L190; + } + f /= 8.; + c__ /= 8.; + g /= 8.; + ca /= 8.; + r__ *= 8.; + ra *= 8.; + goto L180; + +/* Now balance. */ + +L190: + if (c__ + r__ >= s * .95) { + goto L200; + } + if (f < 1. && scale[i__] < 1.) { + if (f * scale[i__] <= sfmin1) { + goto L200; + } + } + if (f > 1. && scale[i__] > 1.) { + if (scale[i__] >= sfmax1 / f) { + goto L200; + } + } + g = 1. / f; + scale[i__] *= f; + noconv = TRUE_; + + i__2 = *n - k + 1; + dscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); + dscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); + +L200: + ; + } + + if (noconv) { + goto L140; + } + +L210: + *ilo = k; + *ihi = l; + + return 0; + +/* End of DGEBAL */ + +} /* dgebal_ */ + +/* Subroutine */ int dgebd2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * + taup, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DGEBD2 reduces a real general m by n matrix A to upper or lower + bidiagonal form B by an orthogonal transformation: Q' * A * P = B. + + If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. M >= 0. + + N (input) INTEGER + The number of columns in the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the m by n general matrix to be reduced. + On exit, + if m >= n, the diagonal and the first superdiagonal are + overwritten with the upper bidiagonal matrix B; the + elements below the diagonal, with the array TAUQ, represent + the orthogonal matrix Q as a product of elementary + reflectors, and the elements above the first superdiagonal, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors; + if m < n, the diagonal and the first subdiagonal are + overwritten with the lower bidiagonal matrix B; the + elements below the first subdiagonal, with the array TAUQ, + represent the orthogonal matrix Q as a product of + elementary reflectors, and the elements above the diagonal, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) DOUBLE PRECISION array, dimension (min(M,N)) + The diagonal elements of the bidiagonal matrix B: + D(i) = A(i,i). + + E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) + The off-diagonal elements of the bidiagonal matrix B: + if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; + if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. + + TAUQ (output) DOUBLE PRECISION array dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix Q. See Further Details. + + TAUP (output) DOUBLE PRECISION array, dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix P. See Further Details. + + WORK (workspace) DOUBLE PRECISION array, dimension (max(M,N)) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + If m >= n, + + Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors; + v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); + u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, + + Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors; + v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); + u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). + + The contents of A on exit are illustrated by the following examples: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) + ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) + ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) + ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) + ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) + ( v1 v2 v3 v4 v5 ) + + where d and e denote diagonal and off-diagonal elements of B, vi + denotes an element of the vector defining H(i), and ui an element of + the vector defining G(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info < 0) { + i__1 = -(*info); + xerbla_("DGEBD2", &i__1); + return 0; + } + + if (*m >= *n) { + +/* Reduce to upper bidiagonal form */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * + a_dim1], &c__1, &tauq[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + +/* Apply H(i) to A(i:m,i+1:n) from the left */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__; + dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tauq[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = d__[i__]; + + if (i__ < *n) { + +/* + Generate elementary reflector G(i) to annihilate + A(i,i+2:n) +*/ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( + i__3,*n) * a_dim1], lda, &taup[i__]); + e[i__] = a[i__ + (i__ + 1) * a_dim1]; + a[i__ + (i__ + 1) * a_dim1] = 1.; + +/* Apply G(i) to A(i+1:m,i+1:n) from the right */ + + i__2 = *m - i__; + i__3 = *n - i__; + dlarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], + lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &work[1]); + a[i__ + (i__ + 1) * a_dim1] = e[i__]; + } else { + taup[i__] = 0.; + } +/* L10: */ + } + } else { + +/* Reduce to lower bidiagonal form */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector G(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * + a_dim1], lda, &taup[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + +/* Apply G(i) to A(i+1:m,i:n) from the right */ + + i__2 = *m - i__; + i__3 = *n - i__ + 1; +/* Computing MIN */ + i__4 = i__ + 1; + dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[ + i__], &a[min(i__4,*m) + i__ * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = d__[i__]; + + if (i__ < *m) { + +/* + Generate elementary reflector H(i) to annihilate + A(i+2:m,i) +*/ + + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) + + i__ * a_dim1], &c__1, &tauq[i__]); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.; + +/* Apply H(i) to A(i+1:m,i+1:n) from the left */ + + i__2 = *m - i__; + i__3 = *n - i__; + dlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], & + c__1, &tauq[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &work[1]); + a[i__ + 1 + i__ * a_dim1] = e[i__]; + } else { + tauq[i__] = 0.; + } +/* L20: */ + } + } + return 0; + +/* End of DGEBD2 */ + +} /* dgebd2_ */ + +/* Subroutine */ int dgebrd_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * + taup, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j, nb, nx; + static doublereal ws; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer nbmin, iinfo, minmn; + extern /* Subroutine */ int dgebd2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *), dlabrd_(integer *, integer *, integer * + , doublereal *, integer *, doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *, integer *, doublereal *, integer *) + , xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwrkx, ldwrky, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DGEBRD reduces a general real M-by-N matrix A to upper or lower + bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. + + If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. M >= 0. + + N (input) INTEGER + The number of columns in the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the M-by-N general matrix to be reduced. + On exit, + if m >= n, the diagonal and the first superdiagonal are + overwritten with the upper bidiagonal matrix B; the + elements below the diagonal, with the array TAUQ, represent + the orthogonal matrix Q as a product of elementary + reflectors, and the elements above the first superdiagonal, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors; + if m < n, the diagonal and the first subdiagonal are + overwritten with the lower bidiagonal matrix B; the + elements below the first subdiagonal, with the array TAUQ, + represent the orthogonal matrix Q as a product of + elementary reflectors, and the elements above the diagonal, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) DOUBLE PRECISION array, dimension (min(M,N)) + The diagonal elements of the bidiagonal matrix B: + D(i) = A(i,i). + + E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) + The off-diagonal elements of the bidiagonal matrix B: + if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; + if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. + + TAUQ (output) DOUBLE PRECISION array dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix Q. See Further Details. + + TAUP (output) DOUBLE PRECISION array, dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix P. See Further Details. + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The length of the array WORK. LWORK >= max(1,M,N). + For optimum performance LWORK >= (M+N)*NB, where NB + is the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + If m >= n, + + Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors; + v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); + u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, + + Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors; + v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); + u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). + + The contents of A on exit are illustrated by the following examples: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) + ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) + ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) + ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) + ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) + ( v1 v2 v3 v4 v5 ) + + where d and e denote diagonal and off-diagonal elements of B, vi + denotes an element of the vector defining H(i), and ui an element of + the vector defining G(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; +/* Computing MAX */ + i__1 = 1, i__2 = ilaenv_(&c__1, "DGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = max(i__1,i__2); + lwkopt = (*m + *n) * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = max(1,*m); + if (*lwork < max(i__1,*n) && ! lquery) { + *info = -10; + } + } + if (*info < 0) { + i__1 = -(*info); + xerbla_("DGEBRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + minmn = min(*m,*n); + if (minmn == 0) { + work[1] = 1.; + return 0; + } + + ws = (doublereal) max(*m,*n); + ldwrkx = *m; + ldwrky = *n; + + if (nb > 1 && nb < minmn) { + +/* + Set the crossover point NX. + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "DGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + +/* Determine when to switch from blocked to unblocked code. */ + + if (nx < minmn) { + ws = (doublereal) ((*m + *n) * nb); + if ((doublereal) (*lwork) < ws) { + +/* + Not enough work space for the optimal NB, consider using + a smaller block size. +*/ + + nbmin = ilaenv_(&c__2, "DGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + if (*lwork >= (*m + *n) * nbmin) { + nb = *lwork / (*m + *n); + } else { + nb = 1; + nx = minmn; + } + } + } + } else { + nx = minmn; + } + + i__1 = minmn - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + +/* + Reduce rows and columns i:i+nb-1 to bidiagonal form and return + the matrices X and Y which are needed to update the unreduced + part of the matrix +*/ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ + 1; + dlabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[ + i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx + * nb + 1], &ldwrky); + +/* + Update the trailing submatrix A(i+nb:m,i+nb:n), using an update + of the form A := A - V*Y' - X*U' +*/ + + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + dgemm_("No transpose", "Transpose", &i__3, &i__4, &nb, &c_b151, &a[ + i__ + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + nb + 1], & + ldwrky, &c_b15, &a[i__ + nb + (i__ + nb) * a_dim1], lda); + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + dgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &c_b151, & + work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, & + c_b15, &a[i__ + nb + (i__ + nb) * a_dim1], lda); + +/* Copy diagonal and off-diagonal elements of B back into A */ + + if (*m >= *n) { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j + j * a_dim1] = d__[j]; + a[j + (j + 1) * a_dim1] = e[j]; +/* L10: */ + } + } else { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j + j * a_dim1] = d__[j]; + a[j + 1 + j * a_dim1] = e[j]; +/* L20: */ + } + } +/* L30: */ + } + +/* Use unblocked code to reduce the remainder of the matrix */ + + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + dgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], & + tauq[i__], &taup[i__], &work[1], &iinfo); + work[1] = ws; + return 0; + +/* End of DGEBRD */ + +} /* dgebrd_ */ + +/* Subroutine */ int dgeev_(char *jobvl, char *jobvr, integer *n, doublereal * + a, integer *lda, doublereal *wr, doublereal *wi, doublereal *vl, + integer *ldvl, doublereal *vr, integer *ldvr, doublereal *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3, i__4; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, k; + static doublereal r__, cs, sn; + static integer ihi; + static doublereal scl; + static integer ilo; + static doublereal dum[1], eps; + static integer ibal; + static char side[1]; + static integer maxb; + static doublereal anrm; + static integer ierr, itau; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + static integer iwrk, nout; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern doublereal dlapy2_(doublereal *, doublereal *); + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebak_( + char *, char *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *), + dgebal_(char *, integer *, doublereal *, integer *, integer *, + integer *, doublereal *, integer *); + static logical scalea; + + static doublereal cscale; + extern doublereal dlange_(char *, integer *, integer *, doublereal *, + integer *, doublereal *); + extern /* Subroutine */ int dgehrd_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *), xerbla_(char *, integer *); + static logical select[1]; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static doublereal bignum; + extern /* Subroutine */ int dorghr_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dhseqr_(char *, char *, integer *, integer *, integer + *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *), dtrevc_(char *, char *, logical *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, doublereal *, integer *); + static integer minwrk, maxwrk; + static logical wantvl; + static doublereal smlnum; + static integer hswork; + static logical lquery, wantvr; + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + December 8, 1999 + + + Purpose + ======= + + DGEEV computes for an N-by-N real nonsymmetric matrix A, the + eigenvalues and, optionally, the left and/or right eigenvectors. + + The right eigenvector v(j) of A satisfies + A * v(j) = lambda(j) * v(j) + where lambda(j) is its eigenvalue. + The left eigenvector u(j) of A satisfies + u(j)**H * A = lambda(j) * u(j)**H + where u(j)**H denotes the conjugate transpose of u(j). + + The computed eigenvectors are normalized to have Euclidean norm + equal to 1 and largest component real. + + Arguments + ========= + + JOBVL (input) CHARACTER*1 + = 'N': left eigenvectors of A are not computed; + = 'V': left eigenvectors of A are computed. + + JOBVR (input) CHARACTER*1 + = 'N': right eigenvectors of A are not computed; + = 'V': right eigenvectors of A are computed. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the N-by-N matrix A. + On exit, A has been overwritten. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + WR (output) DOUBLE PRECISION array, dimension (N) + WI (output) DOUBLE PRECISION array, dimension (N) + WR and WI contain the real and imaginary parts, + respectively, of the computed eigenvalues. Complex + conjugate pairs of eigenvalues appear consecutively + with the eigenvalue having the positive imaginary part + first. + + VL (output) DOUBLE PRECISION array, dimension (LDVL,N) + If JOBVL = 'V', the left eigenvectors u(j) are stored one + after another in the columns of VL, in the same order + as their eigenvalues. + If JOBVL = 'N', VL is not referenced. + If the j-th eigenvalue is real, then u(j) = VL(:,j), + the j-th column of VL. + If the j-th and (j+1)-st eigenvalues form a complex + conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and + u(j+1) = VL(:,j) - i*VL(:,j+1). + + LDVL (input) INTEGER + The leading dimension of the array VL. LDVL >= 1; if + JOBVL = 'V', LDVL >= N. + + VR (output) DOUBLE PRECISION array, dimension (LDVR,N) + If JOBVR = 'V', the right eigenvectors v(j) are stored one + after another in the columns of VR, in the same order + as their eigenvalues. + If JOBVR = 'N', VR is not referenced. + If the j-th eigenvalue is real, then v(j) = VR(:,j), + the j-th column of VR. + If the j-th and (j+1)-st eigenvalues form a complex + conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and + v(j+1) = VR(:,j) - i*VR(:,j+1). + + LDVR (input) INTEGER + The leading dimension of the array VR. LDVR >= 1; if + JOBVR = 'V', LDVR >= N. + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,3*N), and + if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N. For good + performance, LWORK must generally be larger. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = i, the QR algorithm failed to compute all the + eigenvalues, and no eigenvectors have been computed; + elements i+1:N of WR and WI contain eigenvalues which + have converged. + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --wr; + --wi; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + wantvl = lsame_(jobvl, "V"); + wantvr = lsame_(jobvr, "V"); + if (! wantvl && ! lsame_(jobvl, "N")) { + *info = -1; + } else if (! wantvr && ! lsame_(jobvr, "N")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldvl < 1 || wantvl && *ldvl < *n) { + *info = -9; + } else if (*ldvr < 1 || wantvr && *ldvr < *n) { + *info = -11; + } + +/* + Compute workspace + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + NB refers to the optimal block size for the immediately + following subroutine, as returned by ILAENV. + HSWORK refers to the workspace preferred by DHSEQR, as + calculated below. HSWORK is computed assuming ILO=1 and IHI=N, + the worst case.) +*/ + + minwrk = 1; + if (*info == 0 && (*lwork >= 1 || lquery)) { + maxwrk = (*n << 1) + *n * ilaenv_(&c__1, "DGEHRD", " ", n, &c__1, n, & + c__0, (ftnlen)6, (ftnlen)1); + if (! wantvl && ! wantvr) { +/* Computing MAX */ + i__1 = 1, i__2 = *n * 3; + minwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = ilaenv_(&c__8, "DHSEQR", "EN", n, &c__1, n, &c_n1, (ftnlen) + 6, (ftnlen)2); + maxb = max(i__1,2); +/* + Computing MIN + Computing MAX +*/ + i__3 = 2, i__4 = ilaenv_(&c__4, "DHSEQR", "EN", n, &c__1, n, & + c_n1, (ftnlen)6, (ftnlen)2); + i__1 = min(maxb,*n), i__2 = max(i__3,i__4); + k = min(i__1,i__2); +/* Computing MAX */ + i__1 = k * (k + 2), i__2 = *n << 1; + hswork = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n + + hswork; + maxwrk = max(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = 1, i__2 = *n << 2; + minwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, "DOR" + "GHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = ilaenv_(&c__8, "DHSEQR", "SV", n, &c__1, n, &c_n1, (ftnlen) + 6, (ftnlen)2); + maxb = max(i__1,2); +/* + Computing MIN + Computing MAX +*/ + i__3 = 2, i__4 = ilaenv_(&c__4, "DHSEQR", "SV", n, &c__1, n, & + c_n1, (ftnlen)6, (ftnlen)2); + i__1 = min(maxb,*n), i__2 = max(i__3,i__4); + k = min(i__1,i__2); +/* Computing MAX */ + i__1 = k * (k + 2), i__2 = *n << 1; + hswork = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n + + hswork; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n << 2; + maxwrk = max(i__1,i__2); + } + work[1] = (doublereal) maxwrk; + } + if (*lwork < minwrk && ! lquery) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEEV ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = PRECISION; + smlnum = SAFEMINIMUM; + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1. / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = dlange_("M", n, n, &a[a_offset], lda, dum); + scalea = FALSE_; + if (anrm > 0. && anrm < smlnum) { + scalea = TRUE_; + cscale = smlnum; + } else if (anrm > bignum) { + scalea = TRUE_; + cscale = bignum; + } + if (scalea) { + dlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & + ierr); + } + +/* + Balance the matrix + (Workspace: need N) +*/ + + ibal = 1; + dgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr); + +/* + Reduce to upper Hessenberg form + (Workspace: need 3*N, prefer 2*N+N*NB) +*/ + + itau = ibal + *n; + iwrk = itau + *n; + i__1 = *lwork - iwrk + 1; + dgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, + &ierr); + + if (wantvl) { + +/* + Want left eigenvectors + Copy Householder vectors to VL +*/ + + *(unsigned char *)side = 'L'; + dlacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) + ; + +/* + Generate orthogonal matrix in VL + (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) +*/ + + i__1 = *lwork - iwrk + 1; + dorghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], + &i__1, &ierr); + +/* + Perform QR iteration, accumulating Schur vectors in VL + (Workspace: need N+1, prefer N+HSWORK (see comments) ) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + dhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + vl[vl_offset], ldvl, &work[iwrk], &i__1, info); + + if (wantvr) { + +/* + Want left and right eigenvectors + Copy Schur vectors to VR +*/ + + *(unsigned char *)side = 'B'; + dlacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); + } + + } else if (wantvr) { + +/* + Want right eigenvectors + Copy Householder vectors to VR +*/ + + *(unsigned char *)side = 'R'; + dlacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) + ; + +/* + Generate orthogonal matrix in VR + (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) +*/ + + i__1 = *lwork - iwrk + 1; + dorghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], + &i__1, &ierr); + +/* + Perform QR iteration, accumulating Schur vectors in VR + (Workspace: need N+1, prefer N+HSWORK (see comments) ) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + dhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + vr[vr_offset], ldvr, &work[iwrk], &i__1, info); + + } else { + +/* + Compute eigenvalues only + (Workspace: need N+1, prefer N+HSWORK (see comments) ) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + dhseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + vr[vr_offset], ldvr, &work[iwrk], &i__1, info); + } + +/* If INFO > 0 from DHSEQR, then quit */ + + if (*info > 0) { + goto L50; + } + + if (wantvl || wantvr) { + +/* + Compute left and/or right eigenvectors + (Workspace: need 4*N) +*/ + + dtrevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, + &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &ierr); + } + + if (wantvl) { + +/* + Undo balancing of left eigenvectors + (Workspace: need N) +*/ + + dgebak_("B", "L", n, &ilo, &ihi, &work[ibal], n, &vl[vl_offset], ldvl, + &ierr); + +/* Normalize left eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.) { + scl = 1. / dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + } else if (wi[i__] > 0.) { + d__1 = dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + d__2 = dnrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + scl = 1. / dlapy2_(&d__1, &d__2); + dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + dscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing 2nd power */ + d__1 = vl[k + i__ * vl_dim1]; +/* Computing 2nd power */ + d__2 = vl[k + (i__ + 1) * vl_dim1]; + work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; +/* L10: */ + } + k = idamax_(n, &work[iwrk], &c__1); + dlartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], + &cs, &sn, &r__); + drot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * + vl_dim1 + 1], &c__1, &cs, &sn); + vl[k + (i__ + 1) * vl_dim1] = 0.; + } +/* L20: */ + } + } + + if (wantvr) { + +/* + Undo balancing of right eigenvectors + (Workspace: need N) +*/ + + dgebak_("B", "R", n, &ilo, &ihi, &work[ibal], n, &vr[vr_offset], ldvr, + &ierr); + +/* Normalize right eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.) { + scl = 1. / dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + } else if (wi[i__] > 0.) { + d__1 = dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + d__2 = dnrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + scl = 1. / dlapy2_(&d__1, &d__2); + dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + dscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing 2nd power */ + d__1 = vr[k + i__ * vr_dim1]; +/* Computing 2nd power */ + d__2 = vr[k + (i__ + 1) * vr_dim1]; + work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; +/* L30: */ + } + k = idamax_(n, &work[iwrk], &c__1); + dlartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], + &cs, &sn, &r__); + drot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * + vr_dim1 + 1], &c__1, &cs, &sn); + vr[k + (i__ + 1) * vr_dim1] = 0.; + } +/* L40: */ + } + } + +/* Undo scaling if necessary */ + +L50: + if (scalea) { + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = max(i__3,1); + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + + 1], &i__2, &ierr); + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = max(i__3,1); + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + + 1], &i__2, &ierr); + if (*info > 0) { + i__1 = ilo - 1; + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], + n, &ierr); + i__1 = ilo - 1; + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], + n, &ierr); + } + } + + work[1] = (doublereal) maxwrk; + return 0; + +/* End of DGEEV */ + +} /* dgeev_ */ + +/* Subroutine */ int dgehd2_(integer *n, integer *ilo, integer *ihi, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__; + static doublereal aii; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DGEHD2 reduces a real general matrix A to upper Hessenberg form H by + an orthogonal similarity transformation: Q' * A * Q = H . + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that A is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to DGEBAL; otherwise they should be + set to 1 and N respectively. See Further Details. + 1 <= ILO <= IHI <= max(1,N). + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the n by n general matrix to be reduced. + On exit, the upper triangle and the first subdiagonal of A + are overwritten with the upper Hessenberg matrix H, and the + elements below the first subdiagonal, with the array TAU, + represent the orthogonal matrix Q as a product of elementary + reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) DOUBLE PRECISION array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) DOUBLE PRECISION array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrix Q is represented as a product of (ihi-ilo) elementary + reflectors + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on + exit in A(i+2:ihi,i), and tau in TAU(i). + + The contents of A are illustrated by the following example, with + n = 7, ilo = 2 and ihi = 6: + + on entry, on exit, + + ( a a a a a a a ) ( a a h h h h a ) + ( a a a a a a ) ( a h h h h a ) + ( a a a a a a ) ( h h h h h h ) + ( a a a a a a ) ( v2 h h h h h ) + ( a a a a a a ) ( v2 v3 h h h h ) + ( a a a a a a ) ( v2 v3 v4 h h h ) + ( a ) ( a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEHD2", &i__1); + return 0; + } + + i__1 = *ihi - 1; + for (i__ = *ilo; i__ <= i__1; ++i__) { + +/* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */ + + i__2 = *ihi - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ * + a_dim1], &c__1, &tau[i__]); + aii = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.; + +/* Apply H(i) to A(1:ihi,i+1:ihi) from the right */ + + i__2 = *ihi - i__; + dlarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]); + +/* Apply H(i) to A(i+1:ihi,i+1:n) from the left */ + + i__2 = *ihi - i__; + i__3 = *n - i__; + dlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]); + + a[i__ + 1 + i__ * a_dim1] = aii; +/* L10: */ + } + + return 0; + +/* End of DGEHD2 */ + +} /* dgehd2_ */ + +/* Subroutine */ int dgehrd_(integer *n, integer *ilo, integer *ihi, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__; + static doublereal t[4160] /* was [65][64] */; + static integer ib; + static doublereal ei; + static integer nb, nh, nx, iws; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer nbmin, iinfo; + extern /* Subroutine */ int dgehd2_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), + dlarfb_(char *, char *, char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *), dlahrd_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DGEHRD reduces a real general matrix A to upper Hessenberg form H by + an orthogonal similarity transformation: Q' * A * Q = H . + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that A is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to DGEBAL; otherwise they should be + set to 1 and N respectively. See Further Details. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the N-by-N general matrix to be reduced. + On exit, the upper triangle and the first subdiagonal of A + are overwritten with the upper Hessenberg matrix H, and the + elements below the first subdiagonal, with the array TAU, + represent the orthogonal matrix Q as a product of elementary + reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) DOUBLE PRECISION array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to + zero. + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The length of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrix Q is represented as a product of (ihi-ilo) elementary + reflectors + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on + exit in A(i+2:ihi,i), and tau in TAU(i). + + The contents of A are illustrated by the following example, with + n = 7, ilo = 2 and ihi = 6: + + on entry, on exit, + + ( a a a a a a a ) ( a a h h h h a ) + ( a a a a a a ) ( a h h h h a ) + ( a a a a a a ) ( h h h h h h ) + ( a a a a a a ) ( v2 h h h h h ) + ( a a a a a a ) ( v2 v3 h h h h ) + ( a a a a a a ) ( v2 v3 v4 h h h ) + ( a ) ( a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; +/* Computing MIN */ + i__1 = 64, i__2 = ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = min(i__1,i__2); + lwkopt = *n * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEHRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */ + + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + tau[i__] = 0.; +/* L10: */ + } + i__1 = *n - 1; + for (i__ = max(1,*ihi); i__ <= i__1; ++i__) { + tau[i__] = 0.; +/* L20: */ + } + +/* Quick return if possible */ + + nh = *ihi - *ilo + 1; + if (nh <= 1) { + work[1] = 1.; + return 0; + } + +/* + Determine the block size. + + Computing MIN +*/ + i__1 = 64, i__2 = ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = min(i__1,i__2); + nbmin = 2; + iws = 1; + if (nb > 1 && nb < nh) { + +/* + Determine when to cross over from blocked to unblocked code + (last block is always handled by unblocked code). + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "DGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < nh) { + +/* Determine if workspace is large enough for blocked code. */ + + iws = *n * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: determine the + minimum value of NB, and reduce NB or force use of + unblocked code. + + Computing MAX +*/ + i__1 = 2, i__2 = ilaenv_(&c__2, "DGEHRD", " ", n, ilo, ihi, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + if (*lwork >= *n * nbmin) { + nb = *lwork / *n; + } else { + nb = 1; + } + } + } + } + ldwork = *n; + + if (nb < nbmin || nb >= nh) { + +/* Use unblocked code below */ + + i__ = *ilo; + + } else { + +/* Use blocked code */ + + i__1 = *ihi - 1 - nx; + i__2 = nb; + for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *ihi - i__; + ib = min(i__3,i__4); + +/* + Reduce columns i:i+ib-1 to Hessenberg form, returning the + matrices V and T of the block reflector H = I - V*T*V' + which performs the reduction, and also the matrix Y = A*V*T +*/ + + dlahrd_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & + c__65, &work[1], &ldwork); + +/* + Apply the block reflector H to A(1:ihi,i+ib:ihi) from the + right, computing A := A - Y * V'. V(i+ib,ib-1) must be set + to 1. +*/ + + ei = a[i__ + ib + (i__ + ib - 1) * a_dim1]; + a[i__ + ib + (i__ + ib - 1) * a_dim1] = 1.; + i__3 = *ihi - i__ - ib + 1; + dgemm_("No transpose", "Transpose", ihi, &i__3, &ib, &c_b151, & + work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, & + c_b15, &a[(i__ + ib) * a_dim1 + 1], lda); + a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei; + +/* + Apply the block reflector H to A(i+1:ihi,i+ib:n) from the + left +*/ + + i__3 = *ihi - i__; + i__4 = *n - i__ - ib + 1; + dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & + i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, &c__65, &a[ + i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &ldwork); +/* L30: */ + } + } + +/* Use unblocked code to reduce the rest of the matrix */ + + dgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); + work[1] = (doublereal) iws; + + return 0; + +/* End of DGEHRD */ + +} /* dgehrd_ */ + +/* Subroutine */ int dgelq2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, k; + static doublereal aii; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DGELQ2 computes an LQ factorization of a real m by n matrix A: + A = L * Q. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the m by n matrix A. + On exit, the elements on and below the diagonal of the array + contain the m by min(m,n) lower trapezoidal matrix L (L is + lower triangular if m <= n); the elements above the diagonal, + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) DOUBLE PRECISION array, dimension (M) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(k) . . . H(2) H(1), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), + and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGELQ2", &i__1); + return 0; + } + + k = min(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * a_dim1] + , lda, &tau[i__]); + if (i__ < *m) { + +/* Apply H(i) to A(i+1:m,i:n) from the right */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + i__2 = *m - i__; + i__3 = *n - i__ + 1; + dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[ + i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = aii; + } +/* L10: */ + } + return 0; + +/* End of DGELQ2 */ + +} /* dgelq2_ */ + +/* Subroutine */ int dgelqf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int dgelq2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), dlarfb_(char *, + char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DGELQF computes an LQ factorization of a real M-by-N matrix A: + A = L * Q. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, the elements on and below the diagonal of the array + contain the m-by-min(m,n) lower trapezoidal matrix L (L is + lower triangular if m <= n); the elements above the diagonal, + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,M). + For optimum performance LWORK >= M*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(k) . . . H(2) H(1), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), + and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *m * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (*lwork < max(1,*m) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGELQF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = min(*m,*n); + if (k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "DGELQF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "DGELQF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* + Compute the LQ factorization of the current block + A(i:i+ib-1,i:n) +*/ + + i__3 = *n - i__ + 1; + dgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *m) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__3 = *n - i__ + 1; + dlarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(i+ib:m,i:n) from the right */ + + i__3 = *m - i__ - ib + 1; + i__4 = *n - i__ + 1; + dlarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, + &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. */ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + dgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DGELQF */ + +} /* dgelqf_ */ + +/* Subroutine */ int dgelsd_(integer *m, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * + s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork, + integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + + /* Builtin functions */ + double log(doublereal); + + /* Local variables */ + static integer ie, il, mm; + static doublereal eps, anrm, bnrm; + static integer itau, nlvl, iascl, ibscl; + static doublereal sfmin; + static integer minmn, maxmn, itaup, itauq, mnthr, nwork; + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebrd_( + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *); + extern doublereal dlamch_(char *), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + dlalsd_(char *, integer *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, integer *), dlascl_(char *, + integer *, integer *, doublereal *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *), dgeqrf_( + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *), dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), dlaset_(char *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *), xerbla_(char *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static doublereal bignum; + extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *); + static integer wlalsd; + extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + static integer ldwork; + extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + static integer minwrk, maxwrk; + static doublereal smlnum; + static logical lquery; + static integer smlsiz; + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + DGELSD computes the minimum-norm solution to a real linear least + squares problem: + minimize 2-norm(| b - A*x |) + using the singular value decomposition (SVD) of A. A is an M-by-N + matrix which may be rank-deficient. + + Several right hand side vectors b and solution vectors x can be + handled in a single call; they are stored as the columns of the + M-by-NRHS right hand side matrix B and the N-by-NRHS solution + matrix X. + + The problem is solved in three steps: + (1) Reduce the coefficient matrix A to bidiagonal form with + Householder transformations, reducing the original problem + into a "bidiagonal least squares problem" (BLS) + (2) Solve the BLS using a divide and conquer approach. + (3) Apply back all the Householder tranformations to solve + the original least squares problem. + + The effective rank of A is determined by treating as zero those + singular values which are less than RCOND times the largest singular + value. + + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + M (input) INTEGER + The number of rows of A. M >= 0. + + N (input) INTEGER + The number of columns of A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrices B and X. NRHS >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, A has been destroyed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) + On entry, the M-by-NRHS right hand side matrix B. + On exit, B is overwritten by the N-by-NRHS solution + matrix X. If m >= n and RANK = n, the residual + sum-of-squares for the solution in the i-th column is given + by the sum of squares of elements n+1:m in that column. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,max(M,N)). + + S (output) DOUBLE PRECISION array, dimension (min(M,N)) + The singular values of A in decreasing order. + The condition number of A in the 2-norm = S(1)/S(min(m,n)). + + RCOND (input) DOUBLE PRECISION + RCOND is used to determine the effective rank of A. + Singular values S(i) <= RCOND*S(1) are treated as zero. + If RCOND < 0, machine precision is used instead. + + RANK (output) INTEGER + The effective rank of A, i.e., the number of singular values + which are greater than RCOND*S(1). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK must be at least 1. + The exact minimum amount of workspace needed depends on M, + N and NRHS. As long as LWORK is at least + 12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2, + if M is greater than or equal to N or + 12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2, + if M is less than N, the code will execute correctly. + SMLSIZ is returned by ILAENV and is equal to the maximum + size of the subproblems at the bottom of the computation + tree (usually about 25), and + NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) + For good performance, LWORK should generally be larger. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + IWORK (workspace) INTEGER array, dimension (LIWORK) + LIWORK >= 3 * MINMN * NLVL + 11 * MINMN, + where MINMN = MIN( M,N ). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: the algorithm for computing the SVD failed to converge; + if INFO = i, i off-diagonal elements of an intermediate + bidiagonal form did not converge to zero. + + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input arguments. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --s; + --work; + --iwork; + + /* Function Body */ + *info = 0; + minmn = min(*m,*n); + maxmn = max(*m,*n); + mnthr = ilaenv_(&c__6, "DGELSD", " ", m, n, nrhs, &c_n1, (ftnlen)6, ( + ftnlen)1); + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*ldb < max(1,maxmn)) { + *info = -7; + } + + smlsiz = ilaenv_(&c__9, "DGELSD", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + +/* + Compute workspace. + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + NB refers to the optimal block size for the immediately + following subroutine, as returned by ILAENV.) +*/ + + minwrk = 1; + minmn = max(1,minmn); +/* Computing MAX */ + i__1 = (integer) (log((doublereal) minmn / (doublereal) (smlsiz + 1)) / + log(2.)) + 1; + nlvl = max(i__1,0); + + if (*info == 0) { + maxwrk = 0; + mm = *m; + if (*m >= *n && *m >= mnthr) { + +/* Path 1a - overdetermined, with many more rows than columns. */ + + mm = *n; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, + n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *nrhs * ilaenv_(&c__1, "DORMQR", "LT", + m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2); + maxwrk = max(i__1,i__2); + } + if (*m >= *n) { + +/* + Path 1 - overdetermined or exactly determined. + + Computing MAX +*/ + i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * ilaenv_(&c__1, "DGEBRD" + , " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + *nrhs * ilaenv_(&c__1, "DORMBR", + "QLT", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * ilaenv_(&c__1, "DORMBR", + "PLN", n, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing 2nd power */ + i__1 = smlsiz + 1; + wlalsd = *n * 9 + (*n << 1) * smlsiz + (*n << 3) * nlvl + *n * * + nrhs + i__1 * i__1; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + wlalsd; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1,i__2), + i__2 = *n * 3 + wlalsd; + minwrk = max(i__1,i__2); + } + if (*n > *m) { +/* Computing 2nd power */ + i__1 = smlsiz + 1; + wlalsd = *m * 9 + (*m << 1) * smlsiz + (*m << 3) * nlvl + *m * * + nrhs + i__1 * i__1; + if (*n >= mnthr) { + +/* + Path 2a - underdetermined, with many more columns + than rows. +*/ + + maxwrk = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, + &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * + ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * ilaenv_(& + c__1, "DORMBR", "QLT", m, nrhs, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * + ilaenv_(&c__1, "DORMBR", "PLN", m, nrhs, m, &c_n1, ( + ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); + if (*nrhs > 1) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs; + maxwrk = max(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 1); + maxwrk = max(i__1,i__2); + } +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m + *nrhs * ilaenv_(&c__1, "DORMLQ", + "LT", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)2); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + wlalsd; + maxwrk = max(i__1,i__2); + } else { + +/* Path 2 - remaining underdetermined cases. */ + + maxwrk = *m * 3 + (*n + *m) * ilaenv_(&c__1, "DGEBRD", " ", m, + n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + *nrhs * ilaenv_(&c__1, "DORMBR" + , "QLT", m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR", + "PLN", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + wlalsd; + maxwrk = max(i__1,i__2); + } +/* Computing MAX */ + i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *m, i__1 = max(i__1,i__2), + i__2 = *m * 3 + wlalsd; + minwrk = max(i__1,i__2); + } + minwrk = min(minwrk,maxwrk); + work[1] = (doublereal) maxwrk; + if (*lwork < minwrk && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGELSD", &i__1); + return 0; + } else if (lquery) { + goto L10; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + *rank = 0; + return 0; + } + +/* Get machine parameters. */ + + eps = PRECISION; + sfmin = SAFEMINIMUM; + smlnum = sfmin / eps; + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + +/* Scale A if max entry outside range [SMLNUM,BIGNUM]. */ + + anrm = dlange_("M", m, n, &a[a_offset], lda, &work[1]); + iascl = 0; + if (anrm > 0. && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM. */ + + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM. */ + + dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = max(*m,*n); + dlaset_("F", &i__1, nrhs, &c_b29, &c_b29, &b[b_offset], ldb); + dlaset_("F", &minmn, &c__1, &c_b29, &c_b29, &s[1], &c__1); + *rank = 0; + goto L10; + } + +/* Scale B if max entry outside range [SMLNUM,BIGNUM]. */ + + bnrm = dlange_("M", m, nrhs, &b[b_offset], ldb, &work[1]); + ibscl = 0; + if (bnrm > 0. && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM. */ + + dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM. */ + + dlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 2; + } + +/* If M < N make sure certain entries of B are zero. */ + + if (*m < *n) { + i__1 = *n - *m; + dlaset_("F", &i__1, nrhs, &c_b29, &c_b29, &b[*m + 1 + b_dim1], ldb); + } + +/* Overdetermined case. */ + + if (*m >= *n) { + +/* Path 1 - overdetermined or exactly determined. */ + + mm = *m; + if (*m >= mnthr) { + +/* Path 1a - overdetermined, with many more rows than columns. */ + + mm = *n; + itau = 1; + nwork = itau + *n; + +/* + Compute A=Q*R. + (Workspace: need 2*N, prefer N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, + info); + +/* + Multiply B by transpose(Q). + (Workspace: need N+NRHS, prefer N+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[nwork], &i__1, info); + +/* Zero out below R. */ + + if (*n > 1) { + i__1 = *n - 1; + i__2 = *n - 1; + dlaset_("L", &i__1, &i__2, &c_b29, &c_b29, &a[a_dim1 + 2], + lda); + } + } + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in A. + (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__1, info); + +/* + Multiply B by transpose of left bidiagonalizing vectors of R. + (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], + &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Solve the bidiagonal least squares problem. */ + + dlalsd_("U", &smlsiz, n, nrhs, &s[1], &work[ie], &b[b_offset], ldb, + rcond, rank, &work[nwork], &iwork[1], info); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of R. */ + + i__1 = *lwork - nwork + 1; + dormbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], & + b[b_offset], ldb, &work[nwork], &i__1, info); + + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *m, i__2 = (*m << 1) - 4, i__1 = max(i__1,i__2), i__1 = max( + i__1,*nrhs), i__2 = *n - *m * 3; + if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__1,i__2)) { + +/* + Path 2a - underdetermined, with many more columns than rows + and sufficient workspace for an efficient algorithm. +*/ + + ldwork = *m; +/* + Computing MAX + Computing MAX +*/ + i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = + max(i__3,*nrhs), i__4 = *n - *m * 3; + i__1 = (*m << 2) + *m * *lda + max(i__3,i__4), i__2 = *m * *lda + + *m + *m * *nrhs; + if (*lwork >= max(i__1,i__2)) { + ldwork = *lda; + } + itau = 1; + nwork = *m + 1; + +/* + Compute A=L*Q. + (Workspace: need 2*M, prefer M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, + info); + il = nwork; + +/* Copy L to WORK(IL), zeroing out above its diagonal. */ + + dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork); + i__1 = *m - 1; + i__2 = *m - 1; + dlaset_("U", &i__1, &i__2, &c_b29, &c_b29, &work[il + ldwork], & + ldwork); + ie = il + ldwork * *m; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in WORK(IL). + (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], + &work[itaup], &work[nwork], &i__1, info); + +/* + Multiply B by transpose of left bidiagonalizing vectors of L. + (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[ + itauq], &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Solve the bidiagonal least squares problem. */ + + dlalsd_("U", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], + ldb, rcond, rank, &work[nwork], &iwork[1], info); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of L. */ + + i__1 = *lwork - nwork + 1; + dormbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[ + itaup], &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Zero out below first M rows of B. */ + + i__1 = *n - *m; + dlaset_("F", &i__1, nrhs, &c_b29, &c_b29, &b[*m + 1 + b_dim1], + ldb); + nwork = itau + *m; + +/* + Multiply transpose(Q) by B. + (Workspace: need M+NRHS, prefer M+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[nwork], &i__1, info); + + } else { + +/* Path 2 - remaining underdetermined cases. */ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize A. + (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__1, info); + +/* + Multiply B by transpose of left bidiagonalizing vectors. + (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq] + , &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Solve the bidiagonal least squares problem. */ + + dlalsd_("L", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], + ldb, rcond, rank, &work[nwork], &iwork[1], info); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of A. */ + + i__1 = *lwork - nwork + 1; + dormbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup] + , &b[b_offset], ldb, &work[nwork], &i__1, info); + + } + } + +/* Undo scaling. */ + + if (iascl == 1) { + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, + info); + dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } else if (iascl == 2) { + dlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, + info); + dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } + if (ibscl == 1) { + dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } else if (ibscl == 2) { + dlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } + +L10: + work[1] = (doublereal) maxwrk; + return 0; + +/* End of DGELSD */ + +} /* dgelsd_ */ + +/* Subroutine */ int dgeqr2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, k; + static doublereal aii; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DGEQR2 computes a QR factorization of a real m by n matrix A: + A = Q * R. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the m by n matrix A. + On exit, the elements on and above the diagonal of the array + contain the min(m,n) by n upper trapezoidal matrix R (R is + upper triangular if m >= n); the elements below the diagonal, + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) DOUBLE PRECISION array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(1) H(2) . . . H(k), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), + and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEQR2", &i__1); + return 0; + } + + k = min(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1] + , &c__1, &tau[i__]); + if (i__ < *n) { + +/* Apply H(i) to A(i:m,i+1:n) from the left */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + i__2 = *m - i__ + 1; + i__3 = *n - i__; + dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = aii; + } +/* L10: */ + } + return 0; + +/* End of DGEQR2 */ + +} /* dgeqr2_ */ + +/* Subroutine */ int dgeqrf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int dgeqr2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), dlarfb_(char *, + char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DGEQRF computes a QR factorization of a real M-by-N matrix A: + A = Q * R. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, the elements on and above the diagonal of the array + contain the min(M,N)-by-N upper trapezoidal matrix R (R is + upper triangular if m >= n); the elements below the diagonal, + with the array TAU, represent the orthogonal matrix Q as a + product of min(m,n) elementary reflectors (see Further + Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is + the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(1) H(2) . . . H(k), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), + and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *n * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEQRF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = min(*m,*n); + if (k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "DGEQRF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "DGEQRF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* + Compute the QR factorization of the current block + A(i:m,i:i+ib-1) +*/ + + i__3 = *m - i__ + 1; + dgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *n) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__3 = *m - i__ + 1; + dlarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H' to A(i:m,i+ib:n) from the left */ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ - ib + 1; + dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & + i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib + + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. */ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + dgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DGEQRF */ + +} /* dgeqrf_ */ + +/* Subroutine */ int dgesdd_(char *jobz, integer *m, integer *n, doublereal * + a, integer *lda, doublereal *s, doublereal *u, integer *ldu, + doublereal *vt, integer *ldvt, doublereal *work, integer *lwork, + integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2, i__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, ie, il, ir, iu, blk; + static doublereal dum[1], eps; + static integer ivt, iscl; + static doublereal anrm; + static integer idum[1], ierr, itau; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *); + static integer chunk, minmn, wrkbl, itaup, itauq, mnthr; + static logical wntqa; + static integer nwork; + static logical wntqn, wntqo, wntqs; + extern /* Subroutine */ int dbdsdc_(char *, char *, integer *, doublereal + *, doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *), dgebrd_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *); + extern doublereal dlamch_(char *), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + static integer bdspac; + extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *), + dgeqrf_(integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *), dlacpy_(char *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *), dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + xerbla_(char *, integer *), dorgbr_(char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static doublereal bignum; + extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *), dorglq_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dorgqr_(integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *); + static integer ldwrkl, ldwrkr, minwrk, ldwrku, maxwrk, ldwkvt; + static doublereal smlnum; + static logical wntqas, lquery; + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + DGESDD computes the singular value decomposition (SVD) of a real + M-by-N matrix A, optionally computing the left and right singular + vectors. If singular vectors are desired, it uses a + divide-and-conquer algorithm. + + The SVD is written + + A = U * SIGMA * transpose(V) + + where SIGMA is an M-by-N matrix which is zero except for its + min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and + V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA + are the singular values of A; they are real and non-negative, and + are returned in descending order. The first min(m,n) columns of + U and V are the left and right singular vectors of A. + + Note that the routine returns VT = V**T, not V. + + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + JOBZ (input) CHARACTER*1 + Specifies options for computing all or part of the matrix U: + = 'A': all M columns of U and all N rows of V**T are + returned in the arrays U and VT; + = 'S': the first min(M,N) columns of U and the first + min(M,N) rows of V**T are returned in the arrays U + and VT; + = 'O': If M >= N, the first N columns of U are overwritten + on the array A and all rows of V**T are returned in + the array VT; + otherwise, all columns of U are returned in the + array U and the first M rows of V**T are overwritten + in the array VT; + = 'N': no columns of U or rows of V**T are computed. + + M (input) INTEGER + The number of rows of the input matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the input matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, + if JOBZ = 'O', A is overwritten with the first N columns + of U (the left singular vectors, stored + columnwise) if M >= N; + A is overwritten with the first M rows + of V**T (the right singular vectors, stored + rowwise) otherwise. + if JOBZ .ne. 'O', the contents of A are destroyed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + S (output) DOUBLE PRECISION array, dimension (min(M,N)) + The singular values of A, sorted so that S(i) >= S(i+1). + + U (output) DOUBLE PRECISION array, dimension (LDU,UCOL) + UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; + UCOL = min(M,N) if JOBZ = 'S'. + If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M + orthogonal matrix U; + if JOBZ = 'S', U contains the first min(M,N) columns of U + (the left singular vectors, stored columnwise); + if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= 1; if + JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. + + VT (output) DOUBLE PRECISION array, dimension (LDVT,N) + If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the + N-by-N orthogonal matrix V**T; + if JOBZ = 'S', VT contains the first min(M,N) rows of + V**T (the right singular vectors, stored rowwise); + if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= 1; if + JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; + if JOBZ = 'S', LDVT >= min(M,N). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK; + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= 1. + If JOBZ = 'N', + LWORK >= 3*min(M,N) + max(max(M,N),6*min(M,N)). + If JOBZ = 'O', + LWORK >= 3*min(M,N)*min(M,N) + + max(max(M,N),5*min(M,N)*min(M,N)+4*min(M,N)). + If JOBZ = 'S' or 'A' + LWORK >= 3*min(M,N)*min(M,N) + + max(max(M,N),4*min(M,N)*min(M,N)+4*min(M,N)). + For good performance, LWORK should generally be larger. + If LWORK < 0 but other input arguments are legal, WORK(1) + returns the optimal LWORK. + + IWORK (workspace) INTEGER array, dimension (8*min(M,N)) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: DBDSDC did not converge, updating process failed. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + minmn = min(*m,*n); + mnthr = (integer) (minmn * 11. / 6.); + wntqa = lsame_(jobz, "A"); + wntqs = lsame_(jobz, "S"); + wntqas = wntqa || wntqs; + wntqo = lsame_(jobz, "O"); + wntqn = lsame_(jobz, "N"); + minwrk = 1; + maxwrk = 1; + lquery = *lwork == -1; + + if (! (wntqa || wntqs || wntqo || wntqn)) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < * + m) { + *info = -8; + } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || + wntqo && *m >= *n && *ldvt < *n) { + *info = -10; + } + +/* + Compute workspace + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + NB refers to the optimal block size for the immediately + following subroutine, as returned by ILAENV.) +*/ + + if (*info == 0 && *m > 0 && *n > 0) { + if (*m >= *n) { + +/* Compute space needed for DBDSDC */ + + if (wntqn) { + bdspac = *n * 7; + } else { + bdspac = *n * 3 * *n + (*n << 2); + } + if (*m >= mnthr) { + if (wntqn) { + +/* Path 1 (M much larger than N, JOBZ='N') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n; + maxwrk = max(i__1,i__2); + minwrk = bdspac + *n; + } else if (wntqo) { + +/* Path 2 (M much larger than N, JOBZ='O') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "DORGQR", + " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + (*n << 1) * *n; + minwrk = bdspac + (*n << 1) * *n + *n * 3; + } else if (wntqs) { + +/* Path 3 (M much larger than N, JOBZ='S') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "DORGQR", + " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *n * *n; + minwrk = bdspac + *n * *n + *n * 3; + } else if (wntqa) { + +/* Path 4 (M much larger than N, JOBZ='A') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *m * ilaenv_(&c__1, "DORGQR", + " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *n * *n; + minwrk = bdspac + *n * *n + *n * 3; + } + } else { + +/* Path 5 (M at least N, but not much larger) */ + + wrkbl = *n * 3 + (*m + *n) * ilaenv_(&c__1, "DGEBRD", " ", m, + n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (wntqn) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + maxwrk = max(i__1,i__2); + minwrk = *n * 3 + max(*m,bdspac); + } else if (wntqo) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "QLN", m, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *n; +/* Computing MAX */ + i__1 = *m, i__2 = *n * *n + bdspac; + minwrk = *n * 3 + max(i__1,i__2); + } else if (wntqs) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "QLN", m, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + maxwrk = max(i__1,i__2); + minwrk = *n * 3 + max(*m,bdspac); + } else if (wntqa) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = bdspac + *n * 3; + maxwrk = max(i__1,i__2); + minwrk = *n * 3 + max(*m,bdspac); + } + } + } else { + +/* Compute space needed for DBDSDC */ + + if (wntqn) { + bdspac = *m * 7; + } else { + bdspac = *m * 3 * *m + (*m << 2); + } + if (*n >= mnthr) { + if (wntqn) { + +/* Path 1t (N much larger than M, JOBZ='N') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m; + maxwrk = max(i__1,i__2); + minwrk = bdspac + *m; + } else if (wntqo) { + +/* Path 2t (N much larger than M, JOBZ='O') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "DORGLQ", + " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + (*m << 1) * *m; + minwrk = bdspac + (*m << 1) * *m + *m * 3; + } else if (wntqs) { + +/* Path 3t (N much larger than M, JOBZ='S') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "DORGLQ", + " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *m; + minwrk = bdspac + *m * *m + *m * 3; + } else if (wntqa) { + +/* Path 4t (N much larger than M, JOBZ='A') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *n * ilaenv_(&c__1, "DORGLQ", + " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *m; + minwrk = bdspac + *m * *m + *m * 3; + } + } else { + +/* Path 5t (N greater than M, but not much larger) */ + + wrkbl = *m * 3 + (*m + *n) * ilaenv_(&c__1, "DGEBRD", " ", m, + n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (wntqn) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + maxwrk = max(i__1,i__2); + minwrk = *m * 3 + max(*n,bdspac); + } else if (wntqo) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "PRT", m, n, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *n; +/* Computing MAX */ + i__1 = *n, i__2 = *m * *m + bdspac; + minwrk = *m * 3 + max(i__1,i__2); + } else if (wntqs) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "PRT", m, n, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + maxwrk = max(i__1,i__2); + minwrk = *m * 3 + max(*n,bdspac); + } else if (wntqa) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "PRT", n, n, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + maxwrk = max(i__1,i__2); + minwrk = *m * 3 + max(*n,bdspac); + } + } + } + work[1] = (doublereal) maxwrk; + } + + if (*lwork < minwrk && ! lquery) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGESDD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + if (*lwork >= 1) { + work[1] = 1.; + } + return 0; + } + +/* Get machine constants */ + + eps = PRECISION; + smlnum = sqrt(SAFEMINIMUM) / eps; + bignum = 1. / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = dlange_("M", m, n, &a[a_offset], lda, dum); + iscl = 0; + if (anrm > 0. && anrm < smlnum) { + iscl = 1; + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & + ierr); + } else if (anrm > bignum) { + iscl = 1; + dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & + ierr); + } + + if (*m >= *n) { + +/* + A has at least as many rows as columns. If A has sufficiently + more rows than columns, first reduce using the QR + decomposition (if sufficient workspace available) +*/ + + if (*m >= mnthr) { + + if (wntqn) { + +/* + Path 1 (M much larger than N, JOBZ='N') + No singular vectors to be computed +*/ + + itau = 1; + nwork = itau + *n; + +/* + Compute A=Q*R + (Workspace: need 2*N, prefer N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Zero out below R */ + + i__1 = *n - 1; + i__2 = *n - 1; + dlaset_("L", &i__1, &i__2, &c_b29, &c_b29, &a[a_dim1 + 2], + lda); + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in A + (Workspace: need 4*N, prefer 3*N+2*N*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + nwork = ie + *n; + +/* + Perform bidiagonal SVD, computing singular values only + (Workspace: need N+BDSPAC) +*/ + + dbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + + } else if (wntqo) { + +/* + Path 2 (M much larger than N, JOBZ = 'O') + N left singular vectors to be overwritten on A and + N right singular vectors to be computed in VT +*/ + + ir = 1; + +/* WORK(IR) is LDWRKR by N */ + + if (*lwork >= *lda * *n + *n * *n + *n * 3 + bdspac) { + ldwrkr = *lda; + } else { + ldwrkr = (*lwork - *n * *n - *n * 3 - bdspac) / *n; + } + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* + Compute A=Q*R + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__1 = *n - 1; + i__2 = *n - 1; + dlaset_("L", &i__1, &i__2, &c_b29, &c_b29, &work[ir + 1], & + ldwrkr); + +/* + Generate Q in A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in VT, copying result to WORK(IR) + (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* WORK(IU) is N by N */ + + iu = nwork; + nwork = iu + *n * *n; + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in WORK(IU) and computing right + singular vectors of bidiagonal matrix in VT + (Workspace: need N+N*N+BDSPAC) +*/ + + dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite WORK(IU) by left singular vectors of R + and VT by right singular vectors of R + (Workspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &work[iu], n, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + +/* + Multiply Q in A by left singular vectors of R in + WORK(IU), storing result in WORK(IR) and copying to A + (Workspace: need 2*N*N, prefer N*N+M*N) +*/ + + i__1 = *m; + i__2 = ldwrkr; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrkr); + dgemm_("N", "N", &chunk, n, n, &c_b15, &a[i__ + a_dim1], + lda, &work[iu], n, &c_b29, &work[ir], &ldwrkr); + dlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + + a_dim1], lda); +/* L10: */ + } + + } else if (wntqs) { + +/* + Path 3 (M much larger than N, JOBZ='S') + N left singular vectors to be computed in U and + N right singular vectors to be computed in VT +*/ + + ir = 1; + +/* WORK(IR) is N by N */ + + ldwrkr = *n; + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* + Compute A=Q*R + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__2 = *n - 1; + i__1 = *n - 1; + dlaset_("L", &i__2, &i__1, &c_b29, &c_b29, &work[ir + 1], & + ldwrkr); + +/* + Generate Q in A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in WORK(IR) + (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagoal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need N+BDSPAC) +*/ + + dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of R and VT + by right singular vectors of R + (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + + i__2 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply Q in A by left singular vectors of R in + WORK(IR), storing result in U + (Workspace: need N*N) +*/ + + dlacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr); + dgemm_("N", "N", m, n, n, &c_b15, &a[a_offset], lda, &work[ir] + , &ldwrkr, &c_b29, &u[u_offset], ldu); + + } else if (wntqa) { + +/* + Path 4 (M much larger than N, JOBZ='A') + M left singular vectors to be computed in U and + N right singular vectors to be computed in VT +*/ + + iu = 1; + +/* WORK(IU) is N by N */ + + ldwrku = *n; + itau = iu + ldwrku * *n; + nwork = itau + *n; + +/* + Compute A=Q*R, copying result to U + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + +/* + Generate Q in U + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + i__2 = *lwork - nwork + 1; + dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork], + &i__2, &ierr); + +/* Produce R in A, zeroing out other entries */ + + i__2 = *n - 1; + i__1 = *n - 1; + dlaset_("L", &i__2, &i__1, &c_b29, &c_b29, &a[a_dim1 + 2], + lda); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in A + (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in WORK(IU) and computing right + singular vectors of bidiagonal matrix in VT + (Workspace: need N+N*N+BDSPAC) +*/ + + dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite WORK(IU) by left singular vectors of R and VT + by right singular vectors of R + (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & + ierr); + i__2 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply Q in U by left singular vectors of R in + WORK(IU), storing result in A + (Workspace: need N*N) +*/ + + dgemm_("N", "N", m, n, n, &c_b15, &u[u_offset], ldu, &work[iu] + , &ldwrku, &c_b29, &a[a_offset], lda); + +/* Copy left singular vectors of A from A to U */ + + dlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + + } + + } else { + +/* + M .LT. MNTHR + + Path 5 (M at least N, but not much larger) + Reduce to bidiagonal form without QR decomposition +*/ + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize A + (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* + Perform bidiagonal SVD, only computing singular values + (Workspace: need N+BDSPAC) +*/ + + dbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + } else if (wntqo) { + iu = nwork; + if (*lwork >= *m * *n + *n * 3 + bdspac) { + +/* WORK( IU ) is M by N */ + + ldwrku = *m; + nwork = iu + ldwrku * *n; + dlaset_("F", m, n, &c_b29, &c_b29, &work[iu], &ldwrku); + } else { + +/* WORK( IU ) is N by N */ + + ldwrku = *n; + nwork = iu + ldwrku * *n; + +/* WORK(IR) is LDWRKR by N */ + + ir = nwork; + ldwrkr = (*lwork - *n * *n - *n * 3) / *n; + } + nwork = iu + ldwrku * *n; + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in WORK(IU) and computing right + singular vectors of bidiagonal matrix in VT + (Workspace: need N+N*N+BDSPAC) +*/ + + dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], &ldwrku, & + vt[vt_offset], ldvt, dum, idum, &work[nwork], &iwork[ + 1], info); + +/* + Overwrite VT by right singular vectors of A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + + if (*lwork >= *m * *n + *n * 3 + bdspac) { + +/* + Overwrite WORK(IU) by left singular vectors of A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & + ierr); + +/* Copy left singular vectors of A from WORK(IU) to A */ + + dlacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda); + } else { + +/* + Generate Q in A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & + work[nwork], &i__2, &ierr); + +/* + Multiply Q in A by left singular vectors of + bidiagonal matrix in WORK(IU), storing result in + WORK(IR) and copying to A + (Workspace: need 2*N*N, prefer N*N+M*N) +*/ + + i__2 = *m; + i__1 = ldwrkr; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrkr); + dgemm_("N", "N", &chunk, n, n, &c_b15, &a[i__ + + a_dim1], lda, &work[iu], &ldwrku, &c_b29, & + work[ir], &ldwrkr); + dlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + + a_dim1], lda); +/* L20: */ + } + } + + } else if (wntqs) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need N+BDSPAC) +*/ + + dlaset_("F", m, n, &c_b29, &c_b29, &u[u_offset], ldu); + dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of A and VT + by right singular vectors of A + (Workspace: need 3*N, prefer 2*N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } else if (wntqa) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need N+BDSPAC) +*/ + + dlaset_("F", m, m, &c_b29, &c_b29, &u[u_offset], ldu); + dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Set the right corner of U to identity matrix */ + + i__1 = *m - *n; + i__2 = *m - *n; + dlaset_("F", &i__1, &i__2, &c_b29, &c_b15, &u[*n + 1 + (*n + + 1) * u_dim1], ldu); + +/* + Overwrite U by left singular vectors of A and VT + by right singular vectors of A + (Workspace: need N*N+2*N+M, prefer N*N+2*N+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } + + } + + } else { + +/* + A has more columns than rows. If A has sufficiently more + columns than rows, first reduce using the LQ decomposition (if + sufficient workspace available) +*/ + + if (*n >= mnthr) { + + if (wntqn) { + +/* + Path 1t (N much larger than M, JOBZ='N') + No singular vectors to be computed +*/ + + itau = 1; + nwork = itau + *m; + +/* + Compute A=L*Q + (Workspace: need 2*M, prefer M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Zero out above L */ + + i__1 = *m - 1; + i__2 = *m - 1; + dlaset_("U", &i__1, &i__2, &c_b29, &c_b29, &a[(a_dim1 << 1) + + 1], lda); + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in A + (Workspace: need 4*M, prefer 3*M+2*M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + nwork = ie + *m; + +/* + Perform bidiagonal SVD, computing singular values only + (Workspace: need M+BDSPAC) +*/ + + dbdsdc_("U", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + + } else if (wntqo) { + +/* + Path 2t (N much larger than M, JOBZ='O') + M right singular vectors to be overwritten on A and + M left singular vectors to be computed in U +*/ + + ivt = 1; + +/* IVT is M by M */ + + il = ivt + *m * *m; + if (*lwork >= *m * *n + *m * *m + *m * 3 + bdspac) { + +/* WORK(IL) is M by N */ + + ldwrkl = *m; + chunk = *n; + } else { + ldwrkl = *m; + chunk = (*lwork - *m * *m) / *m; + } + itau = il + ldwrkl * *m; + nwork = itau + *m; + +/* + Compute A=L*Q + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy L to WORK(IL), zeroing about above it */ + + dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__1 = *m - 1; + i__2 = *m - 1; + dlaset_("U", &i__1, &i__2, &c_b29, &c_b29, &work[il + ldwrkl], + &ldwrkl); + +/* + Generate Q in A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in WORK(IL) + (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U, and computing right singular + vectors of bidiagonal matrix in WORK(IVT) + (Workspace: need M+M*M+BDSPAC) +*/ + + dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + work[ivt], m, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of L and WORK(IVT) + by right singular vectors of L + (Workspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + dormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &work[ivt], m, &work[nwork], &i__1, &ierr); + +/* + Multiply right singular vectors of L in WORK(IVT) by Q + in A, storing result in WORK(IL) and copying to A + (Workspace: need 2*M*M, prefer M*M+M*N) +*/ + + i__1 = *n; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + dgemm_("N", "N", m, &blk, m, &c_b15, &work[ivt], m, &a[ + i__ * a_dim1 + 1], lda, &c_b29, &work[il], & + ldwrkl); + dlacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 + + 1], lda); +/* L30: */ + } + + } else if (wntqs) { + +/* + Path 3t (N much larger than M, JOBZ='S') + M right singular vectors to be computed in VT and + M left singular vectors to be computed in U +*/ + + il = 1; + +/* WORK(IL) is M by M */ + + ldwrkl = *m; + itau = il + ldwrkl * *m; + nwork = itau + *m; + +/* + Compute A=L*Q + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy L to WORK(IL), zeroing out above it */ + + dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__2 = *m - 1; + i__1 = *m - 1; + dlaset_("U", &i__2, &i__1, &c_b29, &c_b29, &work[il + ldwrkl], + &ldwrkl); + +/* + Generate Q in A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in WORK(IU), copying result to U + (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need M+BDSPAC) +*/ + + dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of L and VT + by right singular vectors of L + (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + i__2 = *lwork - nwork + 1; + dormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply right singular vectors of L in WORK(IL) by + Q in A, storing result in VT + (Workspace: need M*M) +*/ + + dlacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl); + dgemm_("N", "N", m, n, m, &c_b15, &work[il], &ldwrkl, &a[ + a_offset], lda, &c_b29, &vt[vt_offset], ldvt); + + } else if (wntqa) { + +/* + Path 4t (N much larger than M, JOBZ='A') + N right singular vectors to be computed in VT and + M left singular vectors to be computed in U +*/ + + ivt = 1; + +/* WORK(IVT) is M by M */ + + ldwkvt = *m; + itau = ivt + ldwkvt * *m; + nwork = itau + *m; + +/* + Compute A=L*Q, copying result to VT + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + +/* + Generate Q in VT + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[ + nwork], &i__2, &ierr); + +/* Produce L in A, zeroing out other entries */ + + i__2 = *m - 1; + i__1 = *m - 1; + dlaset_("U", &i__2, &i__1, &c_b29, &c_b29, &a[(a_dim1 << 1) + + 1], lda); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in A + (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in WORK(IVT) + (Workspace: need M+M*M+BDSPAC) +*/ + + dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] + , info); + +/* + Overwrite U by left singular vectors of L and WORK(IVT) + by right singular vectors of L + (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + i__2 = *lwork - nwork + 1; + dormbr_("P", "R", "T", m, m, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply right singular vectors of L in WORK(IVT) by + Q in VT, storing result in A + (Workspace: need M*M) +*/ + + dgemm_("N", "N", m, n, m, &c_b15, &work[ivt], &ldwkvt, &vt[ + vt_offset], ldvt, &c_b29, &a[a_offset], lda); + +/* Copy right singular vectors of A from A to VT */ + + dlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + + } + + } else { + +/* + N .LT. MNTHR + + Path 5t (N greater than M, but not much larger) + Reduce to bidiagonal form without LQ decomposition +*/ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize A + (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* + Perform bidiagonal SVD, only computing singular values + (Workspace: need M+BDSPAC) +*/ + + dbdsdc_("L", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + } else if (wntqo) { + ldwkvt = *m; + ivt = nwork; + if (*lwork >= *m * *n + *m * 3 + bdspac) { + +/* WORK( IVT ) is M by N */ + + dlaset_("F", m, n, &c_b29, &c_b29, &work[ivt], &ldwkvt); + nwork = ivt + ldwkvt * *n; + } else { + +/* WORK( IVT ) is M by M */ + + nwork = ivt + ldwkvt * *m; + il = nwork; + +/* WORK(IL) is M by CHUNK */ + + chunk = (*lwork - *m * *m - *m * 3) / *m; + } + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in WORK(IVT) + (Workspace: need M*M+BDSPAC) +*/ + + dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] + , info); + +/* + Overwrite U by left singular vectors of A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + + if (*lwork >= *m * *n + *m * 3 + bdspac) { + +/* + Overwrite WORK(IVT) by left singular vectors of A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, + &ierr); + +/* Copy right singular vectors of A from WORK(IVT) to A */ + + dlacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda); + } else { + +/* + Generate P**T in A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* + Multiply Q in A by right singular vectors of + bidiagonal matrix in WORK(IVT), storing result in + WORK(IL) and copying to A + (Workspace: need 2*M*M, prefer M*M+M*N) +*/ + + i__2 = *n; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + dgemm_("N", "N", m, &blk, m, &c_b15, &work[ivt], & + ldwkvt, &a[i__ * a_dim1 + 1], lda, &c_b29, & + work[il], m); + dlacpy_("F", m, &blk, &work[il], m, &a[i__ * a_dim1 + + 1], lda); +/* L40: */ + } + } + } else if (wntqs) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need M+BDSPAC) +*/ + + dlaset_("F", m, n, &c_b29, &c_b29, &vt[vt_offset], ldvt); + dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of A and VT + by right singular vectors of A + (Workspace: need 3*M, prefer 2*M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + dormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } else if (wntqa) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need M+BDSPAC) +*/ + + dlaset_("F", n, n, &c_b29, &c_b29, &vt[vt_offset], ldvt); + dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Set the right corner of VT to identity matrix */ + + i__1 = *n - *m; + i__2 = *n - *m; + dlaset_("F", &i__1, &i__2, &c_b29, &c_b15, &vt[*m + 1 + (*m + + 1) * vt_dim1], ldvt); + +/* + Overwrite U by left singular vectors of A and VT + by right singular vectors of A + (Workspace: need 2*M+N, prefer 2*M+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } + + } + + } + +/* Undo scaling if necessary */ + + if (iscl == 1) { + if (anrm > bignum) { + dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + if (anrm < smlnum) { + dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + } + +/* Return optimal workspace in WORK(1) */ + + work[1] = (doublereal) maxwrk; + + return 0; + +/* End of DGESDD */ + +} /* dgesdd_ */ + +/* Subroutine */ int dgesv_(integer *n, integer *nrhs, doublereal *a, integer + *lda, integer *ipiv, doublereal *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int dgetrf_(integer *, integer *, doublereal *, + integer *, integer *, integer *), xerbla_(char *, integer *), dgetrs_(char *, integer *, integer *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *); + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + DGESV computes the solution to a real system of linear equations + A * X = B, + where A is an N-by-N matrix and X and B are N-by-NRHS matrices. + + The LU decomposition with partial pivoting and row interchanges is + used to factor A as + A = P * L * U, + where P is a permutation matrix, L is unit lower triangular, and U is + upper triangular. The factored form of A is then used to solve the + system of equations A * X = B. + + Arguments + ========= + + N (input) INTEGER + The number of linear equations, i.e., the order of the + matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the N-by-N coefficient matrix A. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + IPIV (output) INTEGER array, dimension (N) + The pivot indices that define the permutation matrix P; + row i of the matrix was interchanged with row IPIV(i). + + B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) + On entry, the N-by-NRHS matrix of right hand side matrix B. + On exit, if INFO = 0, the N-by-NRHS solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, U(i,i) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, so the solution could not be computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*nrhs < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGESV ", &i__1); + return 0; + } + +/* Compute the LU factorization of A. */ + + dgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + + dgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[ + b_offset], ldb, info); + } + return 0; + +/* End of DGESV */ + +} /* dgesv_ */ + +/* Subroutine */ int dgetf2_(integer *m, integer *n, doublereal *a, integer * + lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Local variables */ + static integer j, jp; + extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *), dscal_(integer *, doublereal *, doublereal *, integer + *), dswap_(integer *, doublereal *, integer *, doublereal *, + integer *); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1992 + + + Purpose + ======= + + DGETF2 computes an LU factorization of a general m-by-n matrix A + using partial pivoting with row interchanges. + + The factorization has the form + A = P * L * U + where P is a permutation matrix, L is lower triangular with unit + diagonal elements (lower trapezoidal if m > n), and U is upper + triangular (upper trapezoidal if m < n). + + This is the right-looking Level 2 BLAS version of the algorithm. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the m by n matrix to be factored. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + IPIV (output) INTEGER array, dimension (min(M,N)) + The pivot indices; for 1 <= i <= min(M,N), row i of the + matrix was interchanged with row IPIV(i). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + > 0: if INFO = k, U(k,k) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, and division by zero will occur if it is used + to solve a system of equations. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGETF2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { + +/* Find pivot and test for singularity. */ + + i__2 = *m - j + 1; + jp = j - 1 + idamax_(&i__2, &a[j + j * a_dim1], &c__1); + ipiv[j] = jp; + if (a[jp + j * a_dim1] != 0.) { + +/* Apply the interchange to columns 1:N. */ + + if (jp != j) { + dswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda); + } + +/* Compute elements J+1:M of J-th column. */ + + if (j < *m) { + i__2 = *m - j; + d__1 = 1. / a[j + j * a_dim1]; + dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); + } + + } else if (*info == 0) { + + *info = j; + } + + if (j < min(*m,*n)) { + +/* Update trailing submatrix. */ + + i__2 = *m - j; + i__3 = *n - j; + dger_(&i__2, &i__3, &c_b151, &a[j + 1 + j * a_dim1], &c__1, &a[j + + (j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], + lda); + } +/* L10: */ + } + return 0; + +/* End of DGETF2 */ + +} /* dgetf2_ */ + +/* Subroutine */ int dgetrf_(integer *m, integer *n, doublereal *a, integer * + lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + + /* Local variables */ + static integer i__, j, jb, nb; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer iinfo; + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), dgetf2_( + integer *, integer *, doublereal *, integer *, integer *, integer + *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *, + integer *, integer *, integer *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + DGETRF computes an LU factorization of a general M-by-N matrix A + using partial pivoting with row interchanges. + + The factorization has the form + A = P * L * U + where P is a permutation matrix, L is lower triangular with unit + diagonal elements (lower trapezoidal if m > n), and U is upper + triangular (upper trapezoidal if m < n). + + This is the right-looking Level 3 BLAS version of the algorithm. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the M-by-N matrix to be factored. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + IPIV (output) INTEGER array, dimension (min(M,N)) + The pivot indices; for 1 <= i <= min(M,N), row i of the + matrix was interchanged with row IPIV(i). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, U(i,i) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, and division by zero will occur if it is used + to solve a system of equations. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGETRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "DGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + if (nb <= 1 || nb >= min(*m,*n)) { + +/* Use unblocked code. */ + + dgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); + } else { + +/* Use blocked code. */ + + i__1 = min(*m,*n); + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = min(*m,*n) - j + 1; + jb = min(i__3,nb); + +/* + Factor diagonal and subdiagonal blocks and test for exact + singularity. +*/ + + i__3 = *m - j + 1; + dgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); + +/* Adjust INFO and the pivot indices. */ + + if (*info == 0 && iinfo > 0) { + *info = iinfo + j - 1; + } +/* Computing MIN */ + i__4 = *m, i__5 = j + jb - 1; + i__3 = min(i__4,i__5); + for (i__ = j; i__ <= i__3; ++i__) { + ipiv[i__] = j - 1 + ipiv[i__]; +/* L10: */ + } + +/* Apply interchanges to columns 1:J-1. */ + + i__3 = j - 1; + i__4 = j + jb - 1; + dlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); + + if (j + jb <= *n) { + +/* Apply interchanges to columns J+JB:N. */ + + i__3 = *n - j - jb + 1; + i__4 = j + jb - 1; + dlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & + ipiv[1], &c__1); + +/* Compute block row of U. */ + + i__3 = *n - j - jb + 1; + dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & + c_b15, &a[j + j * a_dim1], lda, &a[j + (j + jb) * + a_dim1], lda); + if (j + jb <= *m) { + +/* Update trailing submatrix. */ + + i__3 = *m - j - jb + 1; + i__4 = *n - j - jb + 1; + dgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, + &c_b151, &a[j + jb + j * a_dim1], lda, &a[j + (j + + jb) * a_dim1], lda, &c_b15, &a[j + jb + (j + jb) + * a_dim1], lda); + } + } +/* L20: */ + } + } + return 0; + +/* End of DGETRF */ + +} /* dgetrf_ */ + +/* Subroutine */ int dgetrs_(char *trans, integer *n, integer *nrhs, + doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer * + ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), xerbla_( + char *, integer *), dlaswp_(integer *, doublereal *, + integer *, integer *, integer *, integer *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + DGETRS solves a system of linear equations + A * X = B or A' * X = B + with a general N-by-N matrix A using the LU factorization computed + by DGETRF. + + Arguments + ========= + + TRANS (input) CHARACTER*1 + Specifies the form of the system of equations: + = 'N': A * X = B (No transpose) + = 'T': A'* X = B (Transpose) + = 'C': A'* X = B (Conjugate transpose = Transpose) + + N (input) INTEGER + The order of the matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA,N) + The factors L and U from the factorization A = P*L*U + as computed by DGETRF. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + IPIV (input) INTEGER array, dimension (N) + The pivot indices from DGETRF; for 1<=i<=N, row i of the + matrix was interchanged with row IPIV(i). + + B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) + On entry, the right hand side matrix B. + On exit, the solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + notran = lsame_(trans, "N"); + if (! notran && ! lsame_(trans, "T") && ! lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGETRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (notran) { + +/* + Solve A * X = B. + + Apply row interchanges to the right hand sides. +*/ + + dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); + +/* Solve L*X = B, overwriting B with X. */ + + dtrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b15, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. */ + + dtrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b15, & + a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* + Solve A' * X = B. + + Solve U'*X = B, overwriting B with X. +*/ + + dtrsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b15, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve L'*X = B, overwriting B with X. */ + + dtrsm_("Left", "Lower", "Transpose", "Unit", n, nrhs, &c_b15, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Apply row interchanges to the solution vectors. */ + + dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); + } + + return 0; + +/* End of DGETRS */ + +} /* dgetrs_ */ + +/* Subroutine */ int dhseqr_(char *job, char *compz, integer *n, integer *ilo, + integer *ihi, doublereal *h__, integer *ldh, doublereal *wr, + doublereal *wi, doublereal *z__, integer *ldz, doublereal *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3[2], i__4, + i__5; + doublereal d__1, d__2; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i__, j, k, l; + static doublereal s[225] /* was [15][15] */, v[16]; + static integer i1, i2, ii, nh, nr, ns, nv; + static doublereal vv[16]; + static integer itn; + static doublereal tau; + static integer its; + static doublereal ulp, tst1; + static integer maxb; + static doublereal absw; + static integer ierr; + static doublereal unfl, temp, ovfl; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + static integer itemp; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static logical initz, wantt, wantz; + extern doublereal dlapy2_(doublereal *, doublereal *); + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); + + extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *, + integer *, doublereal *); + extern integer idamax_(integer *, doublereal *, integer *); + extern doublereal dlanhs_(char *, integer *, doublereal *, integer *, + doublereal *); + extern /* Subroutine */ int dlahqr_(logical *, logical *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *), dlacpy_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *), dlaset_(char *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *), dlarfx_( + char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *); + static doublereal smlnum; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DHSEQR computes the eigenvalues of a real upper Hessenberg matrix H + and, optionally, the matrices T and Z from the Schur decomposition + H = Z T Z**T, where T is an upper quasi-triangular matrix (the Schur + form), and Z is the orthogonal matrix of Schur vectors. + + Optionally Z may be postmultiplied into an input orthogonal matrix Q, + so that this routine can give the Schur factorization of a matrix A + which has been reduced to the Hessenberg form H by the orthogonal + matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. + + Arguments + ========= + + JOB (input) CHARACTER*1 + = 'E': compute eigenvalues only; + = 'S': compute eigenvalues and the Schur form T. + + COMPZ (input) CHARACTER*1 + = 'N': no Schur vectors are computed; + = 'I': Z is initialized to the unit matrix and the matrix Z + of Schur vectors of H is returned; + = 'V': Z must contain an orthogonal matrix Q on entry, and + the product Q*Z is returned. + + N (input) INTEGER + The order of the matrix H. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to DGEBAL, and then passed to SGEHRD + when the matrix output by DGEBAL is reduced to Hessenberg + form. Otherwise ILO and IHI should be set to 1 and N + respectively. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + H (input/output) DOUBLE PRECISION array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if JOB = 'S', H contains the upper quasi-triangular + matrix T from the Schur decomposition (the Schur form); + 2-by-2 diagonal blocks (corresponding to complex conjugate + pairs of eigenvalues) are returned in standard form, with + H(i,i) = H(i+1,i+1) and H(i+1,i)*H(i,i+1) < 0. If JOB = 'E', + the contents of H are unspecified on exit. + + LDH (input) INTEGER + The leading dimension of the array H. LDH >= max(1,N). + + WR (output) DOUBLE PRECISION array, dimension (N) + WI (output) DOUBLE PRECISION array, dimension (N) + The real and imaginary parts, respectively, of the computed + eigenvalues. If two eigenvalues are computed as a complex + conjugate pair, they are stored in consecutive elements of + WR and WI, say the i-th and (i+1)th, with WI(i) > 0 and + WI(i+1) < 0. If JOB = 'S', the eigenvalues are stored in the + same order as on the diagonal of the Schur form returned in + H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 + diagonal block, WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and + WI(i+1) = -WI(i). + + Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) + If COMPZ = 'N': Z is not referenced. + If COMPZ = 'I': on entry, Z need not be set, and on exit, Z + contains the orthogonal matrix Z of the Schur vectors of H. + If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q, + which is assumed to be equal to the unit matrix except for + the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z. + Normally Q is the orthogonal matrix generated by DORGHR after + the call to DGEHRD which formed the Hessenberg matrix H. + + LDZ (input) INTEGER + The leading dimension of the array Z. + LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise. + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,N). + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, DHSEQR failed to compute all of the + eigenvalues in a total of 30*(IHI-ILO+1) iterations; + elements 1:ilo-1 and i+1:n of WR and WI contain those + eigenvalues which have been successfully computed. + + ===================================================================== + + + Decode and test the input parameters +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + wantt = lsame_(job, "S"); + initz = lsame_(compz, "I"); + wantz = initz || lsame_(compz, "V"); + + *info = 0; + work[1] = (doublereal) max(1,*n); + lquery = *lwork == -1; + if (! lsame_(job, "E") && ! wantt) { + *info = -1; + } else if (! lsame_(compz, "N") && ! wantz) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -4; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -5; + } else if (*ldh < max(1,*n)) { + *info = -7; + } else if (*ldz < 1 || wantz && *ldz < max(1,*n)) { + *info = -11; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DHSEQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Initialize Z, if necessary */ + + if (initz) { + dlaset_("Full", n, n, &c_b29, &c_b15, &z__[z_offset], ldz); + } + +/* Store the eigenvalues isolated by DGEBAL. */ + + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.; +/* L10: */ + } + i__1 = *n; + for (i__ = *ihi + 1; i__ <= i__1; ++i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.; +/* L20: */ + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + if (*ilo == *ihi) { + wr[*ilo] = h__[*ilo + *ilo * h_dim1]; + wi[*ilo] = 0.; + return 0; + } + +/* + Set rows and columns ILO to IHI to zero below the first + subdiagonal. +*/ + + i__1 = *ihi - 2; + for (j = *ilo; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j + 2; i__ <= i__2; ++i__) { + h__[i__ + j * h_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + nh = *ihi - *ilo + 1; + +/* + Determine the order of the multi-shift QR algorithm to be used. + + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = job; + i__3[1] = 1, a__1[1] = compz; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + ns = ilaenv_(&c__4, "DHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( + ftnlen)2); +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = job; + i__3[1] = 1, a__1[1] = compz; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + maxb = ilaenv_(&c__8, "DHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( + ftnlen)2); + if (ns <= 2 || ns > nh || maxb >= nh) { + +/* Use the standard double-shift algorithm */ + + dlahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[ + 1], ilo, ihi, &z__[z_offset], ldz, info); + return 0; + } + maxb = max(3,maxb); +/* Computing MIN */ + i__1 = min(ns,maxb); + ns = min(i__1,15); + +/* + Now 2 < NS <= MAXB < NH. + + Set machine-dependent constants for the stopping criterion. + If norm(H) <= sqrt(OVFL), overflow should not occur. +*/ + + unfl = SAFEMINIMUM; + ovfl = 1. / unfl; + dlabad_(&unfl, &ovfl); + ulp = PRECISION; + smlnum = unfl * (nh / ulp); + +/* + I1 and I2 are the indices of the first row and last column of H + to which transformations must be applied. If eigenvalues only are + being computed, I1 and I2 are set inside the main loop. +*/ + + if (wantt) { + i1 = 1; + i2 = *n; + } + +/* ITN is the total number of multiple-shift QR iterations allowed. */ + + itn = nh * 30; + +/* + The main loop begins here. I is the loop index and decreases from + IHI to ILO in steps of at most MAXB. Each iteration of the loop + works with the active submatrix in rows and columns L to I. + Eigenvalues I+1 to IHI have already converged. Either L = ILO or + H(L,L-1) is negligible so that the matrix splits. +*/ + + i__ = *ihi; +L50: + l = *ilo; + if (i__ < *ilo) { + goto L170; + } + +/* + Perform multiple-shift QR iterations on rows and columns ILO to I + until a submatrix of order at most MAXB splits off at the bottom + because a subdiagonal element has become negligible. +*/ + + i__1 = itn; + for (its = 0; its <= i__1; ++its) { + +/* Look for a single small subdiagonal element. */ + + i__2 = l + 1; + for (k = i__; k >= i__2; --k) { + tst1 = (d__1 = h__[k - 1 + (k - 1) * h_dim1], abs(d__1)) + (d__2 = + h__[k + k * h_dim1], abs(d__2)); + if (tst1 == 0.) { + i__4 = i__ - l + 1; + tst1 = dlanhs_("1", &i__4, &h__[l + l * h_dim1], ldh, &work[1] + ); + } +/* Computing MAX */ + d__2 = ulp * tst1; + if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= max(d__2, + smlnum)) { + goto L70; + } +/* L60: */ + } +L70: + l = k; + if (l > *ilo) { + +/* H(L,L-1) is negligible. */ + + h__[l + (l - 1) * h_dim1] = 0.; + } + +/* Exit from loop if a submatrix of order <= MAXB has split off. */ + + if (l >= i__ - maxb + 1) { + goto L160; + } + +/* + Now the active submatrix is in rows and columns L to I. If + eigenvalues only are being computed, only the active submatrix + need be transformed. +*/ + + if (! wantt) { + i1 = l; + i2 = i__; + } + + if (its == 20 || its == 30) { + +/* Exceptional shifts. */ + + i__2 = i__; + for (ii = i__ - ns + 1; ii <= i__2; ++ii) { + wr[ii] = ((d__1 = h__[ii + (ii - 1) * h_dim1], abs(d__1)) + ( + d__2 = h__[ii + ii * h_dim1], abs(d__2))) * 1.5; + wi[ii] = 0.; +/* L80: */ + } + } else { + +/* Use eigenvalues of trailing submatrix of order NS as shifts. */ + + dlacpy_("Full", &ns, &ns, &h__[i__ - ns + 1 + (i__ - ns + 1) * + h_dim1], ldh, s, &c__15); + dlahqr_(&c_false, &c_false, &ns, &c__1, &ns, s, &c__15, &wr[i__ - + ns + 1], &wi[i__ - ns + 1], &c__1, &ns, &z__[z_offset], + ldz, &ierr); + if (ierr > 0) { + +/* + If DLAHQR failed to compute all NS eigenvalues, use the + unconverged diagonal elements as the remaining shifts. +*/ + + i__2 = ierr; + for (ii = 1; ii <= i__2; ++ii) { + wr[i__ - ns + ii] = s[ii + ii * 15 - 16]; + wi[i__ - ns + ii] = 0.; +/* L90: */ + } + } + } + +/* + Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns)) + where G is the Hessenberg submatrix H(L:I,L:I) and w is + the vector of shifts (stored in WR and WI). The result is + stored in the local array V. +*/ + + v[0] = 1.; + i__2 = ns + 1; + for (ii = 2; ii <= i__2; ++ii) { + v[ii - 1] = 0.; +/* L100: */ + } + nv = 1; + i__2 = i__; + for (j = i__ - ns + 1; j <= i__2; ++j) { + if (wi[j] >= 0.) { + if (wi[j] == 0.) { + +/* real shift */ + + i__4 = nv + 1; + dcopy_(&i__4, v, &c__1, vv, &c__1); + i__4 = nv + 1; + d__1 = -wr[j]; + dgemv_("No transpose", &i__4, &nv, &c_b15, &h__[l + l * + h_dim1], ldh, vv, &c__1, &d__1, v, &c__1); + ++nv; + } else if (wi[j] > 0.) { + +/* complex conjugate pair of shifts */ + + i__4 = nv + 1; + dcopy_(&i__4, v, &c__1, vv, &c__1); + i__4 = nv + 1; + d__1 = wr[j] * -2.; + dgemv_("No transpose", &i__4, &nv, &c_b15, &h__[l + l * + h_dim1], ldh, v, &c__1, &d__1, vv, &c__1); + i__4 = nv + 1; + itemp = idamax_(&i__4, vv, &c__1); +/* Computing MAX */ + d__2 = (d__1 = vv[itemp - 1], abs(d__1)); + temp = 1. / max(d__2,smlnum); + i__4 = nv + 1; + dscal_(&i__4, &temp, vv, &c__1); + absw = dlapy2_(&wr[j], &wi[j]); + temp = temp * absw * absw; + i__4 = nv + 2; + i__5 = nv + 1; + dgemv_("No transpose", &i__4, &i__5, &c_b15, &h__[l + l * + h_dim1], ldh, vv, &c__1, &temp, v, &c__1); + nv += 2; + } + +/* + Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero, + reset it to the unit vector. +*/ + + itemp = idamax_(&nv, v, &c__1); + temp = (d__1 = v[itemp - 1], abs(d__1)); + if (temp == 0.) { + v[0] = 1.; + i__4 = nv; + for (ii = 2; ii <= i__4; ++ii) { + v[ii - 1] = 0.; +/* L110: */ + } + } else { + temp = max(temp,smlnum); + d__1 = 1. / temp; + dscal_(&nv, &d__1, v, &c__1); + } + } +/* L120: */ + } + +/* Multiple-shift QR step */ + + i__2 = i__ - 1; + for (k = l; k <= i__2; ++k) { + +/* + The first iteration of this loop determines a reflection G + from the vector V and applies it from left and right to H, + thus creating a nonzero bulge below the subdiagonal. + + Each subsequent iteration determines a reflection G to + restore the Hessenberg form in the (K-1)th column, and thus + chases the bulge one step toward the bottom of the active + submatrix. NR is the order of G. + + Computing MIN +*/ + i__4 = ns + 1, i__5 = i__ - k + 1; + nr = min(i__4,i__5); + if (k > l) { + dcopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); + } + dlarfg_(&nr, v, &v[1], &c__1, &tau); + if (k > l) { + h__[k + (k - 1) * h_dim1] = v[0]; + i__4 = i__; + for (ii = k + 1; ii <= i__4; ++ii) { + h__[ii + (k - 1) * h_dim1] = 0.; +/* L130: */ + } + } + v[0] = 1.; + +/* + Apply G from the left to transform the rows of the matrix in + columns K to I2. +*/ + + i__4 = i2 - k + 1; + dlarfx_("Left", &nr, &i__4, v, &tau, &h__[k + k * h_dim1], ldh, & + work[1]); + +/* + Apply G from the right to transform the columns of the + matrix in rows I1 to min(K+NR,I). + + Computing MIN +*/ + i__5 = k + nr; + i__4 = min(i__5,i__) - i1 + 1; + dlarfx_("Right", &i__4, &nr, v, &tau, &h__[i1 + k * h_dim1], ldh, + &work[1]); + + if (wantz) { + +/* Accumulate transformations in the matrix Z */ + + dlarfx_("Right", &nh, &nr, v, &tau, &z__[*ilo + k * z_dim1], + ldz, &work[1]); + } +/* L140: */ + } + +/* L150: */ + } + +/* Failure to converge in remaining number of iterations */ + + *info = i__; + return 0; + +L160: + +/* + A submatrix of order <= MAXB in rows and columns L to I has split + off. Use the double-shift QR algorithm to handle it. +*/ + + dlahqr_(&wantt, &wantz, n, &l, &i__, &h__[h_offset], ldh, &wr[1], &wi[1], + ilo, ihi, &z__[z_offset], ldz, info); + if (*info > 0) { + return 0; + } + +/* + Decrement number of remaining iterations, and return to start of + the main loop with a new value of I. +*/ + + itn -= its; + i__ = l - 1; + goto L50; + +L170: + work[1] = (doublereal) max(1,*n); + return 0; + +/* End of DHSEQR */ + +} /* dhseqr_ */ + +/* Subroutine */ int dlabad_(doublereal *small, doublereal *large) +{ + /* Builtin functions */ + double d_lg10(doublereal *), sqrt(doublereal); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DLABAD takes as input the values computed by DLAMCH for underflow and + overflow, and returns the square root of each of these values if the + log of LARGE is sufficiently large. This subroutine is intended to + identify machines with a large exponent range, such as the Crays, and + redefine the underflow and overflow limits to be the square roots of + the values computed by DLAMCH. This subroutine is needed because + DLAMCH does not compensate for poor arithmetic in the upper half of + the exponent range, as is found on a Cray. + + Arguments + ========= + + SMALL (input/output) DOUBLE PRECISION + On entry, the underflow threshold as computed by DLAMCH. + On exit, if LOG10(LARGE) is sufficiently large, the square + root of SMALL, otherwise unchanged. + + LARGE (input/output) DOUBLE PRECISION + On entry, the overflow threshold as computed by DLAMCH. + On exit, if LOG10(LARGE) is sufficiently large, the square + root of LARGE, otherwise unchanged. + + ===================================================================== + + + If it looks like we're on a Cray, take the square root of + SMALL and LARGE to avoid overflow and underflow problems. +*/ + + if (d_lg10(large) > 2e3) { + *small = sqrt(*small); + *large = sqrt(*large); + } + + return 0; + +/* End of DLABAD */ + +} /* dlabad_ */ + +/* Subroutine */ int dlabrd_(integer *m, integer *n, integer *nb, doublereal * + a, integer *lda, doublereal *d__, doublereal *e, doublereal *tauq, + doublereal *taup, doublereal *x, integer *ldx, doublereal *y, integer + *ldy) +{ + /* System generated locals */ + integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, + i__3; + + /* Local variables */ + static integer i__; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dgemv_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DLABRD reduces the first NB rows and columns of a real general + m by n matrix A to upper or lower bidiagonal form by an orthogonal + transformation Q' * A * P, and returns the matrices X and Y which + are needed to apply the transformation to the unreduced part of A. + + If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower + bidiagonal form. + + This is an auxiliary routine called by DGEBRD + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. + + N (input) INTEGER + The number of columns in the matrix A. + + NB (input) INTEGER + The number of leading rows and columns of A to be reduced. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the m by n general matrix to be reduced. + On exit, the first NB rows and columns of the matrix are + overwritten; the rest of the array is unchanged. + If m >= n, elements on and below the diagonal in the first NB + columns, with the array TAUQ, represent the orthogonal + matrix Q as a product of elementary reflectors; and + elements above the diagonal in the first NB rows, with the + array TAUP, represent the orthogonal matrix P as a product + of elementary reflectors. + If m < n, elements below the diagonal in the first NB + columns, with the array TAUQ, represent the orthogonal + matrix Q as a product of elementary reflectors, and + elements on and above the diagonal in the first NB rows, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) DOUBLE PRECISION array, dimension (NB) + The diagonal elements of the first NB rows and columns of + the reduced matrix. D(i) = A(i,i). + + E (output) DOUBLE PRECISION array, dimension (NB) + The off-diagonal elements of the first NB rows and columns of + the reduced matrix. + + TAUQ (output) DOUBLE PRECISION array dimension (NB) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix Q. See Further Details. + + TAUP (output) DOUBLE PRECISION array, dimension (NB) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix P. See Further Details. + + X (output) DOUBLE PRECISION array, dimension (LDX,NB) + The m-by-nb matrix X required to update the unreduced part + of A. + + LDX (input) INTEGER + The leading dimension of the array X. LDX >= M. + + Y (output) DOUBLE PRECISION array, dimension (LDY,NB) + The n-by-nb matrix Y required to update the unreduced part + of A. + + LDY (output) INTEGER + The leading dimension of the array Y. LDY >= N. + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors. + + If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in + A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in + A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in + A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in + A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + The elements of the vectors v and u together form the m-by-nb matrix + V and the nb-by-n matrix U' which are needed, with X and Y, to apply + the transformation to the unreduced part of the matrix, using a block + update of the form: A := A - V*Y' - X*U'. + + The contents of A on exit are illustrated by the following examples + with nb = 2: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) + ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) + ( v1 v2 a a a ) ( v1 1 a a a a ) + ( v1 v2 a a a ) ( v1 v2 a a a a ) + ( v1 v2 a a a ) ( v1 v2 a a a a ) + ( v1 v2 a a a ) + + where a denotes an element of the original matrix which is unchanged, + vi denotes an element of the vector defining H(i), and ui an element + of the vector defining G(i). + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + return 0; + } + + if (*m >= *n) { + +/* Reduce to upper bidiagonal form */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i:m,i) */ + + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + a_dim1], + lda, &y[i__ + y_dim1], ldy, &c_b15, &a[i__ + i__ * a_dim1] + , &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &x[i__ + x_dim1], + ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b15, &a[i__ + i__ * + a_dim1], &c__1); + +/* Generate reflection Q(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * + a_dim1], &c__1, &tauq[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + if (i__ < *n) { + a[i__ + i__ * a_dim1] = 1.; + +/* Compute Y(i+1:n,i) */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + (i__ + 1) * + a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b29, + &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + a_dim1], + lda, &a[i__ + i__ * a_dim1], &c__1, &c_b29, &y[i__ * + y_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b15, &y[ + i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &x[i__ + x_dim1], + ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b29, &y[i__ * + y_dim1 + 1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_("Transpose", &i__2, &i__3, &c_b151, &a[(i__ + 1) * + a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &c_b15, + &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *n - i__; + dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + +/* Update A(i,i+1:n) */ + + i__2 = *n - i__; + dgemv_("No transpose", &i__2, &i__, &c_b151, &y[i__ + 1 + + y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b15, &a[i__ + + (i__ + 1) * a_dim1], lda); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_("Transpose", &i__2, &i__3, &c_b151, &a[(i__ + 1) * + a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b15, &a[ + i__ + (i__ + 1) * a_dim1], lda); + +/* Generate reflection P(i) to annihilate A(i,i+2:n) */ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( + i__3,*n) * a_dim1], lda, &taup[i__]); + e[i__] = a[i__ + (i__ + 1) * a_dim1]; + a[i__ + (i__ + 1) * a_dim1] = 1.; + +/* Compute X(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = *n - i__; + dgemv_("No transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + ( + i__ + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1], + lda, &c_b29, &x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__; + dgemv_("Transpose", &i__2, &i__, &c_b15, &y[i__ + 1 + y_dim1], + ldy, &a[i__ + (i__ + 1) * a_dim1], lda, &c_b29, &x[ + i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + dgemv_("No transpose", &i__2, &i__, &c_b151, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b15, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_("No transpose", &i__2, &i__3, &c_b15, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b29, &x[i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b15, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *m - i__; + dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + } +/* L10: */ + } + } else { + +/* Reduce to lower bidiagonal form */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i,i:n) */ + + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &y[i__ + y_dim1], + ldy, &a[i__ + a_dim1], lda, &c_b15, &a[i__ + i__ * a_dim1] + , lda); + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + dgemv_("Transpose", &i__2, &i__3, &c_b151, &a[i__ * a_dim1 + 1], + lda, &x[i__ + x_dim1], ldx, &c_b15, &a[i__ + i__ * a_dim1] + , lda); + +/* Generate reflection P(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * + a_dim1], lda, &taup[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + if (i__ < *m) { + a[i__ + i__ * a_dim1] = 1.; + +/* Compute X(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = *n - i__ + 1; + dgemv_("No transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + i__ + * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &c_b29, & + x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &y[i__ + y_dim1], + ldy, &a[i__ + i__ * a_dim1], lda, &c_b29, &x[i__ * + x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b15, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + dgemv_("No transpose", &i__2, &i__3, &c_b15, &a[i__ * a_dim1 + + 1], lda, &a[i__ + i__ * a_dim1], lda, &c_b29, &x[ + i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b15, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *m - i__; + dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + +/* Update A(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + 1 + + a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b15, &a[i__ + + 1 + i__ * a_dim1], &c__1); + i__2 = *m - i__; + dgemv_("No transpose", &i__2, &i__, &c_b151, &x[i__ + 1 + + x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b15, &a[ + i__ + 1 + i__ * a_dim1], &c__1); + +/* Generate reflection Q(i) to annihilate A(i+2:m,i) */ + + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) + + i__ * a_dim1], &c__1, &tauq[i__]); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.; + +/* Compute Y(i+1:n,i) */ + + i__2 = *m - i__; + i__3 = *n - i__; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + (i__ + + 1) * a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, + &c_b29, &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + a_dim1] + , lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &y[ + i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b15, &y[ + i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__; + dgemv_("Transpose", &i__2, &i__, &c_b15, &x[i__ + 1 + x_dim1], + ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &y[ + i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + dgemv_("Transpose", &i__, &i__2, &c_b151, &a[(i__ + 1) * + a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &c_b15, + &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *n - i__; + dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + } +/* L20: */ + } + } + return 0; + +/* End of DLABRD */ + +} /* dlabrd_ */ + +/* Subroutine */ int dlacpy_(char *uplo, integer *m, integer *n, doublereal * + a, integer *lda, doublereal *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DLACPY copies all or part of a two-dimensional matrix A to another + matrix B. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies the part of the matrix A to be copied to B. + = 'U': Upper triangular part + = 'L': Lower triangular part + Otherwise: All of the matrix A + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA,N) + The m by n matrix A. If UPLO = 'U', only the upper triangle + or trapezoid is accessed; if UPLO = 'L', only the lower + triangle or trapezoid is accessed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + B (output) DOUBLE PRECISION array, dimension (LDB,N) + On exit, B = A in the locations specified by UPLO. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,M). + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; +/* L10: */ + } +/* L20: */ + } + } else if (lsame_(uplo, "L")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; +/* L30: */ + } +/* L40: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; +/* L50: */ + } +/* L60: */ + } + } + return 0; + +/* End of DLACPY */ + +} /* dlacpy_ */ + +/* Subroutine */ int dladiv_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *d__, doublereal *p, doublereal *q) +{ + static doublereal e, f; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DLADIV performs complex division in real arithmetic + + a + i*b + p + i*q = --------- + c + i*d + + The algorithm is due to Robert L. Smith and can be found + in D. Knuth, The art of Computer Programming, Vol.2, p.195 + + Arguments + ========= + + A (input) DOUBLE PRECISION + B (input) DOUBLE PRECISION + C (input) DOUBLE PRECISION + D (input) DOUBLE PRECISION + The scalars a, b, c, and d in the above expression. + + P (output) DOUBLE PRECISION + Q (output) DOUBLE PRECISION + The scalars p and q in the above expression. + + ===================================================================== +*/ + + + if (abs(*d__) < abs(*c__)) { + e = *d__ / *c__; + f = *c__ + *d__ * e; + *p = (*a + *b * e) / f; + *q = (*b - *a * e) / f; + } else { + e = *c__ / *d__; + f = *d__ + *c__ * e; + *p = (*b + *a * e) / f; + *q = (-(*a) + *b * e) / f; + } + + return 0; + +/* End of DLADIV */ + +} /* dladiv_ */ + +/* Subroutine */ int dlae2_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *rt1, doublereal *rt2) +{ + /* System generated locals */ + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal ab, df, tb, sm, rt, adf, acmn, acmx; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix + [ A B ] + [ B C ]. + On return, RT1 is the eigenvalue of larger absolute value, and RT2 + is the eigenvalue of smaller absolute value. + + Arguments + ========= + + A (input) DOUBLE PRECISION + The (1,1) element of the 2-by-2 matrix. + + B (input) DOUBLE PRECISION + The (1,2) and (2,1) elements of the 2-by-2 matrix. + + C (input) DOUBLE PRECISION + The (2,2) element of the 2-by-2 matrix. + + RT1 (output) DOUBLE PRECISION + The eigenvalue of larger absolute value. + + RT2 (output) DOUBLE PRECISION + The eigenvalue of smaller absolute value. + + Further Details + =============== + + RT1 is accurate to a few ulps barring over/underflow. + + RT2 may be inaccurate if there is massive cancellation in the + determinant A*C-B*B; higher precision or correctly rounded or + correctly truncated arithmetic would be needed to compute RT2 + accurately in all cases. + + Overflow is possible only if RT1 is within a factor of 5 of overflow. + Underflow is harmless if the input data is 0 or exceeds + underflow_threshold / macheps. + + ===================================================================== + + + Compute the eigenvalues +*/ + + sm = *a + *c__; + df = *a - *c__; + adf = abs(df); + tb = *b + *b; + ab = abs(tb); + if (abs(*a) > abs(*c__)) { + acmx = *a; + acmn = *c__; + } else { + acmx = *c__; + acmn = *a; + } + if (adf > ab) { +/* Computing 2nd power */ + d__1 = ab / adf; + rt = adf * sqrt(d__1 * d__1 + 1.); + } else if (adf < ab) { +/* Computing 2nd power */ + d__1 = adf / ab; + rt = ab * sqrt(d__1 * d__1 + 1.); + } else { + +/* Includes case AB=ADF=0 */ + + rt = ab * sqrt(2.); + } + if (sm < 0.) { + *rt1 = (sm - rt) * .5; + +/* + Order of execution important. + To get fully accurate smaller eigenvalue, + next line needs to be executed in higher precision. +*/ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else if (sm > 0.) { + *rt1 = (sm + rt) * .5; + +/* + Order of execution important. + To get fully accurate smaller eigenvalue, + next line needs to be executed in higher precision. +*/ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else { + +/* Includes case RT1 = RT2 = 0 */ + + *rt1 = rt * .5; + *rt2 = rt * -.5; + } + return 0; + +/* End of DLAE2 */ + +} /* dlae2_ */ + +/* Subroutine */ int dlaed0_(integer *icompq, integer *qsiz, integer *n, + doublereal *d__, doublereal *e, doublereal *q, integer *ldq, + doublereal *qstore, integer *ldqs, doublereal *work, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double log(doublereal); + integer pow_ii(integer *, integer *); + + /* Local variables */ + static integer i__, j, k, iq, lgn, msd2, smm1, spm1, spm2; + static doublereal temp; + static integer curr; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer iperm; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer indxq, iwrem; + extern /* Subroutine */ int dlaed1_(integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *); + static integer iqptr; + extern /* Subroutine */ int dlaed7_(integer *, integer *, integer *, + integer *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, integer *, integer *, doublereal + *, doublereal *, integer *, integer *); + static integer tlvls; + extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *); + static integer igivcl; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer igivnm, submat, curprb, subpbs, igivpt; + extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *); + static integer curlvl, matsiz, iprmpt, smlsiz; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DLAED0 computes all eigenvalues and corresponding eigenvectors of a + symmetric tridiagonal matrix using the divide and conquer method. + + Arguments + ========= + + ICOMPQ (input) INTEGER + = 0: Compute eigenvalues only. + = 1: Compute eigenvectors of original dense symmetric matrix + also. On entry, Q contains the orthogonal matrix used + to reduce the original matrix to tridiagonal form. + = 2: Compute eigenvalues and eigenvectors of tridiagonal + matrix. + + QSIZ (input) INTEGER + The dimension of the orthogonal matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the main diagonal of the tridiagonal matrix. + On exit, its eigenvalues. + + E (input) DOUBLE PRECISION array, dimension (N-1) + The off-diagonal elements of the tridiagonal matrix. + On exit, E has been destroyed. + + Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) + On entry, Q must contain an N-by-N orthogonal matrix. + If ICOMPQ = 0 Q is not referenced. + If ICOMPQ = 1 On entry, Q is a subset of the columns of the + orthogonal matrix used to reduce the full + matrix to tridiagonal form corresponding to + the subset of the full matrix which is being + decomposed at this time. + If ICOMPQ = 2 On entry, Q will be the identity matrix. + On exit, Q contains the eigenvectors of the + tridiagonal matrix. + + LDQ (input) INTEGER + The leading dimension of the array Q. If eigenvectors are + desired, then LDQ >= max(1,N). In any case, LDQ >= 1. + + QSTORE (workspace) DOUBLE PRECISION array, dimension (LDQS, N) + Referenced only when ICOMPQ = 1. Used to store parts of + the eigenvector matrix when the updating matrix multiplies + take place. + + LDQS (input) INTEGER + The leading dimension of the array QSTORE. If ICOMPQ = 1, + then LDQS >= max(1,N). In any case, LDQS >= 1. + + WORK (workspace) DOUBLE PRECISION array, + If ICOMPQ = 0 or 1, the dimension of WORK must be at least + 1 + 3*N + 2*N*lg N + 2*N**2 + ( lg( N ) = smallest integer k + such that 2^k >= N ) + If ICOMPQ = 2, the dimension of WORK must be at least + 4*N + N**2. + + IWORK (workspace) INTEGER array, + If ICOMPQ = 0 or 1, the dimension of IWORK must be at least + 6 + 6*N + 5*N*lg N. + ( lg( N ) = smallest integer k + such that 2^k >= N ) + If ICOMPQ = 2, the dimension of IWORK must be at least + 3 + 5*N. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an eigenvalue while + working on the submatrix lying in rows and columns + INFO/(N+1) through mod(INFO,N+1). + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + qstore_dim1 = *ldqs; + qstore_offset = 1 + qstore_dim1; + qstore -= qstore_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 2) { + *info = -1; + } else if (*icompq == 1 && *qsiz < max(0,*n)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ldq < max(1,*n)) { + *info = -7; + } else if (*ldqs < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAED0", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + smlsiz = ilaenv_(&c__9, "DLAED0", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + +/* + Determine the size and placement of the submatrices, and save in + the leading elements of IWORK. +*/ + + iwork[1] = *n; + subpbs = 1; + tlvls = 0; +L10: + if (iwork[subpbs] > smlsiz) { + for (j = subpbs; j >= 1; --j) { + iwork[j * 2] = (iwork[j] + 1) / 2; + iwork[(j << 1) - 1] = iwork[j] / 2; +/* L20: */ + } + ++tlvls; + subpbs <<= 1; + goto L10; + } + i__1 = subpbs; + for (j = 2; j <= i__1; ++j) { + iwork[j] += iwork[j - 1]; +/* L30: */ + } + +/* + Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 + using rank-1 modifications (cuts). +*/ + + spm1 = subpbs - 1; + i__1 = spm1; + for (i__ = 1; i__ <= i__1; ++i__) { + submat = iwork[i__] + 1; + smm1 = submat - 1; + d__[smm1] -= (d__1 = e[smm1], abs(d__1)); + d__[submat] -= (d__1 = e[smm1], abs(d__1)); +/* L40: */ + } + + indxq = (*n << 2) + 3; + if (*icompq != 2) { + +/* + Set up workspaces for eigenvalues only/accumulate new vectors + routine +*/ + + temp = log((doublereal) (*n)) / log(2.); + lgn = (integer) temp; + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + iprmpt = indxq + *n + 1; + iperm = iprmpt + *n * lgn; + iqptr = iperm + *n * lgn; + igivpt = iqptr + *n + 2; + igivcl = igivpt + *n * lgn; + + igivnm = 1; + iq = igivnm + (*n << 1) * lgn; +/* Computing 2nd power */ + i__1 = *n; + iwrem = iq + i__1 * i__1 + 1; + +/* Initialize pointers */ + + i__1 = subpbs; + for (i__ = 0; i__ <= i__1; ++i__) { + iwork[iprmpt + i__] = 1; + iwork[igivpt + i__] = 1; +/* L50: */ + } + iwork[iqptr] = 1; + } + +/* + Solve each submatrix eigenproblem at the bottom of the divide and + conquer tree. +*/ + + curr = 0; + i__1 = spm1; + for (i__ = 0; i__ <= i__1; ++i__) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[1]; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 1] - iwork[i__]; + } + if (*icompq == 2) { + dsteqr_("I", &matsiz, &d__[submat], &e[submat], &q[submat + + submat * q_dim1], ldq, &work[1], info); + if (*info != 0) { + goto L130; + } + } else { + dsteqr_("I", &matsiz, &d__[submat], &e[submat], &work[iq - 1 + + iwork[iqptr + curr]], &matsiz, &work[1], info); + if (*info != 0) { + goto L130; + } + if (*icompq == 1) { + dgemm_("N", "N", qsiz, &matsiz, &matsiz, &c_b15, &q[submat * + q_dim1 + 1], ldq, &work[iq - 1 + iwork[iqptr + curr]], + &matsiz, &c_b29, &qstore[submat * qstore_dim1 + 1], + ldqs); + } +/* Computing 2nd power */ + i__2 = matsiz; + iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2; + ++curr; + } + k = 1; + i__2 = iwork[i__ + 1]; + for (j = submat; j <= i__2; ++j) { + iwork[indxq + j] = k; + ++k; +/* L60: */ + } +/* L70: */ + } + +/* + Successively merge eigensystems of adjacent submatrices + into eigensystem for the corresponding larger matrix. + + while ( SUBPBS > 1 ) +*/ + + curlvl = 1; +L80: + if (subpbs > 1) { + spm2 = subpbs - 2; + i__1 = spm2; + for (i__ = 0; i__ <= i__1; i__ += 2) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[2]; + msd2 = iwork[1]; + curprb = 0; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 2] - iwork[i__]; + msd2 = matsiz / 2; + ++curprb; + } + +/* + Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) + into an eigensystem of size MATSIZ. + DLAED1 is used only for the full eigensystem of a tridiagonal + matrix. + DLAED7 handles the cases in which eigenvalues only or eigenvalues + and eigenvectors of a full symmetric matrix (which was reduced to + tridiagonal form) are desired. +*/ + + if (*icompq == 2) { + dlaed1_(&matsiz, &d__[submat], &q[submat + submat * q_dim1], + ldq, &iwork[indxq + submat], &e[submat + msd2 - 1], & + msd2, &work[1], &iwork[subpbs + 1], info); + } else { + dlaed7_(icompq, &matsiz, qsiz, &tlvls, &curlvl, &curprb, &d__[ + submat], &qstore[submat * qstore_dim1 + 1], ldqs, & + iwork[indxq + submat], &e[submat + msd2 - 1], &msd2, & + work[iq], &iwork[iqptr], &iwork[iprmpt], &iwork[iperm] + , &iwork[igivpt], &iwork[igivcl], &work[igivnm], & + work[iwrem], &iwork[subpbs + 1], info); + } + if (*info != 0) { + goto L130; + } + iwork[i__ / 2 + 1] = iwork[i__ + 2]; +/* L90: */ + } + subpbs /= 2; + ++curlvl; + goto L80; + } + +/* + end while + + Re-merge the eigenvalues/vectors which were deflated at the final + merge step. +*/ + + if (*icompq == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + work[i__] = d__[j]; + dcopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 + + 1], &c__1); +/* L100: */ + } + dcopy_(n, &work[1], &c__1, &d__[1], &c__1); + } else if (*icompq == 2) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + work[i__] = d__[j]; + dcopy_(n, &q[j * q_dim1 + 1], &c__1, &work[*n * i__ + 1], &c__1); +/* L110: */ + } + dcopy_(n, &work[1], &c__1, &d__[1], &c__1); + dlacpy_("A", n, n, &work[*n + 1], n, &q[q_offset], ldq); + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + work[i__] = d__[j]; +/* L120: */ + } + dcopy_(n, &work[1], &c__1, &d__[1], &c__1); + } + goto L140; + +L130: + *info = submat * (*n + 1) + submat + matsiz - 1; + +L140: + return 0; + +/* End of DLAED0 */ + +} /* dlaed0_ */ + +/* Subroutine */ int dlaed1_(integer *n, doublereal *d__, doublereal *q, + integer *ldq, integer *indxq, doublereal *rho, integer *cutpnt, + doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + + /* Local variables */ + static integer i__, k, n1, n2, is, iw, iz, iq2, zpp1, indx, indxc; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer indxp; + extern /* Subroutine */ int dlaed2_(integer *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *, integer *, integer *, integer *), dlaed3_(integer *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, integer *, integer *, + doublereal *, doublereal *, integer *); + static integer idlmda; + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), xerbla_(char *, integer *); + static integer coltyp; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DLAED1 computes the updated eigensystem of a diagonal + matrix after modification by a rank-one symmetric matrix. This + routine is used only for the eigenproblem which requires all + eigenvalues and eigenvectors of a tridiagonal matrix. DLAED7 handles + the case in which eigenvalues only or eigenvalues and eigenvectors + of a full symmetric matrix (which was reduced to tridiagonal form) + are desired. + + T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) + + where Z = Q'u, u is a vector of length N with ones in the + CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + + The eigenvectors of the original matrix are stored in Q, and the + eigenvalues are in D. The algorithm consists of three stages: + + The first stage consists of deflating the size of the problem + when there are multiple eigenvalues or if there is a zero in + the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. This stage is + performed by the routine DLAED2. + + The second stage consists of calculating the updated + eigenvalues. This is done by finding the roots of the secular + equation via the routine DLAED4 (as called by DLAED3). + This routine also calculates the eigenvectors of the current + problem. + + The final stage consists of computing the updated eigenvectors + directly using the updated eigenvalues. The eigenvectors for + the current problem are multiplied with the eigenvectors from + the overall problem. + + Arguments + ========= + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the eigenvalues of the rank-1-perturbed matrix. + On exit, the eigenvalues of the repaired matrix. + + Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) + On entry, the eigenvectors of the rank-1-perturbed matrix. + On exit, the eigenvectors of the repaired tridiagonal matrix. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + INDXQ (input/output) INTEGER array, dimension (N) + On entry, the permutation which separately sorts the two + subproblems in D into ascending order. + On exit, the permutation which will reintegrate the + subproblems back into sorted order, + i.e. D( INDXQ( I = 1, N ) ) will be in ascending order. + + RHO (input) DOUBLE PRECISION + The subdiagonal entry used to create the rank-1 modification. + + CUTPNT (input) INTEGER + The location of the last eigenvalue in the leading sub-matrix. + min(1,N) <= CUTPNT <= N/2. + + WORK (workspace) DOUBLE PRECISION array, dimension (4*N + N**2) + + IWORK (workspace) INTEGER array, dimension (4*N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an eigenvalue did not converge + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -1; + } else if (*ldq < max(1,*n)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MIN */ + i__1 = 1, i__2 = *n / 2; + if (min(i__1,i__2) > *cutpnt || *n / 2 < *cutpnt) { + *info = -7; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAED1", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* + The following values are integer pointers which indicate + the portion of the workspace + used by a particular array in DLAED2 and DLAED3. +*/ + + iz = 1; + idlmda = iz + *n; + iw = idlmda + *n; + iq2 = iw + *n; + + indx = 1; + indxc = indx + *n; + coltyp = indxc + *n; + indxp = coltyp + *n; + + +/* + Form the z-vector which consists of the last row of Q_1 and the + first row of Q_2. +*/ + + dcopy_(cutpnt, &q[*cutpnt + q_dim1], ldq, &work[iz], &c__1); + zpp1 = *cutpnt + 1; + i__1 = *n - *cutpnt; + dcopy_(&i__1, &q[zpp1 + zpp1 * q_dim1], ldq, &work[iz + *cutpnt], &c__1); + +/* Deflate eigenvalues. */ + + dlaed2_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, &indxq[1], rho, &work[ + iz], &work[idlmda], &work[iw], &work[iq2], &iwork[indx], &iwork[ + indxc], &iwork[indxp], &iwork[coltyp], info); + + if (*info != 0) { + goto L20; + } + +/* Solve Secular Equation. */ + + if (k != 0) { + is = (iwork[coltyp] + iwork[coltyp + 1]) * *cutpnt + (iwork[coltyp + + 1] + iwork[coltyp + 2]) * (*n - *cutpnt) + iq2; + dlaed3_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, rho, &work[idlmda], + &work[iq2], &iwork[indxc], &iwork[coltyp], &work[iw], &work[ + is], info); + if (*info != 0) { + goto L20; + } + +/* Prepare the INDXQ sorting permutation. */ + + n1 = k; + n2 = *n - k; + dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indxq[i__] = i__; +/* L10: */ + } + } + +L20: + return 0; + +/* End of DLAED1 */ + +} /* dlaed1_ */ + +/* Subroutine */ int dlaed2_(integer *k, integer *n, integer *n1, doublereal * + d__, doublereal *q, integer *ldq, integer *indxq, doublereal *rho, + doublereal *z__, doublereal *dlamda, doublereal *w, doublereal *q2, + integer *indx, integer *indxc, integer *indxp, integer *coltyp, + integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + doublereal d__1, d__2, d__3, d__4; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal c__; + static integer i__, j; + static doublereal s, t; + static integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1; + static doublereal eps, tau, tol; + static integer psm[4], imax, jmax; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + static integer ctot[4]; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dcopy_(integer *, doublereal *, integer *, doublereal + *, integer *); + + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + DLAED2 merges the two sets of eigenvalues together into a single + sorted set. Then it tries to deflate the size of the problem. + There are two ways in which deflation can occur: when two or more + eigenvalues are close together or if there is a tiny entry in the + Z vector. For each such occurrence the order of the related secular + equation problem is reduced by one. + + Arguments + ========= + + K (output) INTEGER + The number of non-deflated eigenvalues, and the order of the + related secular equation. 0 <= K <=N. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + N1 (input) INTEGER + The location of the last eigenvalue in the leading sub-matrix. + min(1,N) <= N1 <= N/2. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, D contains the eigenvalues of the two submatrices to + be combined. + On exit, D contains the trailing (N-K) updated eigenvalues + (those which were deflated) sorted into increasing order. + + Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) + On entry, Q contains the eigenvectors of two submatrices in + the two square blocks with corners at (1,1), (N1,N1) + and (N1+1, N1+1), (N,N). + On exit, Q contains the trailing (N-K) updated eigenvectors + (those which were deflated) in its last N-K columns. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + INDXQ (input/output) INTEGER array, dimension (N) + The permutation which separately sorts the two sub-problems + in D into ascending order. Note that elements in the second + half of this permutation must first have N1 added to their + values. Destroyed on exit. + + RHO (input/output) DOUBLE PRECISION + On entry, the off-diagonal element associated with the rank-1 + cut which originally split the two submatrices which are now + being recombined. + On exit, RHO has been modified to the value required by + DLAED3. + + Z (input) DOUBLE PRECISION array, dimension (N) + On entry, Z contains the updating vector (the last + row of the first sub-eigenvector matrix and the first row of + the second sub-eigenvector matrix). + On exit, the contents of Z have been destroyed by the updating + process. + + DLAMDA (output) DOUBLE PRECISION array, dimension (N) + A copy of the first K eigenvalues which will be used by + DLAED3 to form the secular equation. + + W (output) DOUBLE PRECISION array, dimension (N) + The first k values of the final deflation-altered z-vector + which will be passed to DLAED3. + + Q2 (output) DOUBLE PRECISION array, dimension (N1**2+(N-N1)**2) + A copy of the first K eigenvectors which will be used by + DLAED3 in a matrix multiply (DGEMM) to solve for the new + eigenvectors. + + INDX (workspace) INTEGER array, dimension (N) + The permutation used to sort the contents of DLAMDA into + ascending order. + + INDXC (output) INTEGER array, dimension (N) + The permutation used to arrange the columns of the deflated + Q matrix into three groups: the first group contains non-zero + elements only at and above N1, the second contains + non-zero elements only below N1, and the third is dense. + + INDXP (workspace) INTEGER array, dimension (N) + The permutation used to place deflated values of D at the end + of the array. INDXP(1:K) points to the nondeflated D-values + and INDXP(K+1:N) points to the deflated eigenvalues. + + COLTYP (workspace/output) INTEGER array, dimension (N) + During execution, a label which will indicate which of the + following types a column in the Q2 matrix is: + 1 : non-zero in the upper half only; + 2 : dense; + 3 : non-zero in the lower half only; + 4 : deflated. + On exit, COLTYP(i) is the number of columns of type i, + for i=1 to 4 only. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --z__; + --dlamda; + --w; + --q2; + --indx; + --indxc; + --indxp; + --coltyp; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -2; + } else if (*ldq < max(1,*n)) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MIN */ + i__1 = 1, i__2 = *n / 2; + if (min(i__1,i__2) > *n1 || *n / 2 < *n1) { + *info = -3; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAED2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + n2 = *n - *n1; + n1p1 = *n1 + 1; + + if (*rho < 0.) { + dscal_(&n2, &c_b151, &z__[n1p1], &c__1); + } + +/* + Normalize z so that norm(z) = 1. Since z is the concatenation of + two normalized vectors, norm2(z) = sqrt(2). +*/ + + t = 1. / sqrt(2.); + dscal_(n, &t, &z__[1], &c__1); + +/* RHO = ABS( norm(z)**2 * RHO ) */ + + *rho = (d__1 = *rho * 2., abs(d__1)); + +/* Sort the eigenvalues into increasing order */ + + i__1 = *n; + for (i__ = n1p1; i__ <= i__1; ++i__) { + indxq[i__] += *n1; +/* L10: */ + } + +/* re-integrate the deflated parts from the last pass */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = d__[indxq[i__]]; +/* L20: */ + } + dlamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indx[i__] = indxq[indxc[i__]]; +/* L30: */ + } + +/* Calculate the allowable deflation tolerance */ + + imax = idamax_(n, &z__[1], &c__1); + jmax = idamax_(n, &d__[1], &c__1); + eps = EPSILON; +/* Computing MAX */ + d__3 = (d__1 = d__[jmax], abs(d__1)), d__4 = (d__2 = z__[imax], abs(d__2)) + ; + tol = eps * 8. * max(d__3,d__4); + +/* + If the rank-1 modifier is small enough, no more needs to be done + except to reorganize Q so that its columns correspond with the + elements in D. +*/ + + if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) { + *k = 0; + iq2 = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__ = indx[j]; + dcopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1); + dlamda[j] = d__[i__]; + iq2 += *n; +/* L40: */ + } + dlacpy_("A", n, n, &q2[1], n, &q[q_offset], ldq); + dcopy_(n, &dlamda[1], &c__1, &d__[1], &c__1); + goto L190; + } + +/* + If there are multiple eigenvalues then the problem deflates. Here + the number of equal eigenvalues are found. As each equal + eigenvalue is found, an elementary reflector is computed to rotate + the corresponding eigensubspace so that the corresponding + components of Z are zero in this new basis. +*/ + + i__1 = *n1; + for (i__ = 1; i__ <= i__1; ++i__) { + coltyp[i__] = 1; +/* L50: */ + } + i__1 = *n; + for (i__ = n1p1; i__ <= i__1; ++i__) { + coltyp[i__] = 3; +/* L60: */ + } + + + *k = 0; + k2 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + nj = indx[j]; + if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + coltyp[nj] = 4; + indxp[k2] = nj; + if (j == *n) { + goto L100; + } + } else { + pj = nj; + goto L80; + } +/* L70: */ + } +L80: + ++j; + nj = indx[j]; + if (j > *n) { + goto L100; + } + if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + coltyp[nj] = 4; + indxp[k2] = nj; + } else { + +/* Check if eigenvalues are close enough to allow deflation. */ + + s = z__[pj]; + c__ = z__[nj]; + +/* + Find sqrt(a**2+b**2) without overflow or + destructive underflow. +*/ + + tau = dlapy2_(&c__, &s); + t = d__[nj] - d__[pj]; + c__ /= tau; + s = -s / tau; + if ((d__1 = t * c__ * s, abs(d__1)) <= tol) { + +/* Deflation is possible. */ + + z__[nj] = tau; + z__[pj] = 0.; + if (coltyp[nj] != coltyp[pj]) { + coltyp[nj] = 2; + } + coltyp[pj] = 4; + drot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, & + c__, &s); +/* Computing 2nd power */ + d__1 = c__; +/* Computing 2nd power */ + d__2 = s; + t = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2); +/* Computing 2nd power */ + d__1 = s; +/* Computing 2nd power */ + d__2 = c__; + d__[nj] = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2); + d__[pj] = t; + --k2; + i__ = 1; +L90: + if (k2 + i__ <= *n) { + if (d__[pj] < d__[indxp[k2 + i__]]) { + indxp[k2 + i__ - 1] = indxp[k2 + i__]; + indxp[k2 + i__] = pj; + ++i__; + goto L90; + } else { + indxp[k2 + i__ - 1] = pj; + } + } else { + indxp[k2 + i__ - 1] = pj; + } + pj = nj; + } else { + ++(*k); + dlamda[*k] = d__[pj]; + w[*k] = z__[pj]; + indxp[*k] = pj; + pj = nj; + } + } + goto L80; +L100: + +/* Record the last eigenvalue. */ + + ++(*k); + dlamda[*k] = d__[pj]; + w[*k] = z__[pj]; + indxp[*k] = pj; + +/* + Count up the total number of the various types of columns, then + form a permutation which positions the four column types into + four uniform groups (although one or more of these groups may be + empty). +*/ + + for (j = 1; j <= 4; ++j) { + ctot[j - 1] = 0; +/* L110: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + ct = coltyp[j]; + ++ctot[ct - 1]; +/* L120: */ + } + +/* PSM(*) = Position in SubMatrix (of types 1 through 4) */ + + psm[0] = 1; + psm[1] = ctot[0] + 1; + psm[2] = psm[1] + ctot[1]; + psm[3] = psm[2] + ctot[2]; + *k = *n - ctot[3]; + +/* + Fill out the INDXC array so that the permutation which it induces + will place all type-1 columns first, all type-2 columns next, + then all type-3's, and finally all type-4's. +*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + js = indxp[j]; + ct = coltyp[js]; + indx[psm[ct - 1]] = js; + indxc[psm[ct - 1]] = j; + ++psm[ct - 1]; +/* L130: */ + } + +/* + Sort the eigenvalues and corresponding eigenvectors into DLAMDA + and Q2 respectively. The eigenvalues/vectors which were not + deflated go into the first K slots of DLAMDA and Q2 respectively, + while those which were deflated go into the last N - K slots. +*/ + + i__ = 1; + iq1 = 1; + iq2 = (ctot[0] + ctot[1]) * *n1 + 1; + i__1 = ctot[0]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); + z__[i__] = d__[js]; + ++i__; + iq1 += *n1; +/* L140: */ + } + + i__1 = ctot[1]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); + dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); + z__[i__] = d__[js]; + ++i__; + iq1 += *n1; + iq2 += n2; +/* L150: */ + } + + i__1 = ctot[2]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); + z__[i__] = d__[js]; + ++i__; + iq2 += n2; +/* L160: */ + } + + iq1 = iq2; + i__1 = ctot[3]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + dcopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1); + iq2 += *n; + z__[i__] = d__[js]; + ++i__; +/* L170: */ + } + +/* + The deflated eigenvalues and their corresponding vectors go back + into the last N - K slots of D and Q respectively. +*/ + + dlacpy_("A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq); + i__1 = *n - *k; + dcopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1); + +/* Copy CTOT into COLTYP for referencing in DLAED3. */ + + for (j = 1; j <= 4; ++j) { + coltyp[j] = ctot[j - 1]; +/* L180: */ + } + +L190: + return 0; + +/* End of DLAED2 */ + +} /* dlaed2_ */ + +/* Subroutine */ int dlaed3_(integer *k, integer *n, integer *n1, doublereal * + d__, doublereal *q, integer *ldq, doublereal *rho, doublereal *dlamda, + doublereal *q2, integer *indx, integer *ctot, doublereal *w, + doublereal *s, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + static integer i__, j, n2, n12, ii, n23, iq2; + static doublereal temp; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), + dcopy_(integer *, doublereal *, integer *, doublereal *, integer + *), dlaed4_(integer *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *); + extern doublereal dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + dlaset_(char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + June 30, 1999 + + + Purpose + ======= + + DLAED3 finds the roots of the secular equation, as defined by the + values in D, W, and RHO, between 1 and K. It makes the + appropriate calls to DLAED4 and then updates the eigenvectors by + multiplying the matrix of eigenvectors of the pair of eigensystems + being combined by the matrix of eigenvectors of the K-by-K system + which is solved here. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + K (input) INTEGER + The number of terms in the rational function to be solved by + DLAED4. K >= 0. + + N (input) INTEGER + The number of rows and columns in the Q matrix. + N >= K (deflation may result in N>K). + + N1 (input) INTEGER + The location of the last eigenvalue in the leading submatrix. + min(1,N) <= N1 <= N/2. + + D (output) DOUBLE PRECISION array, dimension (N) + D(I) contains the updated eigenvalues for + 1 <= I <= K. + + Q (output) DOUBLE PRECISION array, dimension (LDQ,N) + Initially the first K columns are used as workspace. + On output the columns 1 to K contain + the updated eigenvectors. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + RHO (input) DOUBLE PRECISION + The value of the parameter in the rank one update equation. + RHO >= 0 required. + + DLAMDA (input/output) DOUBLE PRECISION array, dimension (K) + The first K elements of this array contain the old roots + of the deflated updating problem. These are the poles + of the secular equation. May be changed on output by + having lowest order bit set to zero on Cray X-MP, Cray Y-MP, + Cray-2, or Cray C-90, as described above. + + Q2 (input) DOUBLE PRECISION array, dimension (LDQ2, N) + The first K columns of this matrix contain the non-deflated + eigenvectors for the split problem. + + INDX (input) INTEGER array, dimension (N) + The permutation used to arrange the columns of the deflated + Q matrix into three groups (see DLAED2). + The rows of the eigenvectors found by DLAED4 must be likewise + permuted before the matrix multiply can take place. + + CTOT (input) INTEGER array, dimension (4) + A count of the total number of the various types of columns + in Q, as described in INDX. The fourth column type is any + column which has been deflated. + + W (input/output) DOUBLE PRECISION array, dimension (K) + The first K elements of this array contain the components + of the deflation-adjusted updating vector. Destroyed on + output. + + S (workspace) DOUBLE PRECISION array, dimension (N1 + 1)*K + Will contain the eigenvectors of the repaired matrix which + will be multiplied by the previously accumulated eigenvectors + to update the system. + + LDS (input) INTEGER + The leading dimension of S. LDS >= max(1,K). + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an eigenvalue did not converge + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --dlamda; + --q2; + --indx; + --ctot; + --w; + --s; + + /* Function Body */ + *info = 0; + + if (*k < 0) { + *info = -1; + } else if (*n < *k) { + *info = -2; + } else if (*ldq < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAED3", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*k == 0) { + return 0; + } + +/* + Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can + be computed with high relative accuracy (barring over/underflow). + This is a problem on machines without a guard digit in + add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). + The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), + which on any of these machines zeros out the bottommost + bit of DLAMDA(I) if it is 1; this makes the subsequent + subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation + occurs. On binary machines with a guard digit (almost all + machines) it does not change DLAMDA(I) at all. On hexadecimal + and decimal machines with a guard digit, it slightly + changes the bottommost bits of DLAMDA(I). It does not account + for hexadecimal or decimal machines without guard digits + (we know of none). We use a subroutine call to compute + 2*DLAMBDA(I) to prevent optimizing compilers from eliminating + this code. +*/ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; +/* L10: */ + } + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], + info); + +/* If the zero finder fails, the computation is terminated. */ + + if (*info != 0) { + goto L120; + } +/* L20: */ + } + + if (*k == 1) { + goto L110; + } + if (*k == 2) { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + w[1] = q[j * q_dim1 + 1]; + w[2] = q[j * q_dim1 + 2]; + ii = indx[1]; + q[j * q_dim1 + 1] = w[ii]; + ii = indx[2]; + q[j * q_dim1 + 2] = w[ii]; +/* L30: */ + } + goto L110; + } + +/* Compute updated W. */ + + dcopy_(k, &w[1], &c__1, &s[1], &c__1); + +/* Initialize W(I) = Q(I,I) */ + + i__1 = *ldq + 1; + dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1); + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L40: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L50: */ + } +/* L60: */ + } + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + d__1 = sqrt(-w[i__]); + w[i__] = d_sign(&d__1, &s[i__]); +/* L70: */ + } + +/* Compute eigenvectors of the modified rank-1 modification. */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + s[i__] = w[i__] / q[i__ + j * q_dim1]; +/* L80: */ + } + temp = dnrm2_(k, &s[1], &c__1); + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + ii = indx[i__]; + q[i__ + j * q_dim1] = s[ii] / temp; +/* L90: */ + } +/* L100: */ + } + +/* Compute the updated eigenvectors. */ + +L110: + + n2 = *n - *n1; + n12 = ctot[1] + ctot[2]; + n23 = ctot[2] + ctot[3]; + + dlacpy_("A", &n23, k, &q[ctot[1] + 1 + q_dim1], ldq, &s[1], &n23); + iq2 = *n1 * n12 + 1; + if (n23 != 0) { + dgemm_("N", "N", &n2, k, &n23, &c_b15, &q2[iq2], &n2, &s[1], &n23, & + c_b29, &q[*n1 + 1 + q_dim1], ldq); + } else { + dlaset_("A", &n2, k, &c_b29, &c_b29, &q[*n1 + 1 + q_dim1], ldq); + } + + dlacpy_("A", &n12, k, &q[q_offset], ldq, &s[1], &n12); + if (n12 != 0) { + dgemm_("N", "N", n1, k, &n12, &c_b15, &q2[1], n1, &s[1], &n12, &c_b29, + &q[q_offset], ldq); + } else { + dlaset_("A", n1, k, &c_b29, &c_b29, &q[q_dim1 + 1], ldq); + } + + +L120: + return 0; + +/* End of DLAED3 */ + +} /* dlaed3_ */ + +/* Subroutine */ int dlaed4_(integer *n, integer *i__, doublereal *d__, + doublereal *z__, doublereal *delta, doublereal *rho, doublereal *dlam, + integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal a, b, c__; + static integer j; + static doublereal w; + static integer ii; + static doublereal dw, zz[3]; + static integer ip1; + static doublereal del, eta, phi, eps, tau, psi; + static integer iim1, iip1; + static doublereal dphi, dpsi; + static integer iter; + static doublereal temp, prew, temp1, dltlb, dltub, midpt; + static integer niter; + static logical swtch; + extern /* Subroutine */ int dlaed5_(integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *), dlaed6_(integer *, + logical *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *); + static logical swtch3; + + static logical orgati; + static doublereal erretm, rhoinv; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + December 23, 1999 + + + Purpose + ======= + + This subroutine computes the I-th updated eigenvalue of a symmetric + rank-one modification to a diagonal matrix whose elements are + given in the array d, and that + + D(i) < D(j) for i < j + + and that RHO > 0. This is arranged by the calling routine, and is + no loss in generality. The rank-one modified system is thus + + diag( D ) + RHO * Z * Z_transpose. + + where we assume the Euclidean norm of Z is 1. + + The method consists of approximating the rational functions in the + secular equation by simpler interpolating rational functions. + + Arguments + ========= + + N (input) INTEGER + The length of all arrays. + + I (input) INTEGER + The index of the eigenvalue to be computed. 1 <= I <= N. + + D (input) DOUBLE PRECISION array, dimension (N) + The original eigenvalues. It is assumed that they are in + order, D(I) < D(J) for I < J. + + Z (input) DOUBLE PRECISION array, dimension (N) + The components of the updating vector. + + DELTA (output) DOUBLE PRECISION array, dimension (N) + If N .ne. 1, DELTA contains (D(j) - lambda_I) in its j-th + component. If N = 1, then DELTA(1) = 1. The vector DELTA + contains the information necessary to construct the + eigenvectors. + + RHO (input) DOUBLE PRECISION + The scalar in the symmetric updating formula. + + DLAM (output) DOUBLE PRECISION + The computed lambda_I, the I-th updated eigenvalue. + + INFO (output) INTEGER + = 0: successful exit + > 0: if INFO = 1, the updating process failed. + + Internal Parameters + =================== + + Logical variable ORGATI (origin-at-i?) is used for distinguishing + whether D(i) or D(i+1) is treated as the origin. + + ORGATI = .true. origin at i + ORGATI = .false. origin at i+1 + + Logical variable SWTCH3 (switch-for-3-poles?) is for noting + if we are working with THREE poles! + + MAXIT is the maximum number of iterations allowed for each + eigenvalue. + + Further Details + =============== + + Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Since this routine is called in an inner loop, we do no argument + checking. + + Quick return for N=1 and 2. +*/ + + /* Parameter adjustments */ + --delta; + --z__; + --d__; + + /* Function Body */ + *info = 0; + if (*n == 1) { + +/* Presumably, I=1 upon entry */ + + *dlam = d__[1] + *rho * z__[1] * z__[1]; + delta[1] = 1.; + return 0; + } + if (*n == 2) { + dlaed5_(i__, &d__[1], &z__[1], &delta[1], rho, dlam); + return 0; + } + +/* Compute machine epsilon */ + + eps = EPSILON; + rhoinv = 1. / *rho; + +/* The case I = N */ + + if (*i__ == *n) { + +/* Initialize some basic variables */ + + ii = *n - 1; + niter = 1; + +/* Calculate initial guess */ + + midpt = *rho / 2.; + +/* + If ||Z||_2 is not one, then TEMP should be set to + RHO * ||Z||_2^2 / TWO +*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - midpt; +/* L10: */ + } + + psi = 0.; + i__1 = *n - 2; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / delta[j]; +/* L20: */ + } + + c__ = rhoinv + psi; + w = c__ + z__[ii] * z__[ii] / delta[ii] + z__[*n] * z__[*n] / delta[* + n]; + + if (w <= 0.) { + temp = z__[*n - 1] * z__[*n - 1] / (d__[*n] - d__[*n - 1] + *rho) + + z__[*n] * z__[*n] / *rho; + if (c__ <= temp) { + tau = *rho; + } else { + del = d__[*n] - d__[*n - 1]; + a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n] + ; + b = z__[*n] * z__[*n] * del; + if (a < 0.) { + tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); + } + } + +/* + It can be proved that + D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO +*/ + + dltlb = midpt; + dltub = *rho; + } else { + del = d__[*n] - d__[*n - 1]; + a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]; + b = z__[*n] * z__[*n] * del; + if (a < 0.) { + tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); + } + +/* + It can be proved that + D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2 +*/ + + dltlb = 0.; + dltub = midpt; + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - tau; +/* L30: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L40: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / delta[*n]; + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + + dphi); + + w = rhoinv + phi + psi; + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + *dlam = d__[*i__] + tau; + goto L250; + } + + if (w <= 0.) { + dltlb = max(dltlb,tau); + } else { + dltub = min(dltub,tau); + } + +/* Calculate the new step */ + + ++niter; + c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; + a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * ( + dpsi + dphi); + b = delta[*n - 1] * delta[*n] * w; + if (c__ < 0.) { + c__ = abs(c__); + } + if (c__ == 0.) { +/* + ETA = B/A + ETA = RHO - TAU +*/ + eta = dltub - tau; + } else if (a >= 0.) { + eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ + * 2.); + } else { + eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) + ); + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta > 0.) { + eta = -w / (dpsi + dphi); + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.) { + eta = (dltub - tau) / 2.; + } else { + eta = (dltlb - tau) / 2.; + } + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L50: */ + } + + tau += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L60: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / delta[*n]; + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + + dphi); + + w = rhoinv + phi + psi; + +/* Main loop to update the values of the array DELTA */ + + iter = niter + 1; + + for (niter = iter; niter <= 30; ++niter) { + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + *dlam = d__[*i__] + tau; + goto L250; + } + + if (w <= 0.) { + dltlb = max(dltlb,tau); + } else { + dltub = min(dltub,tau); + } + +/* Calculate the new step */ + + c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; + a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * + (dpsi + dphi); + b = delta[*n - 1] * delta[*n] * w; + if (a >= 0.) { + eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } else { + eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta > 0.) { + eta = -w / (dpsi + dphi); + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.) { + eta = (dltub - tau) / 2.; + } else { + eta = (dltlb - tau) / 2.; + } + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L70: */ + } + + tau += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L80: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / delta[*n]; + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * ( + dpsi + dphi); + + w = rhoinv + phi + psi; +/* L90: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + *dlam = d__[*i__] + tau; + goto L250; + +/* End for the case I = N */ + + } else { + +/* The case for I < N */ + + niter = 1; + ip1 = *i__ + 1; + +/* Calculate initial guess */ + + del = d__[ip1] - d__[*i__]; + midpt = del / 2.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - midpt; +/* L100: */ + } + + psi = 0.; + i__1 = *i__ - 1; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / delta[j]; +/* L110: */ + } + + phi = 0.; + i__1 = *i__ + 2; + for (j = *n; j >= i__1; --j) { + phi += z__[j] * z__[j] / delta[j]; +/* L120: */ + } + c__ = rhoinv + psi + phi; + w = c__ + z__[*i__] * z__[*i__] / delta[*i__] + z__[ip1] * z__[ip1] / + delta[ip1]; + + if (w > 0.) { + +/* + d(i)< the ith eigenvalue < (d(i)+d(i+1))/2 + + We choose d(i) as origin. +*/ + + orgati = TRUE_; + a = c__ * del + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; + b = z__[*i__] * z__[*i__] * del; + if (a > 0.) { + tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); + } else { + tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } + dltlb = 0.; + dltub = midpt; + } else { + +/* + (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1) + + We choose d(i+1) as origin. +*/ + + orgati = FALSE_; + a = c__ * del - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; + b = z__[ip1] * z__[ip1] * del; + if (a < 0.) { + tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs( + d__1)))); + } else { + tau = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) / + (c__ * 2.); + } + dltlb = -midpt; + dltub = 0.; + } + + if (orgati) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - tau; +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[ip1] - tau; +/* L140: */ + } + } + if (orgati) { + ii = *i__; + } else { + ii = *i__ + 1; + } + iim1 = ii - 1; + iip1 = ii + 1; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L150: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / delta[j]; + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L160: */ + } + + w = rhoinv + phi + psi; + +/* + W is the value of the secular function with + its ii-th element removed. +*/ + + swtch3 = FALSE_; + if (orgati) { + if (w < 0.) { + swtch3 = TRUE_; + } + } else { + if (w > 0.) { + swtch3 = TRUE_; + } + } + if (ii == 1 || ii == *n) { + swtch3 = FALSE_; + } + + temp = z__[ii] / delta[ii]; + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w += temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + + abs(tau) * dw; + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + if (orgati) { + *dlam = d__[*i__] + tau; + } else { + *dlam = d__[ip1] + tau; + } + goto L250; + } + + if (w <= 0.) { + dltlb = max(dltlb,tau); + } else { + dltub = min(dltub,tau); + } + +/* Calculate the new step */ + + ++niter; + if (! swtch3) { + if (orgati) { +/* Computing 2nd power */ + d__1 = z__[*i__] / delta[*i__]; + c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (d__1 * + d__1); + } else { +/* Computing 2nd power */ + d__1 = z__[ip1] / delta[ip1]; + c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (d__1 * + d__1); + } + a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] * + dw; + b = delta[*i__] * delta[ip1] * w; + if (c__ == 0.) { + if (a == 0.) { + if (orgati) { + a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] * + (dpsi + dphi); + } else { + a = z__[ip1] * z__[ip1] + delta[*i__] * delta[*i__] * + (dpsi + dphi); + } + } + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + temp = rhoinv + psi + phi; + if (orgati) { + temp1 = z__[iim1] / delta[iim1]; + temp1 *= temp1; + c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - d__[ + iip1]) * temp1; + zz[0] = z__[iim1] * z__[iim1]; + zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + dphi); + } else { + temp1 = z__[iip1] / delta[iip1]; + temp1 *= temp1; + c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - d__[ + iim1]) * temp1; + zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - temp1)); + zz[2] = z__[iip1] * z__[iip1]; + } + zz[1] = z__[ii] * z__[ii]; + dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info); + if (*info != 0) { + goto L250; + } + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta >= 0.) { + eta = -w / dw; + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.) { + eta = (dltub - tau) / 2.; + } else { + eta = (dltlb - tau) / 2.; + } + } + + prew = w; + +/* L170: */ + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L180: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L190: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / delta[j]; + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L200: */ + } + + temp = z__[ii] / delta[ii]; + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + ( + d__1 = tau + eta, abs(d__1)) * dw; + + swtch = FALSE_; + if (orgati) { + if (-w > abs(prew) / 10.) { + swtch = TRUE_; + } + } else { + if (w > abs(prew) / 10.) { + swtch = TRUE_; + } + } + + tau += eta; + +/* Main loop to update the values of the array DELTA */ + + iter = niter + 1; + + for (niter = iter; niter <= 30; ++niter) { + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + if (orgati) { + *dlam = d__[*i__] + tau; + } else { + *dlam = d__[ip1] + tau; + } + goto L250; + } + + if (w <= 0.) { + dltlb = max(dltlb,tau); + } else { + dltub = min(dltub,tau); + } + +/* Calculate the new step */ + + if (! swtch3) { + if (! swtch) { + if (orgati) { +/* Computing 2nd power */ + d__1 = z__[*i__] / delta[*i__]; + c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * ( + d__1 * d__1); + } else { +/* Computing 2nd power */ + d__1 = z__[ip1] / delta[ip1]; + c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * + (d__1 * d__1); + } + } else { + temp = z__[ii] / delta[ii]; + if (orgati) { + dpsi += temp * temp; + } else { + dphi += temp * temp; + } + c__ = w - delta[*i__] * dpsi - delta[ip1] * dphi; + } + a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] + * dw; + b = delta[*i__] * delta[ip1] * w; + if (c__ == 0.) { + if (a == 0.) { + if (! swtch) { + if (orgati) { + a = z__[*i__] * z__[*i__] + delta[ip1] * + delta[ip1] * (dpsi + dphi); + } else { + a = z__[ip1] * z__[ip1] + delta[*i__] * delta[ + *i__] * (dpsi + dphi); + } + } else { + a = delta[*i__] * delta[*i__] * dpsi + delta[ip1] + * delta[ip1] * dphi; + } + } + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) + / (c__ * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, + abs(d__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + temp = rhoinv + psi + phi; + if (swtch) { + c__ = temp - delta[iim1] * dpsi - delta[iip1] * dphi; + zz[0] = delta[iim1] * delta[iim1] * dpsi; + zz[2] = delta[iip1] * delta[iip1] * dphi; + } else { + if (orgati) { + temp1 = z__[iim1] / delta[iim1]; + temp1 *= temp1; + c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] + - d__[iip1]) * temp1; + zz[0] = z__[iim1] * z__[iim1]; + zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + + dphi); + } else { + temp1 = z__[iip1] / delta[iip1]; + temp1 *= temp1; + c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] + - d__[iim1]) * temp1; + zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - + temp1)); + zz[2] = z__[iip1] * z__[iip1]; + } + } + dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, + info); + if (*info != 0) { + goto L250; + } + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta >= 0.) { + eta = -w / dw; + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.) { + eta = (dltub - tau) / 2.; + } else { + eta = (dltlb - tau) / 2.; + } + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L210: */ + } + + tau += eta; + prew = w; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L220: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / delta[j]; + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L230: */ + } + + temp = z__[ii] / delta[ii]; + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + + abs(tau) * dw; + if (w * prew > 0. && abs(w) > abs(prew) / 10.) { + swtch = ! swtch; + } + +/* L240: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + if (orgati) { + *dlam = d__[*i__] + tau; + } else { + *dlam = d__[ip1] + tau; + } + + } + +L250: + + return 0; + +/* End of DLAED4 */ + +} /* dlaed4_ */ + +/* Subroutine */ int dlaed5_(integer *i__, doublereal *d__, doublereal *z__, + doublereal *delta, doublereal *rho, doublereal *dlam) +{ + /* System generated locals */ + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal b, c__, w, del, tau, temp; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + September 30, 1994 + + + Purpose + ======= + + This subroutine computes the I-th eigenvalue of a symmetric rank-one + modification of a 2-by-2 diagonal matrix + + diag( D ) + RHO * Z * transpose(Z) . + + The diagonal elements in the array D are assumed to satisfy + + D(i) < D(j) for i < j . + + We also assume RHO > 0 and that the Euclidean norm of the vector + Z is one. + + Arguments + ========= + + I (input) INTEGER + The index of the eigenvalue to be computed. I = 1 or I = 2. + + D (input) DOUBLE PRECISION array, dimension (2) + The original eigenvalues. We assume D(1) < D(2). + + Z (input) DOUBLE PRECISION array, dimension (2) + The components of the updating vector. + + DELTA (output) DOUBLE PRECISION array, dimension (2) + The vector DELTA contains the information necessary + to construct the eigenvectors. + + RHO (input) DOUBLE PRECISION + The scalar in the symmetric updating formula. + + DLAM (output) DOUBLE PRECISION + The computed lambda_I, the I-th updated eigenvalue. + + Further Details + =============== + + Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --delta; + --z__; + --d__; + + /* Function Body */ + del = d__[2] - d__[1]; + if (*i__ == 1) { + w = *rho * 2. * (z__[2] * z__[2] - z__[1] * z__[1]) / del + 1.; + if (w > 0.) { + b = del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[1] * z__[1] * del; + +/* B > ZERO, always */ + + tau = c__ * 2. / (b + sqrt((d__1 = b * b - c__ * 4., abs(d__1)))); + *dlam = d__[1] + tau; + delta[1] = -z__[1] / tau; + delta[2] = z__[2] / (del - tau); + } else { + b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * del; + if (b > 0.) { + tau = c__ * -2. / (b + sqrt(b * b + c__ * 4.)); + } else { + tau = (b - sqrt(b * b + c__ * 4.)) / 2.; + } + *dlam = d__[2] + tau; + delta[1] = -z__[1] / (del + tau); + delta[2] = -z__[2] / tau; + } + temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); + delta[1] /= temp; + delta[2] /= temp; + } else { + +/* Now I=2 */ + + b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * del; + if (b > 0.) { + tau = (b + sqrt(b * b + c__ * 4.)) / 2.; + } else { + tau = c__ * 2. / (-b + sqrt(b * b + c__ * 4.)); + } + *dlam = d__[2] + tau; + delta[1] = -z__[1] / (del + tau); + delta[2] = -z__[2] / tau; + temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); + delta[1] /= temp; + delta[2] /= temp; + } + return 0; + +/* End OF DLAED5 */ + +} /* dlaed5_ */ + +/* Subroutine */ int dlaed6_(integer *kniter, logical *orgati, doublereal * + rho, doublereal *d__, doublereal *z__, doublereal *finit, doublereal * + tau, integer *info) +{ + /* Initialized data */ + + static logical first = TRUE_; + + /* System generated locals */ + integer i__1; + doublereal d__1, d__2, d__3, d__4; + + /* Builtin functions */ + double sqrt(doublereal), log(doublereal), pow_di(doublereal *, integer *); + + /* Local variables */ + static doublereal a, b, c__, f; + static integer i__; + static doublereal fc, df, ddf, eta, eps, base; + static integer iter; + static doublereal temp, temp1, temp2, temp3, temp4; + static logical scale; + static integer niter; + static doublereal small1, small2, sminv1, sminv2; + + static doublereal dscale[3], sclfac, zscale[3], erretm, sclinv; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + June 30, 1999 + + + Purpose + ======= + + DLAED6 computes the positive or negative root (closest to the origin) + of + z(1) z(2) z(3) + f(x) = rho + --------- + ---------- + --------- + d(1)-x d(2)-x d(3)-x + + It is assumed that + + if ORGATI = .true. the root is between d(2) and d(3); + otherwise it is between d(1) and d(2) + + This routine will be called by DLAED4 when necessary. In most cases, + the root sought is the smallest in magnitude, though it might not be + in some extremely rare situations. + + Arguments + ========= + + KNITER (input) INTEGER + Refer to DLAED4 for its significance. + + ORGATI (input) LOGICAL + If ORGATI is true, the needed root is between d(2) and + d(3); otherwise it is between d(1) and d(2). See + DLAED4 for further details. + + RHO (input) DOUBLE PRECISION + Refer to the equation f(x) above. + + D (input) DOUBLE PRECISION array, dimension (3) + D satisfies d(1) < d(2) < d(3). + + Z (input) DOUBLE PRECISION array, dimension (3) + Each of the elements in z must be positive. + + FINIT (input) DOUBLE PRECISION + The value of f at 0. It is more accurate than the one + evaluated inside this routine (if someone wants to do + so). + + TAU (output) DOUBLE PRECISION + The root of the equation f(x). + + INFO (output) INTEGER + = 0: successful exit + > 0: if INFO = 1, failure to converge + + Further Details + =============== + + Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== +*/ + + /* Parameter adjustments */ + --z__; + --d__; + + /* Function Body */ + + *info = 0; + + niter = 1; + *tau = 0.; + if (*kniter == 2) { + if (*orgati) { + temp = (d__[3] - d__[2]) / 2.; + c__ = *rho + z__[1] / (d__[1] - d__[2] - temp); + a = c__ * (d__[2] + d__[3]) + z__[2] + z__[3]; + b = c__ * d__[2] * d__[3] + z__[2] * d__[3] + z__[3] * d__[2]; + } else { + temp = (d__[1] - d__[2]) / 2.; + c__ = *rho + z__[3] / (d__[3] - d__[2] - temp); + a = c__ * (d__[1] + d__[2]) + z__[1] + z__[2]; + b = c__ * d__[1] * d__[2] + z__[1] * d__[2] + z__[2] * d__[1]; + } +/* Computing MAX */ + d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1,d__2), d__2 = abs(c__); + temp = max(d__1,d__2); + a /= temp; + b /= temp; + c__ /= temp; + if (c__ == 0.) { + *tau = b / a; + } else if (a <= 0.) { + *tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } else { + *tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)) + )); + } + temp = *rho + z__[1] / (d__[1] - *tau) + z__[2] / (d__[2] - *tau) + + z__[3] / (d__[3] - *tau); + if (abs(*finit) <= abs(temp)) { + *tau = 0.; + } + } + +/* + On first call to routine, get machine parameters for + possible scaling to avoid overflow +*/ + + if (first) { + eps = EPSILON; + base = BASE; + i__1 = (integer) (log(SAFEMINIMUM) / log(base) / 3.); + small1 = pow_di(&base, &i__1); + sminv1 = 1. / small1; + small2 = small1 * small1; + sminv2 = sminv1 * sminv1; + first = FALSE_; + } + +/* + Determine if scaling of inputs necessary to avoid overflow + when computing 1/TEMP**3 +*/ + + if (*orgati) { +/* Computing MIN */ + d__3 = (d__1 = d__[2] - *tau, abs(d__1)), d__4 = (d__2 = d__[3] - * + tau, abs(d__2)); + temp = min(d__3,d__4); + } else { +/* Computing MIN */ + d__3 = (d__1 = d__[1] - *tau, abs(d__1)), d__4 = (d__2 = d__[2] - * + tau, abs(d__2)); + temp = min(d__3,d__4); + } + scale = FALSE_; + if (temp <= small1) { + scale = TRUE_; + if (temp <= small2) { + +/* Scale up by power of radix nearest 1/SAFMIN**(2/3) */ + + sclfac = sminv2; + sclinv = small2; + } else { + +/* Scale up by power of radix nearest 1/SAFMIN**(1/3) */ + + sclfac = sminv1; + sclinv = small1; + } + +/* Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) */ + + for (i__ = 1; i__ <= 3; ++i__) { + dscale[i__ - 1] = d__[i__] * sclfac; + zscale[i__ - 1] = z__[i__] * sclfac; +/* L10: */ + } + *tau *= sclfac; + } else { + +/* Copy D and Z to DSCALE and ZSCALE */ + + for (i__ = 1; i__ <= 3; ++i__) { + dscale[i__ - 1] = d__[i__]; + zscale[i__ - 1] = z__[i__]; +/* L20: */ + } + } + + fc = 0.; + df = 0.; + ddf = 0.; + for (i__ = 1; i__ <= 3; ++i__) { + temp = 1. / (dscale[i__ - 1] - *tau); + temp1 = zscale[i__ - 1] * temp; + temp2 = temp1 * temp; + temp3 = temp2 * temp; + fc += temp1 / dscale[i__ - 1]; + df += temp2; + ddf += temp3; +/* L30: */ + } + f = *finit + *tau * fc; + + if (abs(f) <= 0.) { + goto L60; + } + +/* + Iteration begins + + It is not hard to see that + + 1) Iterations will go up monotonically + if FINIT < 0; + + 2) Iterations will go down monotonically + if FINIT > 0. +*/ + + iter = niter + 1; + + for (niter = iter; niter <= 20; ++niter) { + + if (*orgati) { + temp1 = dscale[1] - *tau; + temp2 = dscale[2] - *tau; + } else { + temp1 = dscale[0] - *tau; + temp2 = dscale[1] - *tau; + } + a = (temp1 + temp2) * f - temp1 * temp2 * df; + b = temp1 * temp2 * f; + c__ = f - (temp1 + temp2) * df + temp1 * temp2 * ddf; +/* Computing MAX */ + d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1,d__2), d__2 = abs(c__); + temp = max(d__1,d__2); + a /= temp; + b /= temp; + c__ /= temp; + if (c__ == 0.) { + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ + * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) + ); + } + if (f * eta >= 0.) { + eta = -f / df; + } + + temp = eta + *tau; + if (*orgati) { + if (eta > 0. && temp >= dscale[2]) { + eta = (dscale[2] - *tau) / 2.; + } + if (eta < 0. && temp <= dscale[1]) { + eta = (dscale[1] - *tau) / 2.; + } + } else { + if (eta > 0. && temp >= dscale[1]) { + eta = (dscale[1] - *tau) / 2.; + } + if (eta < 0. && temp <= dscale[0]) { + eta = (dscale[0] - *tau) / 2.; + } + } + *tau += eta; + + fc = 0.; + erretm = 0.; + df = 0.; + ddf = 0.; + for (i__ = 1; i__ <= 3; ++i__) { + temp = 1. / (dscale[i__ - 1] - *tau); + temp1 = zscale[i__ - 1] * temp; + temp2 = temp1 * temp; + temp3 = temp2 * temp; + temp4 = temp1 / dscale[i__ - 1]; + fc += temp4; + erretm += abs(temp4); + df += temp2; + ddf += temp3; +/* L40: */ + } + f = *finit + *tau * fc; + erretm = (abs(*finit) + abs(*tau) * erretm) * 8. + abs(*tau) * df; + if (abs(f) <= eps * erretm) { + goto L60; + } +/* L50: */ + } + *info = 1; +L60: + +/* Undo scaling */ + + if (scale) { + *tau *= sclinv; + } + return 0; + +/* End of DLAED6 */ + +} /* dlaed6_ */ + +/* Subroutine */ int dlaed7_(integer *icompq, integer *n, integer *qsiz, + integer *tlvls, integer *curlvl, integer *curpbm, doublereal *d__, + doublereal *q, integer *ldq, integer *indxq, doublereal *rho, integer + *cutpnt, doublereal *qstore, integer *qptr, integer *prmptr, integer * + perm, integer *givptr, integer *givcol, doublereal *givnum, + doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + + /* Local variables */ + static integer i__, k, n1, n2, is, iw, iz, iq2, ptr, ldq2, indx, curr; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer indxc, indxp; + extern /* Subroutine */ int dlaed8_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *, + doublereal *, integer *, integer *, integer *), dlaed9_(integer *, + integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + integer *, integer *), dlaeda_(integer *, integer *, integer *, + integer *, integer *, integer *, integer *, integer *, doublereal + *, doublereal *, integer *, doublereal *, doublereal *, integer *) + ; + static integer idlmda; + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), xerbla_(char *, integer *); + static integer coltyp; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + DLAED7 computes the updated eigensystem of a diagonal + matrix after modification by a rank-one symmetric matrix. This + routine is used only for the eigenproblem which requires all + eigenvalues and optionally eigenvectors of a dense symmetric matrix + that has been reduced to tridiagonal form. DLAED1 handles + the case in which all eigenvalues and eigenvectors of a symmetric + tridiagonal matrix are desired. + + T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) + + where Z = Q'u, u is a vector of length N with ones in the + CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + + The eigenvectors of the original matrix are stored in Q, and the + eigenvalues are in D. The algorithm consists of three stages: + + The first stage consists of deflating the size of the problem + when there are multiple eigenvalues or if there is a zero in + the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. This stage is + performed by the routine DLAED8. + + The second stage consists of calculating the updated + eigenvalues. This is done by finding the roots of the secular + equation via the routine DLAED4 (as called by DLAED9). + This routine also calculates the eigenvectors of the current + problem. + + The final stage consists of computing the updated eigenvectors + directly using the updated eigenvalues. The eigenvectors for + the current problem are multiplied with the eigenvectors from + the overall problem. + + Arguments + ========= + + ICOMPQ (input) INTEGER + = 0: Compute eigenvalues only. + = 1: Compute eigenvectors of original dense symmetric matrix + also. On entry, Q contains the orthogonal matrix used + to reduce the original matrix to tridiagonal form. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + QSIZ (input) INTEGER + The dimension of the orthogonal matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. + + TLVLS (input) INTEGER + The total number of merging levels in the overall divide and + conquer tree. + + CURLVL (input) INTEGER + The current level in the overall merge routine, + 0 <= CURLVL <= TLVLS. + + CURPBM (input) INTEGER + The current problem in the current level in the overall + merge routine (counting from upper left to lower right). + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the eigenvalues of the rank-1-perturbed matrix. + On exit, the eigenvalues of the repaired matrix. + + Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) + On entry, the eigenvectors of the rank-1-perturbed matrix. + On exit, the eigenvectors of the repaired tridiagonal matrix. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + INDXQ (output) INTEGER array, dimension (N) + The permutation which will reintegrate the subproblem just + solved back into sorted order, i.e., D( INDXQ( I = 1, N ) ) + will be in ascending order. + + RHO (input) DOUBLE PRECISION + The subdiagonal element used to create the rank-1 + modification. + + CUTPNT (input) INTEGER + Contains the location of the last eigenvalue in the leading + sub-matrix. min(1,N) <= CUTPNT <= N. + + QSTORE (input/output) DOUBLE PRECISION array, dimension (N**2+1) + Stores eigenvectors of submatrices encountered during + divide and conquer, packed together. QPTR points to + beginning of the submatrices. + + QPTR (input/output) INTEGER array, dimension (N+2) + List of indices pointing to beginning of submatrices stored + in QSTORE. The submatrices are numbered starting at the + bottom left of the divide and conquer tree, from left to + right and bottom to top. + + PRMPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in PERM a + level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) + indicates the size of the permutation and also the size of + the full, non-deflated problem. + + PERM (input) INTEGER array, dimension (N lg N) + Contains the permutations (from deflation and sorting) to be + applied to each eigenblock. + + GIVPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in GIVCOL a + level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) + indicates the number of Givens rotations. + + GIVCOL (input) INTEGER array, dimension (2, N lg N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. + + GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N) + Each number indicates the S value to be used in the + corresponding Givens rotation. + + WORK (workspace) DOUBLE PRECISION array, dimension (3*N+QSIZ*N) + + IWORK (workspace) INTEGER array, dimension (4*N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an eigenvalue did not converge + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --qstore; + --qptr; + --prmptr; + --perm; + --givptr; + givcol -= 3; + givnum -= 3; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*icompq == 1 && *qsiz < *n) { + *info = -4; + } else if (*ldq < max(1,*n)) { + *info = -9; + } else if (min(1,*n) > *cutpnt || *n < *cutpnt) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAED7", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* + The following values are for bookkeeping purposes only. They are + integer pointers which indicate the portion of the workspace + used by a particular array in DLAED8 and DLAED9. +*/ + + if (*icompq == 1) { + ldq2 = *qsiz; + } else { + ldq2 = *n; + } + + iz = 1; + idlmda = iz + *n; + iw = idlmda + *n; + iq2 = iw + *n; + is = iq2 + *n * ldq2; + + indx = 1; + indxc = indx + *n; + coltyp = indxc + *n; + indxp = coltyp + *n; + +/* + Form the z-vector which consists of the last row of Q_1 and the + first row of Q_2. +*/ + + ptr = pow_ii(&c__2, tlvls) + 1; + i__1 = *curlvl - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *tlvls - i__; + ptr += pow_ii(&c__2, &i__2); +/* L10: */ + } + curr = ptr + *curpbm; + dlaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], & + givcol[3], &givnum[3], &qstore[1], &qptr[1], &work[iz], &work[iz + + *n], info); + +/* + When solving the final problem, we no longer need the stored data, + so we will overwrite the data from this level onto the previously + used storage space. +*/ + + if (*curlvl == *tlvls) { + qptr[curr] = 1; + prmptr[curr] = 1; + givptr[curr] = 1; + } + +/* Sort and Deflate eigenvalues. */ + + dlaed8_(icompq, &k, n, qsiz, &d__[1], &q[q_offset], ldq, &indxq[1], rho, + cutpnt, &work[iz], &work[idlmda], &work[iq2], &ldq2, &work[iw], & + perm[prmptr[curr]], &givptr[curr + 1], &givcol[(givptr[curr] << 1) + + 1], &givnum[(givptr[curr] << 1) + 1], &iwork[indxp], &iwork[ + indx], info); + prmptr[curr + 1] = prmptr[curr] + *n; + givptr[curr + 1] += givptr[curr]; + +/* Solve Secular Equation. */ + + if (k != 0) { + dlaed9_(&k, &c__1, &k, n, &d__[1], &work[is], &k, rho, &work[idlmda], + &work[iw], &qstore[qptr[curr]], &k, info); + if (*info != 0) { + goto L30; + } + if (*icompq == 1) { + dgemm_("N", "N", qsiz, &k, &k, &c_b15, &work[iq2], &ldq2, &qstore[ + qptr[curr]], &k, &c_b29, &q[q_offset], ldq); + } +/* Computing 2nd power */ + i__1 = k; + qptr[curr + 1] = qptr[curr] + i__1 * i__1; + +/* Prepare the INDXQ sorting permutation. */ + + n1 = k; + n2 = *n - k; + dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); + } else { + qptr[curr + 1] = qptr[curr]; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indxq[i__] = i__; +/* L20: */ + } + } + +L30: + return 0; + +/* End of DLAED7 */ + +} /* dlaed7_ */ + +/* Subroutine */ int dlaed8_(integer *icompq, integer *k, integer *n, integer + *qsiz, doublereal *d__, doublereal *q, integer *ldq, integer *indxq, + doublereal *rho, integer *cutpnt, doublereal *z__, doublereal *dlamda, + doublereal *q2, integer *ldq2, doublereal *w, integer *perm, integer + *givptr, integer *givcol, doublereal *givnum, integer *indxp, integer + *indx, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, q2_dim1, q2_offset, i__1; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal c__; + static integer i__, j; + static doublereal s, t; + static integer k2, n1, n2, jp, n1p1; + static doublereal eps, tau, tol; + static integer jlam, imax, jmax; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *), dscal_( + integer *, doublereal *, doublereal *, integer *), dcopy_(integer + *, doublereal *, integer *, doublereal *, integer *); + + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + September 30, 1994 + + + Purpose + ======= + + DLAED8 merges the two sets of eigenvalues together into a single + sorted set. Then it tries to deflate the size of the problem. + There are two ways in which deflation can occur: when two or more + eigenvalues are close together or if there is a tiny element in the + Z vector. For each such occurrence the order of the related secular + equation problem is reduced by one. + + Arguments + ========= + + ICOMPQ (input) INTEGER + = 0: Compute eigenvalues only. + = 1: Compute eigenvectors of original dense symmetric matrix + also. On entry, Q contains the orthogonal matrix used + to reduce the original matrix to tridiagonal form. + + K (output) INTEGER + The number of non-deflated eigenvalues, and the order of the + related secular equation. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + QSIZ (input) INTEGER + The dimension of the orthogonal matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the eigenvalues of the two submatrices to be + combined. On exit, the trailing (N-K) updated eigenvalues + (those which were deflated) sorted into increasing order. + + Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) + If ICOMPQ = 0, Q is not referenced. Otherwise, + on entry, Q contains the eigenvectors of the partially solved + system which has been previously updated in matrix + multiplies with other partially solved eigensystems. + On exit, Q contains the trailing (N-K) updated eigenvectors + (those which were deflated) in its last N-K columns. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + INDXQ (input) INTEGER array, dimension (N) + The permutation which separately sorts the two sub-problems + in D into ascending order. Note that elements in the second + half of this permutation must first have CUTPNT added to + their values in order to be accurate. + + RHO (input/output) DOUBLE PRECISION + On entry, the off-diagonal element associated with the rank-1 + cut which originally split the two submatrices which are now + being recombined. + On exit, RHO has been modified to the value required by + DLAED3. + + CUTPNT (input) INTEGER + The location of the last eigenvalue in the leading + sub-matrix. min(1,N) <= CUTPNT <= N. + + Z (input) DOUBLE PRECISION array, dimension (N) + On entry, Z contains the updating vector (the last row of + the first sub-eigenvector matrix and the first row of the + second sub-eigenvector matrix). + On exit, the contents of Z are destroyed by the updating + process. + + DLAMDA (output) DOUBLE PRECISION array, dimension (N) + A copy of the first K eigenvalues which will be used by + DLAED3 to form the secular equation. + + Q2 (output) DOUBLE PRECISION array, dimension (LDQ2,N) + If ICOMPQ = 0, Q2 is not referenced. Otherwise, + a copy of the first K eigenvectors which will be used by + DLAED7 in a matrix multiply (DGEMM) to update the new + eigenvectors. + + LDQ2 (input) INTEGER + The leading dimension of the array Q2. LDQ2 >= max(1,N). + + W (output) DOUBLE PRECISION array, dimension (N) + The first k values of the final deflation-altered z-vector and + will be passed to DLAED3. + + PERM (output) INTEGER array, dimension (N) + The permutations (from deflation and sorting) to be applied + to each eigenblock. + + GIVPTR (output) INTEGER + The number of Givens rotations which took place in this + subproblem. + + GIVCOL (output) INTEGER array, dimension (2, N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. + + GIVNUM (output) DOUBLE PRECISION array, dimension (2, N) + Each number indicates the S value to be used in the + corresponding Givens rotation. + + INDXP (workspace) INTEGER array, dimension (N) + The permutation used to place deflated values of D at the end + of the array. INDXP(1:K) points to the nondeflated D-values + and INDXP(K+1:N) points to the deflated eigenvalues. + + INDX (workspace) INTEGER array, dimension (N) + The permutation used to sort the contents of D into ascending + order. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --z__; + --dlamda; + q2_dim1 = *ldq2; + q2_offset = 1 + q2_dim1; + q2 -= q2_offset; + --w; + --perm; + givcol -= 3; + givnum -= 3; + --indxp; + --indx; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*n < 0) { + *info = -3; + } else if (*icompq == 1 && *qsiz < *n) { + *info = -4; + } else if (*ldq < max(1,*n)) { + *info = -7; + } else if (*cutpnt < min(1,*n) || *cutpnt > *n) { + *info = -10; + } else if (*ldq2 < max(1,*n)) { + *info = -14; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAED8", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + n1 = *cutpnt; + n2 = *n - n1; + n1p1 = n1 + 1; + + if (*rho < 0.) { + dscal_(&n2, &c_b151, &z__[n1p1], &c__1); + } + +/* Normalize z so that norm(z) = 1 */ + + t = 1. / sqrt(2.); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + indx[j] = j; +/* L10: */ + } + dscal_(n, &t, &z__[1], &c__1); + *rho = (d__1 = *rho * 2., abs(d__1)); + +/* Sort the eigenvalues into increasing order */ + + i__1 = *n; + for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) { + indxq[i__] += *cutpnt; +/* L20: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = d__[indxq[i__]]; + w[i__] = z__[indxq[i__]]; +/* L30: */ + } + i__ = 1; + j = *cutpnt + 1; + dlamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = dlamda[indx[i__]]; + z__[i__] = w[indx[i__]]; +/* L40: */ + } + +/* Calculate the allowable deflation tolerence */ + + imax = idamax_(n, &z__[1], &c__1); + jmax = idamax_(n, &d__[1], &c__1); + eps = EPSILON; + tol = eps * 8. * (d__1 = d__[jmax], abs(d__1)); + +/* + If the rank-1 modifier is small enough, no more needs to be done + except to reorganize Q so that its columns correspond with the + elements in D. +*/ + + if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) { + *k = 0; + if (*icompq == 0) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + perm[j] = indxq[indx[j]]; +/* L50: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + perm[j] = indxq[indx[j]]; + dcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + + 1], &c__1); +/* L60: */ + } + dlacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq); + } + return 0; + } + +/* + If there are multiple eigenvalues then the problem deflates. Here + the number of equal eigenvalues are found. As each equal + eigenvalue is found, an elementary reflector is computed to rotate + the corresponding eigensubspace so that the corresponding + components of Z are zero in this new basis. +*/ + + *k = 0; + *givptr = 0; + k2 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + indxp[k2] = j; + if (j == *n) { + goto L110; + } + } else { + jlam = j; + goto L80; + } +/* L70: */ + } +L80: + ++j; + if (j > *n) { + goto L100; + } + if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + indxp[k2] = j; + } else { + +/* Check if eigenvalues are close enough to allow deflation. */ + + s = z__[jlam]; + c__ = z__[j]; + +/* + Find sqrt(a**2+b**2) without overflow or + destructive underflow. +*/ + + tau = dlapy2_(&c__, &s); + t = d__[j] - d__[jlam]; + c__ /= tau; + s = -s / tau; + if ((d__1 = t * c__ * s, abs(d__1)) <= tol) { + +/* Deflation is possible. */ + + z__[j] = tau; + z__[jlam] = 0.; + +/* Record the appropriate Givens rotation */ + + ++(*givptr); + givcol[(*givptr << 1) + 1] = indxq[indx[jlam]]; + givcol[(*givptr << 1) + 2] = indxq[indx[j]]; + givnum[(*givptr << 1) + 1] = c__; + givnum[(*givptr << 1) + 2] = s; + if (*icompq == 1) { + drot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[ + indxq[indx[j]] * q_dim1 + 1], &c__1, &c__, &s); + } + t = d__[jlam] * c__ * c__ + d__[j] * s * s; + d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__; + d__[jlam] = t; + --k2; + i__ = 1; +L90: + if (k2 + i__ <= *n) { + if (d__[jlam] < d__[indxp[k2 + i__]]) { + indxp[k2 + i__ - 1] = indxp[k2 + i__]; + indxp[k2 + i__] = jlam; + ++i__; + goto L90; + } else { + indxp[k2 + i__ - 1] = jlam; + } + } else { + indxp[k2 + i__ - 1] = jlam; + } + jlam = j; + } else { + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + jlam = j; + } + } + goto L80; +L100: + +/* Record the last eigenvalue. */ + + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + +L110: + +/* + Sort the eigenvalues and corresponding eigenvectors into DLAMDA + and Q2 respectively. The eigenvalues/vectors which were not + deflated go into the first K slots of DLAMDA and Q2 respectively, + while those which were deflated go into the last N - K slots. +*/ + + if (*icompq == 0) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jp = indxp[j]; + dlamda[j] = d__[jp]; + perm[j] = indxq[indx[jp]]; +/* L120: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jp = indxp[j]; + dlamda[j] = d__[jp]; + perm[j] = indxq[indx[jp]]; + dcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1] + , &c__1); +/* L130: */ + } + } + +/* + The deflated eigenvalues and their corresponding vectors go back + into the last N - K slots of D and Q respectively. +*/ + + if (*k < *n) { + if (*icompq == 0) { + i__1 = *n - *k; + dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); + } else { + i__1 = *n - *k; + dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); + i__1 = *n - *k; + dlacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(* + k + 1) * q_dim1 + 1], ldq); + } + } + + return 0; + +/* End of DLAED8 */ + +} /* dlaed8_ */ + +/* Subroutine */ int dlaed9_(integer *k, integer *kstart, integer *kstop, + integer *n, doublereal *d__, doublereal *q, integer *ldq, doublereal * + rho, doublereal *dlamda, doublereal *w, doublereal *s, integer *lds, + integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, s_dim1, s_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + static integer i__, j; + static doublereal temp; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dlaed4_(integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *); + extern doublereal dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + September 30, 1994 + + + Purpose + ======= + + DLAED9 finds the roots of the secular equation, as defined by the + values in D, Z, and RHO, between KSTART and KSTOP. It makes the + appropriate calls to DLAED4 and then stores the new matrix of + eigenvectors for use in calculating the next level of Z vectors. + + Arguments + ========= + + K (input) INTEGER + The number of terms in the rational function to be solved by + DLAED4. K >= 0. + + KSTART (input) INTEGER + KSTOP (input) INTEGER + The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP + are to be computed. 1 <= KSTART <= KSTOP <= K. + + N (input) INTEGER + The number of rows and columns in the Q matrix. + N >= K (delation may result in N > K). + + D (output) DOUBLE PRECISION array, dimension (N) + D(I) contains the updated eigenvalues + for KSTART <= I <= KSTOP. + + Q (workspace) DOUBLE PRECISION array, dimension (LDQ,N) + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max( 1, N ). + + RHO (input) DOUBLE PRECISION + The value of the parameter in the rank one update equation. + RHO >= 0 required. + + DLAMDA (input) DOUBLE PRECISION array, dimension (K) + The first K elements of this array contain the old roots + of the deflated updating problem. These are the poles + of the secular equation. + + W (input) DOUBLE PRECISION array, dimension (K) + The first K elements of this array contain the components + of the deflation-adjusted updating vector. + + S (output) DOUBLE PRECISION array, dimension (LDS, K) + Will contain the eigenvectors of the repaired matrix which + will be stored for subsequent Z vector calculation and + multiplied by the previously accumulated eigenvectors + to update the system. + + LDS (input) INTEGER + The leading dimension of S. LDS >= max( 1, K ). + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an eigenvalue did not converge + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --dlamda; + --w; + s_dim1 = *lds; + s_offset = 1 + s_dim1; + s -= s_offset; + + /* Function Body */ + *info = 0; + + if (*k < 0) { + *info = -1; + } else if (*kstart < 1 || *kstart > max(1,*k)) { + *info = -2; + } else if (max(1,*kstop) < *kstart || *kstop > max(1,*k)) { + *info = -3; + } else if (*n < *k) { + *info = -4; + } else if (*ldq < max(1,*k)) { + *info = -7; + } else if (*lds < max(1,*k)) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAED9", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*k == 0) { + return 0; + } + +/* + Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can + be computed with high relative accuracy (barring over/underflow). + This is a problem on machines without a guard digit in + add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). + The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), + which on any of these machines zeros out the bottommost + bit of DLAMDA(I) if it is 1; this makes the subsequent + subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation + occurs. On binary machines with a guard digit (almost all + machines) it does not change DLAMDA(I) at all. On hexadecimal + and decimal machines with a guard digit, it slightly + changes the bottommost bits of DLAMDA(I). It does not account + for hexadecimal or decimal machines without guard digits + (we know of none). We use a subroutine call to compute + 2*DLAMBDA(I) to prevent optimizing compilers from eliminating + this code. +*/ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; +/* L10: */ + } + + i__1 = *kstop; + for (j = *kstart; j <= i__1; ++j) { + dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], + info); + +/* If the zero finder fails, the computation is terminated. */ + + if (*info != 0) { + goto L120; + } +/* L20: */ + } + + if (*k == 1 || *k == 2) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *k; + for (j = 1; j <= i__2; ++j) { + s[j + i__ * s_dim1] = q[j + i__ * q_dim1]; +/* L30: */ + } +/* L40: */ + } + goto L120; + } + +/* Compute updated W. */ + + dcopy_(k, &w[1], &c__1, &s[s_offset], &c__1); + +/* Initialize W(I) = Q(I,I) */ + + i__1 = *ldq + 1; + dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1); + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L50: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L60: */ + } +/* L70: */ + } + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + d__1 = sqrt(-w[i__]); + w[i__] = d_sign(&d__1, &s[i__ + s_dim1]); +/* L80: */ + } + +/* Compute eigenvectors of the modified rank-1 modification. */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + q[i__ + j * q_dim1] = w[i__] / q[i__ + j * q_dim1]; +/* L90: */ + } + temp = dnrm2_(k, &q[j * q_dim1 + 1], &c__1); + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + s[i__ + j * s_dim1] = q[i__ + j * q_dim1] / temp; +/* L100: */ + } +/* L110: */ + } + +L120: + return 0; + +/* End of DLAED9 */ + +} /* dlaed9_ */ + +/* Subroutine */ int dlaeda_(integer *n, integer *tlvls, integer *curlvl, + integer *curpbm, integer *prmptr, integer *perm, integer *givptr, + integer *givcol, doublereal *givnum, doublereal *q, integer *qptr, + doublereal *z__, doublereal *ztemp, integer *info) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + double sqrt(doublereal); + + /* Local variables */ + static integer i__, k, mid, ptr; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + static integer curr, bsiz1, bsiz2, psiz1, psiz2, zptr1; + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), dcopy_(integer *, + doublereal *, integer *, doublereal *, integer *), xerbla_(char *, + integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + DLAEDA computes the Z vector corresponding to the merge step in the + CURLVLth step of the merge process with TLVLS steps for the CURPBMth + problem. + + Arguments + ========= + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + TLVLS (input) INTEGER + The total number of merging levels in the overall divide and + conquer tree. + + CURLVL (input) INTEGER + The current level in the overall merge routine, + 0 <= curlvl <= tlvls. + + CURPBM (input) INTEGER + The current problem in the current level in the overall + merge routine (counting from upper left to lower right). + + PRMPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in PERM a + level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) + indicates the size of the permutation and incidentally the + size of the full, non-deflated problem. + + PERM (input) INTEGER array, dimension (N lg N) + Contains the permutations (from deflation and sorting) to be + applied to each eigenblock. + + GIVPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in GIVCOL a + level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) + indicates the number of Givens rotations. + + GIVCOL (input) INTEGER array, dimension (2, N lg N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. + + GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N) + Each number indicates the S value to be used in the + corresponding Givens rotation. + + Q (input) DOUBLE PRECISION array, dimension (N**2) + Contains the square eigenblocks from previous levels, the + starting positions for blocks are given by QPTR. + + QPTR (input) INTEGER array, dimension (N+2) + Contains a list of pointers which indicate where in Q an + eigenblock is stored. SQRT( QPTR(i+1) - QPTR(i) ) indicates + the size of the block. + + Z (output) DOUBLE PRECISION array, dimension (N) + On output this vector contains the updating vector (the last + row of the first sub-eigenvector matrix and the first row of + the second sub-eigenvector matrix). + + ZTEMP (workspace) DOUBLE PRECISION array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --ztemp; + --z__; + --qptr; + --q; + givnum -= 3; + givcol -= 3; + --givptr; + --perm; + --prmptr; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -1; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAEDA", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine location of first number in second half. */ + + mid = *n / 2 + 1; + +/* Gather last/first rows of appropriate eigenblocks into center of Z */ + + ptr = 1; + +/* + Determine location of lowest level subproblem in the full storage + scheme +*/ + + i__1 = *curlvl - 1; + curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1; + +/* + Determine size of these matrices. We add HALF to the value of + the SQRT in case the machine underestimates one of these square + roots. +*/ + + bsiz1 = (integer) (sqrt((doublereal) (qptr[curr + 1] - qptr[curr])) + .5); + bsiz2 = (integer) (sqrt((doublereal) (qptr[curr + 2] - qptr[curr + 1])) + + .5); + i__1 = mid - bsiz1 - 1; + for (k = 1; k <= i__1; ++k) { + z__[k] = 0.; +/* L10: */ + } + dcopy_(&bsiz1, &q[qptr[curr] + bsiz1 - 1], &bsiz1, &z__[mid - bsiz1], & + c__1); + dcopy_(&bsiz2, &q[qptr[curr + 1]], &bsiz2, &z__[mid], &c__1); + i__1 = *n; + for (k = mid + bsiz2; k <= i__1; ++k) { + z__[k] = 0.; +/* L20: */ + } + +/* + Loop thru remaining levels 1 -> CURLVL applying the Givens + rotations and permutation and then multiplying the center matrices + against the current Z. +*/ + + ptr = pow_ii(&c__2, tlvls) + 1; + i__1 = *curlvl - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = *curlvl - k; + i__3 = *curlvl - k - 1; + curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) - + 1; + psiz1 = prmptr[curr + 1] - prmptr[curr]; + psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; + zptr1 = mid - psiz1; + +/* Apply Givens at CURR and CURR+1 */ + + i__2 = givptr[curr + 1] - 1; + for (i__ = givptr[curr]; i__ <= i__2; ++i__) { + drot_(&c__1, &z__[zptr1 + givcol[(i__ << 1) + 1] - 1], &c__1, & + z__[zptr1 + givcol[(i__ << 1) + 2] - 1], &c__1, &givnum[( + i__ << 1) + 1], &givnum[(i__ << 1) + 2]); +/* L30: */ + } + i__2 = givptr[curr + 2] - 1; + for (i__ = givptr[curr + 1]; i__ <= i__2; ++i__) { + drot_(&c__1, &z__[mid - 1 + givcol[(i__ << 1) + 1]], &c__1, &z__[ + mid - 1 + givcol[(i__ << 1) + 2]], &c__1, &givnum[(i__ << + 1) + 1], &givnum[(i__ << 1) + 2]); +/* L40: */ + } + psiz1 = prmptr[curr + 1] - prmptr[curr]; + psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; + i__2 = psiz1 - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + ztemp[i__ + 1] = z__[zptr1 + perm[prmptr[curr] + i__] - 1]; +/* L50: */ + } + i__2 = psiz2 - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + ztemp[psiz1 + i__ + 1] = z__[mid + perm[prmptr[curr + 1] + i__] - + 1]; +/* L60: */ + } + +/* + Multiply Blocks at CURR and CURR+1 + + Determine size of these matrices. We add HALF to the value of + the SQRT in case the machine underestimates one of these + square roots. +*/ + + bsiz1 = (integer) (sqrt((doublereal) (qptr[curr + 1] - qptr[curr])) + + .5); + bsiz2 = (integer) (sqrt((doublereal) (qptr[curr + 2] - qptr[curr + 1]) + ) + .5); + if (bsiz1 > 0) { + dgemv_("T", &bsiz1, &bsiz1, &c_b15, &q[qptr[curr]], &bsiz1, & + ztemp[1], &c__1, &c_b29, &z__[zptr1], &c__1); + } + i__2 = psiz1 - bsiz1; + dcopy_(&i__2, &ztemp[bsiz1 + 1], &c__1, &z__[zptr1 + bsiz1], &c__1); + if (bsiz2 > 0) { + dgemv_("T", &bsiz2, &bsiz2, &c_b15, &q[qptr[curr + 1]], &bsiz2, & + ztemp[psiz1 + 1], &c__1, &c_b29, &z__[mid], &c__1); + } + i__2 = psiz2 - bsiz2; + dcopy_(&i__2, &ztemp[psiz1 + bsiz2 + 1], &c__1, &z__[mid + bsiz2], & + c__1); + + i__2 = *tlvls - k; + ptr += pow_ii(&c__2, &i__2); +/* L70: */ + } + + return 0; + +/* End of DLAEDA */ + +} /* dlaeda_ */ + +/* Subroutine */ int dlaev2_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *rt1, doublereal *rt2, doublereal *cs1, doublereal *sn1) +{ + /* System generated locals */ + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal ab, df, cs, ct, tb, sm, tn, rt, adf, acs; + static integer sgn1, sgn2; + static doublereal acmn, acmx; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix + [ A B ] + [ B C ]. + On return, RT1 is the eigenvalue of larger absolute value, RT2 is the + eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right + eigenvector for RT1, giving the decomposition + + [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] + [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. + + Arguments + ========= + + A (input) DOUBLE PRECISION + The (1,1) element of the 2-by-2 matrix. + + B (input) DOUBLE PRECISION + The (1,2) element and the conjugate of the (2,1) element of + the 2-by-2 matrix. + + C (input) DOUBLE PRECISION + The (2,2) element of the 2-by-2 matrix. + + RT1 (output) DOUBLE PRECISION + The eigenvalue of larger absolute value. + + RT2 (output) DOUBLE PRECISION + The eigenvalue of smaller absolute value. + + CS1 (output) DOUBLE PRECISION + SN1 (output) DOUBLE PRECISION + The vector (CS1, SN1) is a unit right eigenvector for RT1. + + Further Details + =============== + + RT1 is accurate to a few ulps barring over/underflow. + + RT2 may be inaccurate if there is massive cancellation in the + determinant A*C-B*B; higher precision or correctly rounded or + correctly truncated arithmetic would be needed to compute RT2 + accurately in all cases. + + CS1 and SN1 are accurate to a few ulps barring over/underflow. + + Overflow is possible only if RT1 is within a factor of 5 of overflow. + Underflow is harmless if the input data is 0 or exceeds + underflow_threshold / macheps. + + ===================================================================== + + + Compute the eigenvalues +*/ + + sm = *a + *c__; + df = *a - *c__; + adf = abs(df); + tb = *b + *b; + ab = abs(tb); + if (abs(*a) > abs(*c__)) { + acmx = *a; + acmn = *c__; + } else { + acmx = *c__; + acmn = *a; + } + if (adf > ab) { +/* Computing 2nd power */ + d__1 = ab / adf; + rt = adf * sqrt(d__1 * d__1 + 1.); + } else if (adf < ab) { +/* Computing 2nd power */ + d__1 = adf / ab; + rt = ab * sqrt(d__1 * d__1 + 1.); + } else { + +/* Includes case AB=ADF=0 */ + + rt = ab * sqrt(2.); + } + if (sm < 0.) { + *rt1 = (sm - rt) * .5; + sgn1 = -1; + +/* + Order of execution important. + To get fully accurate smaller eigenvalue, + next line needs to be executed in higher precision. +*/ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else if (sm > 0.) { + *rt1 = (sm + rt) * .5; + sgn1 = 1; + +/* + Order of execution important. + To get fully accurate smaller eigenvalue, + next line needs to be executed in higher precision. +*/ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else { + +/* Includes case RT1 = RT2 = 0 */ + + *rt1 = rt * .5; + *rt2 = rt * -.5; + sgn1 = 1; + } + +/* Compute the eigenvector */ + + if (df >= 0.) { + cs = df + rt; + sgn2 = 1; + } else { + cs = df - rt; + sgn2 = -1; + } + acs = abs(cs); + if (acs > ab) { + ct = -tb / cs; + *sn1 = 1. / sqrt(ct * ct + 1.); + *cs1 = ct * *sn1; + } else { + if (ab == 0.) { + *cs1 = 1.; + *sn1 = 0.; + } else { + tn = -cs / tb; + *cs1 = 1. / sqrt(tn * tn + 1.); + *sn1 = tn * *cs1; + } + } + if (sgn1 == sgn2) { + tn = *cs1; + *cs1 = -(*sn1); + *sn1 = tn; + } + return 0; + +/* End of DLAEV2 */ + +} /* dlaev2_ */ + +/* Subroutine */ int dlahqr_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal + *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, + integer *ldz, integer *info) +{ + /* System generated locals */ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + static integer i__, j, k, l, m; + static doublereal s, v[3]; + static integer i1, i2; + static doublereal t1, t2, t3, v1, v2, v3, h00, h10, h11, h12, h21, h22, + h33, h44; + static integer nh; + static doublereal cs; + static integer nr; + static doublereal sn; + static integer nz; + static doublereal ave, h33s, h44s; + static integer itn, its; + static doublereal ulp, sum, tst1, h43h34, disc, unfl, ovfl; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + static doublereal work[1]; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dlanv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *), dlabad_( + doublereal *, doublereal *); + + extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *, + integer *, doublereal *); + extern doublereal dlanhs_(char *, integer *, doublereal *, integer *, + doublereal *); + static doublereal smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DLAHQR is an auxiliary routine called by DHSEQR to update the + eigenvalues and Schur decomposition already computed by DHSEQR, by + dealing with the Hessenberg submatrix in rows and columns ILO to IHI. + + Arguments + ========= + + WANTT (input) LOGICAL + = .TRUE. : the full Schur form T is required; + = .FALSE.: only eigenvalues are required. + + WANTZ (input) LOGICAL + = .TRUE. : the matrix of Schur vectors Z is required; + = .FALSE.: Schur vectors are not required. + + N (input) INTEGER + The order of the matrix H. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper quasi-triangular in + rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless + ILO = 1). DLAHQR works primarily with the Hessenberg + submatrix in rows and columns ILO to IHI, but applies + transformations to all of H if WANTT is .TRUE.. + 1 <= ILO <= max(1,IHI); IHI <= N. + + H (input/output) DOUBLE PRECISION array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if WANTT is .TRUE., H is upper quasi-triangular in + rows and columns ILO:IHI, with any 2-by-2 diagonal blocks in + standard form. If WANTT is .FALSE., the contents of H are + unspecified on exit. + + LDH (input) INTEGER + The leading dimension of the array H. LDH >= max(1,N). + + WR (output) DOUBLE PRECISION array, dimension (N) + WI (output) DOUBLE PRECISION array, dimension (N) + The real and imaginary parts, respectively, of the computed + eigenvalues ILO to IHI are stored in the corresponding + elements of WR and WI. If two eigenvalues are computed as a + complex conjugate pair, they are stored in consecutive + elements of WR and WI, say the i-th and (i+1)th, with + WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the + eigenvalues are stored in the same order as on the diagonal + of the Schur form returned in H, with WR(i) = H(i,i), and, if + H(i:i+1,i:i+1) is a 2-by-2 diagonal block, + WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. + 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. + + Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) + If WANTZ is .TRUE., on entry Z must contain the current + matrix Z of transformations accumulated by DHSEQR, and on + exit Z has been updated; transformations are applied only to + the submatrix Z(ILOZ:IHIZ,ILO:IHI). + If WANTZ is .FALSE., Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + > 0: DLAHQR failed to compute all the eigenvalues ILO to IHI + in a total of 30*(IHI-ILO+1) iterations; if INFO = i, + elements i+1:ihi of WR and WI contain those eigenvalues + which have been successfully computed. + + Further Details + =============== + + 2-96 Based on modifications by + David Day, Sandia National Laboratory, USA + + ===================================================================== +*/ + + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*ilo == *ihi) { + wr[*ilo] = h__[*ilo + *ilo * h_dim1]; + wi[*ilo] = 0.; + return 0; + } + + nh = *ihi - *ilo + 1; + nz = *ihiz - *iloz + 1; + +/* + Set machine-dependent constants for the stopping criterion. + If norm(H) <= sqrt(OVFL), overflow should not occur. +*/ + + unfl = SAFEMINIMUM; + ovfl = 1. / unfl; + dlabad_(&unfl, &ovfl); + ulp = PRECISION; + smlnum = unfl * (nh / ulp); + +/* + I1 and I2 are the indices of the first row and last column of H + to which transformations must be applied. If eigenvalues only are + being computed, I1 and I2 are set inside the main loop. +*/ + + if (*wantt) { + i1 = 1; + i2 = *n; + } + +/* ITN is the total number of QR iterations allowed. */ + + itn = nh * 30; + +/* + The main loop begins here. I is the loop index and decreases from + IHI to ILO in steps of 1 or 2. Each iteration of the loop works + with the active submatrix in rows and columns L to I. + Eigenvalues I+1 to IHI have already converged. Either L = ILO or + H(L,L-1) is negligible so that the matrix splits. +*/ + + i__ = *ihi; +L10: + l = *ilo; + if (i__ < *ilo) { + goto L150; + } + +/* + Perform QR iterations on rows and columns ILO to I until a + submatrix of order 1 or 2 splits off at the bottom because a + subdiagonal element has become negligible. +*/ + + i__1 = itn; + for (its = 0; its <= i__1; ++its) { + +/* Look for a single small subdiagonal element. */ + + i__2 = l + 1; + for (k = i__; k >= i__2; --k) { + tst1 = (d__1 = h__[k - 1 + (k - 1) * h_dim1], abs(d__1)) + (d__2 = + h__[k + k * h_dim1], abs(d__2)); + if (tst1 == 0.) { + i__3 = i__ - l + 1; + tst1 = dlanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, work); + } +/* Computing MAX */ + d__2 = ulp * tst1; + if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= max(d__2, + smlnum)) { + goto L30; + } +/* L20: */ + } +L30: + l = k; + if (l > *ilo) { + +/* H(L,L-1) is negligible */ + + h__[l + (l - 1) * h_dim1] = 0.; + } + +/* Exit from loop if a submatrix of order 1 or 2 has split off. */ + + if (l >= i__ - 1) { + goto L140; + } + +/* + Now the active submatrix is in rows and columns L to I. If + eigenvalues only are being computed, only the active submatrix + need be transformed. +*/ + + if (! (*wantt)) { + i1 = l; + i2 = i__; + } + + if (its == 10 || its == 20) { + +/* Exceptional shift. */ + + s = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + (d__2 = + h__[i__ - 1 + (i__ - 2) * h_dim1], abs(d__2)); + h44 = s * .75 + h__[i__ + i__ * h_dim1]; + h33 = h44; + h43h34 = s * -.4375 * s; + } else { + +/* + Prepare to use Francis' double shift + (i.e. 2nd degree generalized Rayleigh quotient) +*/ + + h44 = h__[i__ + i__ * h_dim1]; + h33 = h__[i__ - 1 + (i__ - 1) * h_dim1]; + h43h34 = h__[i__ + (i__ - 1) * h_dim1] * h__[i__ - 1 + i__ * + h_dim1]; + s = h__[i__ - 1 + (i__ - 2) * h_dim1] * h__[i__ - 1 + (i__ - 2) * + h_dim1]; + disc = (h33 - h44) * .5; + disc = disc * disc + h43h34; + if (disc > 0.) { + +/* Real roots: use Wilkinson's shift twice */ + + disc = sqrt(disc); + ave = (h33 + h44) * .5; + if (abs(h33) - abs(h44) > 0.) { + h33 = h33 * h44 - h43h34; + h44 = h33 / (d_sign(&disc, &ave) + ave); + } else { + h44 = d_sign(&disc, &ave) + ave; + } + h33 = h44; + h43h34 = 0.; + } + } + +/* Look for two consecutive small subdiagonal elements. */ + + i__2 = l; + for (m = i__ - 2; m >= i__2; --m) { +/* + Determine the effect of starting the double-shift QR + iteration at row M, and see if this would make H(M,M-1) + negligible. +*/ + + h11 = h__[m + m * h_dim1]; + h22 = h__[m + 1 + (m + 1) * h_dim1]; + h21 = h__[m + 1 + m * h_dim1]; + h12 = h__[m + (m + 1) * h_dim1]; + h44s = h44 - h11; + h33s = h33 - h11; + v1 = (h33s * h44s - h43h34) / h21 + h12; + v2 = h22 - h11 - h33s - h44s; + v3 = h__[m + 2 + (m + 1) * h_dim1]; + s = abs(v1) + abs(v2) + abs(v3); + v1 /= s; + v2 /= s; + v3 /= s; + v[0] = v1; + v[1] = v2; + v[2] = v3; + if (m == l) { + goto L50; + } + h00 = h__[m - 1 + (m - 1) * h_dim1]; + h10 = h__[m + (m - 1) * h_dim1]; + tst1 = abs(v1) * (abs(h00) + abs(h11) + abs(h22)); + if (abs(h10) * (abs(v2) + abs(v3)) <= ulp * tst1) { + goto L50; + } +/* L40: */ + } +L50: + +/* Double-shift QR step */ + + i__2 = i__ - 1; + for (k = m; k <= i__2; ++k) { + +/* + The first iteration of this loop determines a reflection G + from the vector V and applies it from left and right to H, + thus creating a nonzero bulge below the subdiagonal. + + Each subsequent iteration determines a reflection G to + restore the Hessenberg form in the (K-1)th column, and thus + chases the bulge one step toward the bottom of the active + submatrix. NR is the order of G. + + Computing MIN +*/ + i__3 = 3, i__4 = i__ - k + 1; + nr = min(i__3,i__4); + if (k > m) { + dcopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); + } + dlarfg_(&nr, v, &v[1], &c__1, &t1); + if (k > m) { + h__[k + (k - 1) * h_dim1] = v[0]; + h__[k + 1 + (k - 1) * h_dim1] = 0.; + if (k < i__ - 1) { + h__[k + 2 + (k - 1) * h_dim1] = 0.; + } + } else if (m > l) { + h__[k + (k - 1) * h_dim1] = -h__[k + (k - 1) * h_dim1]; + } + v2 = v[1]; + t2 = t1 * v2; + if (nr == 3) { + v3 = v[2]; + t3 = t1 * v3; + +/* + Apply G from the left to transform the rows of the matrix + in columns K to I2. +*/ + + i__3 = i2; + for (j = k; j <= i__3; ++j) { + sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1] + + v3 * h__[k + 2 + j * h_dim1]; + h__[k + j * h_dim1] -= sum * t1; + h__[k + 1 + j * h_dim1] -= sum * t2; + h__[k + 2 + j * h_dim1] -= sum * t3; +/* L60: */ + } + +/* + Apply G from the right to transform the columns of the + matrix in rows I1 to min(K+3,I). + + Computing MIN +*/ + i__4 = k + 3; + i__3 = min(i__4,i__); + for (j = i1; j <= i__3; ++j) { + sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] + + v3 * h__[j + (k + 2) * h_dim1]; + h__[j + k * h_dim1] -= sum * t1; + h__[j + (k + 1) * h_dim1] -= sum * t2; + h__[j + (k + 2) * h_dim1] -= sum * t3; +/* L70: */ + } + + if (*wantz) { + +/* Accumulate transformations in the matrix Z */ + + i__3 = *ihiz; + for (j = *iloz; j <= i__3; ++j) { + sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * + z_dim1] + v3 * z__[j + (k + 2) * z_dim1]; + z__[j + k * z_dim1] -= sum * t1; + z__[j + (k + 1) * z_dim1] -= sum * t2; + z__[j + (k + 2) * z_dim1] -= sum * t3; +/* L80: */ + } + } + } else if (nr == 2) { + +/* + Apply G from the left to transform the rows of the matrix + in columns K to I2. +*/ + + i__3 = i2; + for (j = k; j <= i__3; ++j) { + sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1]; + h__[k + j * h_dim1] -= sum * t1; + h__[k + 1 + j * h_dim1] -= sum * t2; +/* L90: */ + } + +/* + Apply G from the right to transform the columns of the + matrix in rows I1 to min(K+3,I). +*/ + + i__3 = i__; + for (j = i1; j <= i__3; ++j) { + sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] + ; + h__[j + k * h_dim1] -= sum * t1; + h__[j + (k + 1) * h_dim1] -= sum * t2; +/* L100: */ + } + + if (*wantz) { + +/* Accumulate transformations in the matrix Z */ + + i__3 = *ihiz; + for (j = *iloz; j <= i__3; ++j) { + sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * + z_dim1]; + z__[j + k * z_dim1] -= sum * t1; + z__[j + (k + 1) * z_dim1] -= sum * t2; +/* L110: */ + } + } + } +/* L120: */ + } + +/* L130: */ + } + +/* Failure to converge in remaining number of iterations */ + + *info = i__; + return 0; + +L140: + + if (l == i__) { + +/* H(I,I-1) is negligible: one eigenvalue has converged. */ + + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.; + } else if (l == i__ - 1) { + +/* + H(I-1,I-2) is negligible: a pair of eigenvalues have converged. + + Transform the 2-by-2 submatrix to standard Schur form, + and compute and store the eigenvalues. +*/ + + dlanv2_(&h__[i__ - 1 + (i__ - 1) * h_dim1], &h__[i__ - 1 + i__ * + h_dim1], &h__[i__ + (i__ - 1) * h_dim1], &h__[i__ + i__ * + h_dim1], &wr[i__ - 1], &wi[i__ - 1], &wr[i__], &wi[i__], &cs, + &sn); + + if (*wantt) { + +/* Apply the transformation to the rest of H. */ + + if (i2 > i__) { + i__1 = i2 - i__; + drot_(&i__1, &h__[i__ - 1 + (i__ + 1) * h_dim1], ldh, &h__[ + i__ + (i__ + 1) * h_dim1], ldh, &cs, &sn); + } + i__1 = i__ - i1 - 1; + drot_(&i__1, &h__[i1 + (i__ - 1) * h_dim1], &c__1, &h__[i1 + i__ * + h_dim1], &c__1, &cs, &sn); + } + if (*wantz) { + +/* Apply the transformation to Z. */ + + drot_(&nz, &z__[*iloz + (i__ - 1) * z_dim1], &c__1, &z__[*iloz + + i__ * z_dim1], &c__1, &cs, &sn); + } + } + +/* + Decrement number of remaining iterations, and return to start of + the main loop with new value of I. +*/ + + itn -= its; + i__ = l - 1; + goto L10; + +L150: + return 0; + +/* End of DLAHQR */ + +} /* dlahqr_ */ + +/* Subroutine */ int dlahrd_(integer *n, integer *k, integer *nb, doublereal * + a, integer *lda, doublereal *tau, doublereal *t, integer *ldt, + doublereal *y, integer *ldy) +{ + /* System generated locals */ + integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, + i__3; + doublereal d__1; + + /* Local variables */ + static integer i__; + static doublereal ei; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dgemv_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), dcopy_(integer *, doublereal *, + integer *, doublereal *, integer *), daxpy_(integer *, doublereal + *, doublereal *, integer *, doublereal *, integer *), dtrmv_(char + *, char *, char *, integer *, doublereal *, integer *, doublereal + *, integer *), dlarfg_(integer *, + doublereal *, doublereal *, integer *, doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DLAHRD reduces the first NB columns of a real general n-by-(n-k+1) + matrix A so that elements below the k-th subdiagonal are zero. The + reduction is performed by an orthogonal similarity transformation + Q' * A * Q. The routine returns the matrices V and T which determine + Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. + + This is an auxiliary routine called by DGEHRD. + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. + + K (input) INTEGER + The offset for the reduction. Elements below the k-th + subdiagonal in the first NB columns are reduced to zero. + + NB (input) INTEGER + The number of columns to be reduced. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N-K+1) + On entry, the n-by-(n-k+1) general matrix A. + On exit, the elements on and above the k-th subdiagonal in + the first NB columns are overwritten with the corresponding + elements of the reduced matrix; the elements below the k-th + subdiagonal, with the array TAU, represent the matrix Q as a + product of elementary reflectors. The other columns of A are + unchanged. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) DOUBLE PRECISION array, dimension (NB) + The scalar factors of the elementary reflectors. See Further + Details. + + T (output) DOUBLE PRECISION array, dimension (LDT,NB) + The upper triangular matrix T. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= NB. + + Y (output) DOUBLE PRECISION array, dimension (LDY,NB) + The n-by-nb matrix Y. + + LDY (input) INTEGER + The leading dimension of the array Y. LDY >= N. + + Further Details + =============== + + The matrix Q is represented as a product of nb elementary reflectors + + Q = H(1) H(2) . . . H(nb). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in + A(i+k+1:n,i), and tau in TAU(i). + + The elements of the vectors v together form the (n-k+1)-by-nb matrix + V which is needed, with T and Y, to apply the transformation to the + unreduced part of the matrix, using an update of the form: + A := (I - V*T*V') * (A - Y*V'). + + The contents of A on exit are illustrated by the following example + with n = 7, k = 3 and nb = 2: + + ( a h a a a ) + ( a h a a a ) + ( a h a a a ) + ( h h a a a ) + ( v1 h a a a ) + ( v1 v2 a a a ) + ( v1 v2 a a a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + --tau; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + + /* Function Body */ + if (*n <= 1) { + return 0; + } + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ > 1) { + +/* + Update A(1:n,i) + + Compute i-th column of A - Y * V' +*/ + + i__2 = i__ - 1; + dgemv_("No transpose", n, &i__2, &c_b151, &y[y_offset], ldy, &a[* + k + i__ - 1 + a_dim1], lda, &c_b15, &a[i__ * a_dim1 + 1], + &c__1); + +/* + Apply I - V * T' * V' to this column (call it b) from the + left, using the last column of T as workspace + + Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) + ( V2 ) ( b2 ) + + where V1 is unit lower triangular + + w := V1' * b1 +*/ + + i__2 = i__ - 1; + dcopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + + 1], &c__1); + i__2 = i__ - 1; + dtrmv_("Lower", "Transpose", "Unit", &i__2, &a[*k + 1 + a_dim1], + lda, &t[*nb * t_dim1 + 1], &c__1); + +/* w := w + V2'*b2 */ + + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[*k + i__ + a_dim1], + lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b15, &t[*nb * + t_dim1 + 1], &c__1); + +/* w := T'*w */ + + i__2 = i__ - 1; + dtrmv_("Upper", "Transpose", "Non-unit", &i__2, &t[t_offset], ldt, + &t[*nb * t_dim1 + 1], &c__1); + +/* b2 := b2 - V2*w */ + + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[*k + i__ + + a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1, &c_b15, &a[*k + + i__ + i__ * a_dim1], &c__1); + +/* b1 := b1 - V1*w */ + + i__2 = i__ - 1; + dtrmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1] + , lda, &t[*nb * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + daxpy_(&i__2, &c_b151, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + + i__ * a_dim1], &c__1); + + a[*k + i__ - 1 + (i__ - 1) * a_dim1] = ei; + } + +/* + Generate the elementary reflector H(i) to annihilate + A(k+i+1:n,i) +*/ + + i__2 = *n - *k - i__ + 1; +/* Computing MIN */ + i__3 = *k + i__ + 1; + dlarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3,*n) + i__ * + a_dim1], &c__1, &tau[i__]); + ei = a[*k + i__ + i__ * a_dim1]; + a[*k + i__ + i__ * a_dim1] = 1.; + +/* Compute Y(1:n,i) */ + + i__2 = *n - *k - i__ + 1; + dgemv_("No transpose", n, &i__2, &c_b15, &a[(i__ + 1) * a_dim1 + 1], + lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b29, &y[i__ * + y_dim1 + 1], &c__1); + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[*k + i__ + a_dim1], lda, + &a[*k + i__ + i__ * a_dim1], &c__1, &c_b29, &t[i__ * t_dim1 + + 1], &c__1); + i__2 = i__ - 1; + dgemv_("No transpose", n, &i__2, &c_b151, &y[y_offset], ldy, &t[i__ * + t_dim1 + 1], &c__1, &c_b15, &y[i__ * y_dim1 + 1], &c__1); + dscal_(n, &tau[i__], &y[i__ * y_dim1 + 1], &c__1); + +/* Compute T(1:i,i) */ + + i__2 = i__ - 1; + d__1 = -tau[i__]; + dscal_(&i__2, &d__1, &t[i__ * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt, + &t[i__ * t_dim1 + 1], &c__1) + ; + t[i__ + i__ * t_dim1] = tau[i__]; + +/* L10: */ + } + a[*k + *nb + *nb * a_dim1] = ei; + + return 0; + +/* End of DLAHRD */ + +} /* dlahrd_ */ + +/* Subroutine */ int dlaln2_(logical *ltrans, integer *na, integer *nw, + doublereal *smin, doublereal *ca, doublereal *a, integer *lda, + doublereal *d1, doublereal *d2, doublereal *b, integer *ldb, + doublereal *wr, doublereal *wi, doublereal *x, integer *ldx, + doublereal *scale, doublereal *xnorm, integer *info) +{ + /* Initialized data */ + + static logical zswap[4] = { FALSE_,FALSE_,TRUE_,TRUE_ }; + static logical rswap[4] = { FALSE_,TRUE_,FALSE_,TRUE_ }; + static integer ipivot[16] /* was [4][4] */ = { 1,2,3,4,2,1,4,3,3,4,1,2, + 4,3,2,1 }; + + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + static doublereal equiv_0[4], equiv_1[4]; + + /* Local variables */ + static integer j; +#define ci (equiv_0) +#define cr (equiv_1) + static doublereal bi1, bi2, br1, br2, xi1, xi2, xr1, xr2, ci21, ci22, + cr21, cr22, li21, csi, ui11, lr21, ui12, ui22; +#define civ (equiv_0) + static doublereal csr, ur11, ur12, ur22; +#define crv (equiv_1) + static doublereal bbnd, cmax, ui11r, ui12s, temp, ur11r, ur12s, u22abs; + static integer icmax; + static doublereal bnorm, cnorm, smini; + + extern /* Subroutine */ int dladiv_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *); + static doublereal bignum, smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DLALN2 solves a system of the form (ca A - w D ) X = s B + or (ca A' - w D) X = s B with possible scaling ("s") and + perturbation of A. (A' means A-transpose.) + + A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA + real diagonal matrix, w is a real or complex value, and X and B are + NA x 1 matrices -- real if w is real, complex if w is complex. NA + may be 1 or 2. + + If w is complex, X and B are represented as NA x 2 matrices, + the first column of each being the real part and the second + being the imaginary part. + + "s" is a scaling factor (.LE. 1), computed by DLALN2, which is + so chosen that X can be computed without overflow. X is further + scaled if necessary to assure that norm(ca A - w D)*norm(X) is less + than overflow. + + If both singular values of (ca A - w D) are less than SMIN, + SMIN*identity will be used instead of (ca A - w D). If only one + singular value is less than SMIN, one element of (ca A - w D) will be + perturbed enough to make the smallest singular value roughly SMIN. + If both singular values are at least SMIN, (ca A - w D) will not be + perturbed. In any case, the perturbation will be at most some small + multiple of max( SMIN, ulp*norm(ca A - w D) ). The singular values + are computed by infinity-norm approximations, and thus will only be + correct to a factor of 2 or so. + + Note: all input quantities are assumed to be smaller than overflow + by a reasonable factor. (See BIGNUM.) + + Arguments + ========== + + LTRANS (input) LOGICAL + =.TRUE.: A-transpose will be used. + =.FALSE.: A will be used (not transposed.) + + NA (input) INTEGER + The size of the matrix A. It may (only) be 1 or 2. + + NW (input) INTEGER + 1 if "w" is real, 2 if "w" is complex. It may only be 1 + or 2. + + SMIN (input) DOUBLE PRECISION + The desired lower bound on the singular values of A. This + should be a safe distance away from underflow or overflow, + say, between (underflow/machine precision) and (machine + precision * overflow ). (See BIGNUM and ULP.) + + CA (input) DOUBLE PRECISION + The coefficient c, which A is multiplied by. + + A (input) DOUBLE PRECISION array, dimension (LDA,NA) + The NA x NA matrix A. + + LDA (input) INTEGER + The leading dimension of A. It must be at least NA. + + D1 (input) DOUBLE PRECISION + The 1,1 element in the diagonal matrix D. + + D2 (input) DOUBLE PRECISION + The 2,2 element in the diagonal matrix D. Not used if NW=1. + + B (input) DOUBLE PRECISION array, dimension (LDB,NW) + The NA x NW matrix B (right-hand side). If NW=2 ("w" is + complex), column 1 contains the real part of B and column 2 + contains the imaginary part. + + LDB (input) INTEGER + The leading dimension of B. It must be at least NA. + + WR (input) DOUBLE PRECISION + The real part of the scalar "w". + + WI (input) DOUBLE PRECISION + The imaginary part of the scalar "w". Not used if NW=1. + + X (output) DOUBLE PRECISION array, dimension (LDX,NW) + The NA x NW matrix X (unknowns), as computed by DLALN2. + If NW=2 ("w" is complex), on exit, column 1 will contain + the real part of X and column 2 will contain the imaginary + part. + + LDX (input) INTEGER + The leading dimension of X. It must be at least NA. + + SCALE (output) DOUBLE PRECISION + The scale factor that B must be multiplied by to insure + that overflow does not occur when computing X. Thus, + (ca A - w D) X will be SCALE*B, not B (ignoring + perturbations of A.) It will be at most 1. + + XNORM (output) DOUBLE PRECISION + The infinity-norm of X, when X is regarded as an NA x NW + real matrix. + + INFO (output) INTEGER + An error flag. It will be set to zero if no error occurs, + a negative number if an argument is in error, or a positive + number if ca A - w D had to be perturbed. + The possible values are: + = 0: No error occurred, and (ca A - w D) did not have to be + perturbed. + = 1: (ca A - w D) had to be perturbed to make its smallest + (or only) singular value greater than SMIN. + NOTE: In the interests of speed, this routine does not + check the inputs for errors. + + ===================================================================== +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + + /* Function Body */ + +/* Compute BIGNUM */ + + smlnum = 2. * SAFEMINIMUM; + bignum = 1. / smlnum; + smini = max(*smin,smlnum); + +/* Don't check for input errors */ + + *info = 0; + +/* Standard Initializations */ + + *scale = 1.; + + if (*na == 1) { + +/* 1 x 1 (i.e., scalar) system C X = B */ + + if (*nw == 1) { + +/* + Real 1x1 system. + + C = ca A - w D +*/ + + csr = *ca * a[a_dim1 + 1] - *wr * *d1; + cnorm = abs(csr); + +/* If | C | < SMINI, use C = SMINI */ + + if (cnorm < smini) { + csr = smini; + cnorm = smini; + *info = 1; + } + +/* Check scaling for X = B / C */ + + bnorm = (d__1 = b[b_dim1 + 1], abs(d__1)); + if (cnorm < 1. && bnorm > 1.) { + if (bnorm > bignum * cnorm) { + *scale = 1. / bnorm; + } + } + +/* Compute X */ + + x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / csr; + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)); + } else { + +/* + Complex 1x1 system (w is complex) + + C = ca A - w D +*/ + + csr = *ca * a[a_dim1 + 1] - *wr * *d1; + csi = -(*wi) * *d1; + cnorm = abs(csr) + abs(csi); + +/* If | C | < SMINI, use C = SMINI */ + + if (cnorm < smini) { + csr = smini; + csi = 0.; + cnorm = smini; + *info = 1; + } + +/* Check scaling for X = B / C */ + + bnorm = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[(b_dim1 << + 1) + 1], abs(d__2)); + if (cnorm < 1. && bnorm > 1.) { + if (bnorm > bignum * cnorm) { + *scale = 1. / bnorm; + } + } + +/* Compute X */ + + d__1 = *scale * b[b_dim1 + 1]; + d__2 = *scale * b[(b_dim1 << 1) + 1]; + dladiv_(&d__1, &d__2, &csr, &csi, &x[x_dim1 + 1], &x[(x_dim1 << 1) + + 1]); + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)) + (d__2 = x[(x_dim1 << + 1) + 1], abs(d__2)); + } + + } else { + +/* + 2x2 System + + Compute the real part of C = ca A - w D (or ca A' - w D ) +*/ + + cr[0] = *ca * a[a_dim1 + 1] - *wr * *d1; + cr[3] = *ca * a[(a_dim1 << 1) + 2] - *wr * *d2; + if (*ltrans) { + cr[2] = *ca * a[a_dim1 + 2]; + cr[1] = *ca * a[(a_dim1 << 1) + 1]; + } else { + cr[1] = *ca * a[a_dim1 + 2]; + cr[2] = *ca * a[(a_dim1 << 1) + 1]; + } + + if (*nw == 1) { + +/* + Real 2x2 system (w is real) + + Find the largest element in C +*/ + + cmax = 0.; + icmax = 0; + + for (j = 1; j <= 4; ++j) { + if ((d__1 = crv[j - 1], abs(d__1)) > cmax) { + cmax = (d__1 = crv[j - 1], abs(d__1)); + icmax = j; + } +/* L10: */ + } + +/* If norm(C) < SMINI, use SMINI*identity. */ + + if (cmax < smini) { +/* Computing MAX */ + d__3 = (d__1 = b[b_dim1 + 1], abs(d__1)), d__4 = (d__2 = b[ + b_dim1 + 2], abs(d__2)); + bnorm = max(d__3,d__4); + if (smini < 1. && bnorm > 1.) { + if (bnorm > bignum * smini) { + *scale = 1. / bnorm; + } + } + temp = *scale / smini; + x[x_dim1 + 1] = temp * b[b_dim1 + 1]; + x[x_dim1 + 2] = temp * b[b_dim1 + 2]; + *xnorm = temp * bnorm; + *info = 1; + return 0; + } + +/* Gaussian elimination with complete pivoting. */ + + ur11 = crv[icmax - 1]; + cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; + ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; + cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; + ur11r = 1. / ur11; + lr21 = ur11r * cr21; + ur22 = cr22 - ur12 * lr21; + +/* If smaller pivot < SMINI, use SMINI */ + + if (abs(ur22) < smini) { + ur22 = smini; + *info = 1; + } + if (rswap[icmax - 1]) { + br1 = b[b_dim1 + 2]; + br2 = b[b_dim1 + 1]; + } else { + br1 = b[b_dim1 + 1]; + br2 = b[b_dim1 + 2]; + } + br2 -= lr21 * br1; +/* Computing MAX */ + d__2 = (d__1 = br1 * (ur22 * ur11r), abs(d__1)), d__3 = abs(br2); + bbnd = max(d__2,d__3); + if (bbnd > 1. && abs(ur22) < 1.) { + if (bbnd >= bignum * abs(ur22)) { + *scale = 1. / bbnd; + } + } + + xr2 = br2 * *scale / ur22; + xr1 = *scale * br1 * ur11r - xr2 * (ur11r * ur12); + if (zswap[icmax - 1]) { + x[x_dim1 + 1] = xr2; + x[x_dim1 + 2] = xr1; + } else { + x[x_dim1 + 1] = xr1; + x[x_dim1 + 2] = xr2; + } +/* Computing MAX */ + d__1 = abs(xr1), d__2 = abs(xr2); + *xnorm = max(d__1,d__2); + +/* Further scaling if norm(A) norm(X) > overflow */ + + if (*xnorm > 1. && cmax > 1.) { + if (*xnorm > bignum / cmax) { + temp = cmax / bignum; + x[x_dim1 + 1] = temp * x[x_dim1 + 1]; + x[x_dim1 + 2] = temp * x[x_dim1 + 2]; + *xnorm = temp * *xnorm; + *scale = temp * *scale; + } + } + } else { + +/* + Complex 2x2 system (w is complex) + + Find the largest element in C +*/ + + ci[0] = -(*wi) * *d1; + ci[1] = 0.; + ci[2] = 0.; + ci[3] = -(*wi) * *d2; + cmax = 0.; + icmax = 0; + + for (j = 1; j <= 4; ++j) { + if ((d__1 = crv[j - 1], abs(d__1)) + (d__2 = civ[j - 1], abs( + d__2)) > cmax) { + cmax = (d__1 = crv[j - 1], abs(d__1)) + (d__2 = civ[j - 1] + , abs(d__2)); + icmax = j; + } +/* L20: */ + } + +/* If norm(C) < SMINI, use SMINI*identity. */ + + if (cmax < smini) { +/* Computing MAX */ + d__5 = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[(b_dim1 + << 1) + 1], abs(d__2)), d__6 = (d__3 = b[b_dim1 + 2], + abs(d__3)) + (d__4 = b[(b_dim1 << 1) + 2], abs(d__4)); + bnorm = max(d__5,d__6); + if (smini < 1. && bnorm > 1.) { + if (bnorm > bignum * smini) { + *scale = 1. / bnorm; + } + } + temp = *scale / smini; + x[x_dim1 + 1] = temp * b[b_dim1 + 1]; + x[x_dim1 + 2] = temp * b[b_dim1 + 2]; + x[(x_dim1 << 1) + 1] = temp * b[(b_dim1 << 1) + 1]; + x[(x_dim1 << 1) + 2] = temp * b[(b_dim1 << 1) + 2]; + *xnorm = temp * bnorm; + *info = 1; + return 0; + } + +/* Gaussian elimination with complete pivoting. */ + + ur11 = crv[icmax - 1]; + ui11 = civ[icmax - 1]; + cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; + ci21 = civ[ipivot[(icmax << 2) - 3] - 1]; + ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; + ui12 = civ[ipivot[(icmax << 2) - 2] - 1]; + cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; + ci22 = civ[ipivot[(icmax << 2) - 1] - 1]; + if (icmax == 1 || icmax == 4) { + +/* Code when off-diagonals of pivoted C are real */ + + if (abs(ur11) > abs(ui11)) { + temp = ui11 / ur11; +/* Computing 2nd power */ + d__1 = temp; + ur11r = 1. / (ur11 * (d__1 * d__1 + 1.)); + ui11r = -temp * ur11r; + } else { + temp = ur11 / ui11; +/* Computing 2nd power */ + d__1 = temp; + ui11r = -1. / (ui11 * (d__1 * d__1 + 1.)); + ur11r = -temp * ui11r; + } + lr21 = cr21 * ur11r; + li21 = cr21 * ui11r; + ur12s = ur12 * ur11r; + ui12s = ur12 * ui11r; + ur22 = cr22 - ur12 * lr21; + ui22 = ci22 - ur12 * li21; + } else { + +/* Code when diagonals of pivoted C are real */ + + ur11r = 1. / ur11; + ui11r = 0.; + lr21 = cr21 * ur11r; + li21 = ci21 * ur11r; + ur12s = ur12 * ur11r; + ui12s = ui12 * ur11r; + ur22 = cr22 - ur12 * lr21 + ui12 * li21; + ui22 = -ur12 * li21 - ui12 * lr21; + } + u22abs = abs(ur22) + abs(ui22); + +/* If smaller pivot < SMINI, use SMINI */ + + if (u22abs < smini) { + ur22 = smini; + ui22 = 0.; + *info = 1; + } + if (rswap[icmax - 1]) { + br2 = b[b_dim1 + 1]; + br1 = b[b_dim1 + 2]; + bi2 = b[(b_dim1 << 1) + 1]; + bi1 = b[(b_dim1 << 1) + 2]; + } else { + br1 = b[b_dim1 + 1]; + br2 = b[b_dim1 + 2]; + bi1 = b[(b_dim1 << 1) + 1]; + bi2 = b[(b_dim1 << 1) + 2]; + } + br2 = br2 - lr21 * br1 + li21 * bi1; + bi2 = bi2 - li21 * br1 - lr21 * bi1; +/* Computing MAX */ + d__1 = (abs(br1) + abs(bi1)) * (u22abs * (abs(ur11r) + abs(ui11r)) + ), d__2 = abs(br2) + abs(bi2); + bbnd = max(d__1,d__2); + if (bbnd > 1. && u22abs < 1.) { + if (bbnd >= bignum * u22abs) { + *scale = 1. / bbnd; + br1 = *scale * br1; + bi1 = *scale * bi1; + br2 = *scale * br2; + bi2 = *scale * bi2; + } + } + + dladiv_(&br2, &bi2, &ur22, &ui22, &xr2, &xi2); + xr1 = ur11r * br1 - ui11r * bi1 - ur12s * xr2 + ui12s * xi2; + xi1 = ui11r * br1 + ur11r * bi1 - ui12s * xr2 - ur12s * xi2; + if (zswap[icmax - 1]) { + x[x_dim1 + 1] = xr2; + x[x_dim1 + 2] = xr1; + x[(x_dim1 << 1) + 1] = xi2; + x[(x_dim1 << 1) + 2] = xi1; + } else { + x[x_dim1 + 1] = xr1; + x[x_dim1 + 2] = xr2; + x[(x_dim1 << 1) + 1] = xi1; + x[(x_dim1 << 1) + 2] = xi2; + } +/* Computing MAX */ + d__1 = abs(xr1) + abs(xi1), d__2 = abs(xr2) + abs(xi2); + *xnorm = max(d__1,d__2); + +/* Further scaling if norm(A) norm(X) > overflow */ + + if (*xnorm > 1. && cmax > 1.) { + if (*xnorm > bignum / cmax) { + temp = cmax / bignum; + x[x_dim1 + 1] = temp * x[x_dim1 + 1]; + x[x_dim1 + 2] = temp * x[x_dim1 + 2]; + x[(x_dim1 << 1) + 1] = temp * x[(x_dim1 << 1) + 1]; + x[(x_dim1 << 1) + 2] = temp * x[(x_dim1 << 1) + 2]; + *xnorm = temp * *xnorm; + *scale = temp * *scale; + } + } + } + } + + return 0; + +/* End of DLALN2 */ + +} /* dlaln2_ */ + +#undef crv +#undef civ +#undef cr +#undef ci + + +/* Subroutine */ int dlals0_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *nrhs, doublereal *b, integer *ldb, doublereal + *bx, integer *ldbx, integer *perm, integer *givptr, integer *givcol, + integer *ldgcol, doublereal *givnum, integer *ldgnum, doublereal * + poles, doublereal *difl, doublereal *difr, doublereal *z__, integer * + k, doublereal *c__, doublereal *s, doublereal *work, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, b_dim1, b_offset, bx_dim1, bx_offset, + difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, + poles_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + static integer i__, j, m, n; + static doublereal dj; + static integer nlp1; + static doublereal temp; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + static doublereal diflj, difrj, dsigj; + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), dcopy_(integer *, + doublereal *, integer *, doublereal *, integer *); + extern doublereal dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlacpy_(char *, integer *, integer + *, doublereal *, integer *, doublereal *, integer *), + xerbla_(char *, integer *); + static doublereal dsigjp; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + December 1, 1999 + + + Purpose + ======= + + DLALS0 applies back the multiplying factors of either the left or the + right singular vector matrix of a diagonal matrix appended by a row + to the right hand side matrix B in solving the least squares problem + using the divide-and-conquer SVD approach. + + For the left singular vector matrix, three types of orthogonal + matrices are involved: + + (1L) Givens rotations: the number of such rotations is GIVPTR; the + pairs of columns/rows they were applied to are stored in GIVCOL; + and the C- and S-values of these rotations are stored in GIVNUM. + + (2L) Permutation. The (NL+1)-st row of B is to be moved to the first + row, and for J=2:N, PERM(J)-th row of B is to be moved to the + J-th row. + + (3L) The left singular vector matrix of the remaining matrix. + + For the right singular vector matrix, four types of orthogonal + matrices are involved: + + (1R) The right singular vector matrix of the remaining matrix. + + (2R) If SQRE = 1, one extra Givens rotation to generate the right + null space. + + (3R) The inverse transformation of (2L). + + (4R) The inverse transformation of (1L). + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed in + factored form: + = 0: Left singular vector matrix. + = 1: Right singular vector matrix. + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has row dimension N = NL + NR + 1, + and column dimension M = N + SQRE. + + NRHS (input) INTEGER + The number of columns of B and BX. NRHS must be at least 1. + + B (input/output) DOUBLE PRECISION array, dimension ( LDB, NRHS ) + On input, B contains the right hand sides of the least + squares problem in rows 1 through M. On output, B contains + the solution X in rows 1 through N. + + LDB (input) INTEGER + The leading dimension of B. LDB must be at least + max(1,MAX( M, N ) ). + + BX (workspace) DOUBLE PRECISION array, dimension ( LDBX, NRHS ) + + LDBX (input) INTEGER + The leading dimension of BX. + + PERM (input) INTEGER array, dimension ( N ) + The permutations (from deflation and sorting) applied + to the two blocks. + + GIVPTR (input) INTEGER + The number of Givens rotations which took place in this + subproblem. + + GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) + Each pair of numbers indicates a pair of rows/columns + involved in a Givens rotation. + + LDGCOL (input) INTEGER + The leading dimension of GIVCOL, must be at least N. + + GIVNUM (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) + Each number indicates the C or S value used in the + corresponding Givens rotation. + + LDGNUM (input) INTEGER + The leading dimension of arrays DIFR, POLES and + GIVNUM, must be at least K. + + POLES (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) + On entry, POLES(1:K, 1) contains the new singular + values obtained from solving the secular equation, and + POLES(1:K, 2) is an array containing the poles in the secular + equation. + + DIFL (input) DOUBLE PRECISION array, dimension ( K ). + On entry, DIFL(I) is the distance between I-th updated + (undeflated) singular value and the I-th (undeflated) old + singular value. + + DIFR (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ). + On entry, DIFR(I, 1) contains the distances between I-th + updated (undeflated) singular value and the I+1-th + (undeflated) old singular value. And DIFR(I, 2) is the + normalizing factor for the I-th right singular vector. + + Z (input) DOUBLE PRECISION array, dimension ( K ) + Contain the components of the deflation-adjusted updating row + vector. + + K (input) INTEGER + Contains the dimension of the non-deflated matrix, + This is the order of the related secular equation. 1 <= K <=N. + + C (input) DOUBLE PRECISION + C contains garbage if SQRE =0 and the C-value of a Givens + rotation related to the right null space if SQRE = 1. + + S (input) DOUBLE PRECISION + S contains garbage if SQRE =0 and the S-value of a Givens + rotation related to the right null space if SQRE = 1. + + WORK (workspace) DOUBLE PRECISION array, dimension ( K ) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + bx_dim1 = *ldbx; + bx_offset = 1 + bx_dim1; + bx -= bx_offset; + --perm; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + difr_dim1 = *ldgnum; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + poles_dim1 = *ldgnum; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + givnum_dim1 = *ldgnum; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + --difl; + --z__; + --work; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*nl < 1) { + *info = -2; + } else if (*nr < 1) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } + + n = *nl + *nr + 1; + + if (*nrhs < 1) { + *info = -5; + } else if (*ldb < n) { + *info = -7; + } else if (*ldbx < n) { + *info = -9; + } else if (*givptr < 0) { + *info = -11; + } else if (*ldgcol < n) { + *info = -13; + } else if (*ldgnum < n) { + *info = -15; + } else if (*k < 1) { + *info = -20; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLALS0", &i__1); + return 0; + } + + m = n + *sqre; + nlp1 = *nl + 1; + + if (*icompq == 0) { + +/* + Apply back orthogonal transformations from the left. + + Step (1L): apply back the Givens rotations performed. +*/ + + i__1 = *givptr; + for (i__ = 1; i__ <= i__1; ++i__) { + drot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & + b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + + (givnum_dim1 << 1)], &givnum[i__ + givnum_dim1]); +/* L10: */ + } + +/* Step (2L): permute rows of B. */ + + dcopy_(nrhs, &b[nlp1 + b_dim1], ldb, &bx[bx_dim1 + 1], ldbx); + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + dcopy_(nrhs, &b[perm[i__] + b_dim1], ldb, &bx[i__ + bx_dim1], + ldbx); +/* L20: */ + } + +/* + Step (3L): apply the inverse of the left singular vector + matrix to BX. +*/ + + if (*k == 1) { + dcopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb); + if (z__[1] < 0.) { + dscal_(nrhs, &c_b151, &b[b_offset], ldb); + } + } else { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + diflj = difl[j]; + dj = poles[j + poles_dim1]; + dsigj = -poles[j + (poles_dim1 << 1)]; + if (j < *k) { + difrj = -difr[j + difr_dim1]; + dsigjp = -poles[j + 1 + (poles_dim1 << 1)]; + } + if (z__[j] == 0. || poles[j + (poles_dim1 << 1)] == 0.) { + work[j] = 0.; + } else { + work[j] = -poles[j + (poles_dim1 << 1)] * z__[j] / diflj / + (poles[j + (poles_dim1 << 1)] + dj); + } + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + if (z__[i__] == 0. || poles[i__ + (poles_dim1 << 1)] == + 0.) { + work[i__] = 0.; + } else { + work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] + / (dlamc3_(&poles[i__ + (poles_dim1 << 1)], & + dsigj) - diflj) / (poles[i__ + (poles_dim1 << + 1)] + dj); + } +/* L30: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + if (z__[i__] == 0. || poles[i__ + (poles_dim1 << 1)] == + 0.) { + work[i__] = 0.; + } else { + work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] + / (dlamc3_(&poles[i__ + (poles_dim1 << 1)], & + dsigjp) + difrj) / (poles[i__ + (poles_dim1 << + 1)] + dj); + } +/* L40: */ + } + work[1] = -1.; + temp = dnrm2_(k, &work[1], &c__1); + dgemv_("T", k, nrhs, &c_b15, &bx[bx_offset], ldbx, &work[1], & + c__1, &c_b29, &b[j + b_dim1], ldb); + dlascl_("G", &c__0, &c__0, &temp, &c_b15, &c__1, nrhs, &b[j + + b_dim1], ldb, info); +/* L50: */ + } + } + +/* Move the deflated rows of BX to B also. */ + + if (*k < max(m,n)) { + i__1 = n - *k; + dlacpy_("A", &i__1, nrhs, &bx[*k + 1 + bx_dim1], ldbx, &b[*k + 1 + + b_dim1], ldb); + } + } else { + +/* + Apply back the right orthogonal transformations. + + Step (1R): apply back the new right singular vector matrix + to B. +*/ + + if (*k == 1) { + dcopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx); + } else { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dsigj = poles[j + (poles_dim1 << 1)]; + if (z__[j] == 0.) { + work[j] = 0.; + } else { + work[j] = -z__[j] / difl[j] / (dsigj + poles[j + + poles_dim1]) / difr[j + (difr_dim1 << 1)]; + } + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + if (z__[j] == 0.) { + work[i__] = 0.; + } else { + d__1 = -poles[i__ + 1 + (poles_dim1 << 1)]; + work[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difr[ + i__ + difr_dim1]) / (dsigj + poles[i__ + + poles_dim1]) / difr[i__ + (difr_dim1 << 1)]; + } +/* L60: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + if (z__[j] == 0.) { + work[i__] = 0.; + } else { + d__1 = -poles[i__ + (poles_dim1 << 1)]; + work[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difl[ + i__]) / (dsigj + poles[i__ + poles_dim1]) / + difr[i__ + (difr_dim1 << 1)]; + } +/* L70: */ + } + dgemv_("T", k, nrhs, &c_b15, &b[b_offset], ldb, &work[1], & + c__1, &c_b29, &bx[j + bx_dim1], ldbx); +/* L80: */ + } + } + +/* + Step (2R): if SQRE = 1, apply back the rotation that is + related to the right null space of the subproblem. +*/ + + if (*sqre == 1) { + dcopy_(nrhs, &b[m + b_dim1], ldb, &bx[m + bx_dim1], ldbx); + drot_(nrhs, &bx[bx_dim1 + 1], ldbx, &bx[m + bx_dim1], ldbx, c__, + s); + } + if (*k < max(m,n)) { + i__1 = n - *k; + dlacpy_("A", &i__1, nrhs, &b[*k + 1 + b_dim1], ldb, &bx[*k + 1 + + bx_dim1], ldbx); + } + +/* Step (3R): permute rows of B. */ + + dcopy_(nrhs, &bx[bx_dim1 + 1], ldbx, &b[nlp1 + b_dim1], ldb); + if (*sqre == 1) { + dcopy_(nrhs, &bx[m + bx_dim1], ldbx, &b[m + b_dim1], ldb); + } + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + dcopy_(nrhs, &bx[i__ + bx_dim1], ldbx, &b[perm[i__] + b_dim1], + ldb); +/* L90: */ + } + +/* Step (4R): apply back the Givens rotations performed. */ + + for (i__ = *givptr; i__ >= 1; --i__) { + d__1 = -givnum[i__ + givnum_dim1]; + drot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & + b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + + (givnum_dim1 << 1)], &d__1); +/* L100: */ + } + } + + return 0; + +/* End of DLALS0 */ + +} /* dlals0_ */ + +/* Subroutine */ int dlalsa_(integer *icompq, integer *smlsiz, integer *n, + integer *nrhs, doublereal *b, integer *ldb, doublereal *bx, integer * + ldbx, doublereal *u, integer *ldu, doublereal *vt, integer *k, + doublereal *difl, doublereal *difr, doublereal *z__, doublereal * + poles, integer *givptr, integer *givcol, integer *ldgcol, integer * + perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal * + work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, b_dim1, + b_offset, bx_dim1, bx_offset, difl_dim1, difl_offset, difr_dim1, + difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, + u_dim1, u_offset, vt_dim1, vt_offset, z_dim1, z_offset, i__1, + i__2; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + + /* Local variables */ + static integer i__, j, i1, ic, lf, nd, ll, nl, nr, im1, nlf, nrf, lvl, + ndb1, nlp1, lvl2, nrp1, nlvl, sqre; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer inode, ndiml, ndimr; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dlals0_(integer *, integer *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *), dlasdt_(integer *, integer *, integer *, integer *, + integer *, integer *, integer *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DLALSA is an itermediate step in solving the least squares problem + by computing the SVD of the coefficient matrix in compact form (The + singular vectors are computed as products of simple orthorgonal + matrices.). + + If ICOMPQ = 0, DLALSA applies the inverse of the left singular vector + matrix of an upper bidiagonal matrix to the right hand side; and if + ICOMPQ = 1, DLALSA applies the right singular vector matrix to the + right hand side. The singular vector matrices were generated in + compact form by DLALSA. + + Arguments + ========= + + + ICOMPQ (input) INTEGER + Specifies whether the left or the right singular vector + matrix is involved. + = 0: Left singular vector matrix + = 1: Right singular vector matrix + + SMLSIZ (input) INTEGER + The maximum size of the subproblems at the bottom of the + computation tree. + + N (input) INTEGER + The row and column dimensions of the upper bidiagonal matrix. + + NRHS (input) INTEGER + The number of columns of B and BX. NRHS must be at least 1. + + B (input) DOUBLE PRECISION array, dimension ( LDB, NRHS ) + On input, B contains the right hand sides of the least + squares problem in rows 1 through M. On output, B contains + the solution X in rows 1 through N. + + LDB (input) INTEGER + The leading dimension of B in the calling subprogram. + LDB must be at least max(1,MAX( M, N ) ). + + BX (output) DOUBLE PRECISION array, dimension ( LDBX, NRHS ) + On exit, the result of applying the left or right singular + vector matrix to B. + + LDBX (input) INTEGER + The leading dimension of BX. + + U (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ). + On entry, U contains the left singular vector matrices of all + subproblems at the bottom level. + + LDU (input) INTEGER, LDU = > N. + The leading dimension of arrays U, VT, DIFL, DIFR, + POLES, GIVNUM, and Z. + + VT (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ). + On entry, VT' contains the right singular vector matrices of + all subproblems at the bottom level. + + K (input) INTEGER array, dimension ( N ). + + DIFL (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). + where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. + + DIFR (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). + On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record + distances between singular values on the I-th level and + singular values on the (I -1)-th level, and DIFR(*, 2 * I) + record the normalizing factors of the right singular vectors + matrices of subproblems on I-th level. + + Z (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). + On entry, Z(1, I) contains the components of the deflation- + adjusted updating row vector for subproblems on the I-th + level. + + POLES (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). + On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old + singular values involved in the secular equations on the I-th + level. + + GIVPTR (input) INTEGER array, dimension ( N ). + On entry, GIVPTR( I ) records the number of Givens + rotations performed on the I-th problem on the computation + tree. + + GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). + On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the + locations of Givens rotations performed on the I-th level on + the computation tree. + + LDGCOL (input) INTEGER, LDGCOL = > N. + The leading dimension of arrays GIVCOL and PERM. + + PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). + On entry, PERM(*, I) records permutations done on the I-th + level of the computation tree. + + GIVNUM (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). + On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- + values of Givens rotations performed on the I-th level on the + computation tree. + + C (input) DOUBLE PRECISION array, dimension ( N ). + On entry, if the I-th subproblem is not square, + C( I ) contains the C-value of a Givens rotation related to + the right null space of the I-th subproblem. + + S (input) DOUBLE PRECISION array, dimension ( N ). + On entry, if the I-th subproblem is not square, + S( I ) contains the S-value of a Givens rotation related to + the right null space of the I-th subproblem. + + WORK (workspace) DOUBLE PRECISION array. + The dimension must be at least N. + + IWORK (workspace) INTEGER array. + The dimension must be at least 3 * N + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + bx_dim1 = *ldbx; + bx_offset = 1 + bx_dim1; + bx -= bx_offset; + givnum_dim1 = *ldu; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + poles_dim1 = *ldu; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + z_dim1 = *ldu; + z_offset = 1 + z_dim1; + z__ -= z_offset; + difr_dim1 = *ldu; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + difl_dim1 = *ldu; + difl_offset = 1 + difl_dim1; + difl -= difl_offset; + vt_dim1 = *ldu; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + --k; + --givptr; + perm_dim1 = *ldgcol; + perm_offset = 1 + perm_dim1; + perm -= perm_offset; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + --c__; + --s; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*smlsiz < 3) { + *info = -2; + } else if (*n < *smlsiz) { + *info = -3; + } else if (*nrhs < 1) { + *info = -4; + } else if (*ldb < *n) { + *info = -6; + } else if (*ldbx < *n) { + *info = -8; + } else if (*ldu < *n) { + *info = -10; + } else if (*ldgcol < *n) { + *info = -19; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLALSA", &i__1); + return 0; + } + +/* Book-keeping and setting up the computation tree. */ + + inode = 1; + ndiml = inode + *n; + ndimr = ndiml + *n; + + dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], + smlsiz); + +/* + The following code applies back the left singular vector factors. + For applying back the right singular vector factors, go to 50. +*/ + + if (*icompq == 1) { + goto L50; + } + +/* + The nodes on the bottom level of the tree were solved + by DLASDQ. The corresponding left and right singular vector + matrices are in explicit form. First apply back the left + singular vector matrices. +*/ + + ndb1 = (nd + 1) / 2; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + +/* + IC : center row of each node + NL : number of rows of left subproblem + NR : number of rows of right subproblem + NLF: starting row of the left subproblem + NRF: starting row of the right subproblem +*/ + + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nr = iwork[ndimr + i1]; + nlf = ic - nl; + nrf = ic + 1; + dgemm_("T", "N", &nl, nrhs, &nl, &c_b15, &u[nlf + u_dim1], ldu, &b[ + nlf + b_dim1], ldb, &c_b29, &bx[nlf + bx_dim1], ldbx); + dgemm_("T", "N", &nr, nrhs, &nr, &c_b15, &u[nrf + u_dim1], ldu, &b[ + nrf + b_dim1], ldb, &c_b29, &bx[nrf + bx_dim1], ldbx); +/* L10: */ + } + +/* + Next copy the rows of B that correspond to unchanged rows + in the bidiagonal matrix to BX. +*/ + + i__1 = nd; + for (i__ = 1; i__ <= i__1; ++i__) { + ic = iwork[inode + i__ - 1]; + dcopy_(nrhs, &b[ic + b_dim1], ldb, &bx[ic + bx_dim1], ldbx); +/* L20: */ + } + +/* + Finally go through the left singular vector matrices of all + the other subproblems bottom-up on the tree. +*/ + + j = pow_ii(&c__2, &nlvl); + sqre = 0; + + for (lvl = nlvl; lvl >= 1; --lvl) { + lvl2 = (lvl << 1) - 1; + +/* + find the first node LF and last node LL on + the current level LVL +*/ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__1 = lvl - 1; + lf = pow_ii(&c__2, &i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; + for (i__ = lf; i__ <= i__1; ++i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + nrf = ic + 1; + --j; + dlals0_(icompq, &nl, &nr, &sqre, nrhs, &bx[nlf + bx_dim1], ldbx, & + b[nlf + b_dim1], ldb, &perm[nlf + lvl * perm_dim1], & + givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & + givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * + poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + + lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ + j], &s[j], &work[1], info); +/* L30: */ + } +/* L40: */ + } + goto L90; + +/* ICOMPQ = 1: applying back the right singular vector factors. */ + +L50: + +/* + First now go through the right singular vector matrices of all + the tree nodes top-down. +*/ + + j = 0; + i__1 = nlvl; + for (lvl = 1; lvl <= i__1; ++lvl) { + lvl2 = (lvl << 1) - 1; + +/* + Find the first node LF and last node LL on + the current level LVL. +*/ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__2 = lvl - 1; + lf = pow_ii(&c__2, &i__2); + ll = (lf << 1) - 1; + } + i__2 = lf; + for (i__ = ll; i__ >= i__2; --i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + nrf = ic + 1; + if (i__ == ll) { + sqre = 0; + } else { + sqre = 1; + } + ++j; + dlals0_(icompq, &nl, &nr, &sqre, nrhs, &b[nlf + b_dim1], ldb, &bx[ + nlf + bx_dim1], ldbx, &perm[nlf + lvl * perm_dim1], & + givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & + givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * + poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + + lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ + j], &s[j], &work[1], info); +/* L60: */ + } +/* L70: */ + } + +/* + The nodes on the bottom level of the tree were solved + by DLASDQ. The corresponding right singular vector + matrices are in explicit form. Apply them back. +*/ + + ndb1 = (nd + 1) / 2; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nr = iwork[ndimr + i1]; + nlp1 = nl + 1; + if (i__ == nd) { + nrp1 = nr; + } else { + nrp1 = nr + 1; + } + nlf = ic - nl; + nrf = ic + 1; + dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b15, &vt[nlf + vt_dim1], ldu, + &b[nlf + b_dim1], ldb, &c_b29, &bx[nlf + bx_dim1], ldbx); + dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b15, &vt[nrf + vt_dim1], ldu, + &b[nrf + b_dim1], ldb, &c_b29, &bx[nrf + bx_dim1], ldbx); +/* L80: */ + } + +L90: + + return 0; + +/* End of DLALSA */ + +} /* dlalsa_ */ + +/* Subroutine */ int dlalsd_(char *uplo, integer *smlsiz, integer *n, integer + *nrhs, doublereal *d__, doublereal *e, doublereal *b, integer *ldb, + doublereal *rcond, integer *rank, doublereal *work, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double log(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + static integer c__, i__, j, k; + static doublereal r__; + static integer s, u, z__; + static doublereal cs; + static integer bx; + static doublereal sn; + static integer st, vt, nm1, st1; + static doublereal eps; + static integer iwk; + static doublereal tol; + static integer difl, difr, perm, nsub; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + static integer nlvl, sqre, bxst; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), + dcopy_(integer *, doublereal *, integer *, doublereal *, integer + *); + static integer poles, sizei, nsize, nwork, icmpq1, icmpq2; + + extern /* Subroutine */ int dlasda_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *), dlalsa_(integer *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + integer *, integer *), dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlasdq_(char *, integer *, integer *, integer + *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *), dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), dlartg_(doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *), dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + xerbla_(char *, integer *); + static integer givcol; + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, + integer *); + static doublereal orgnrm; + static integer givnum, givptr, smlszp; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + DLALSD uses the singular value decomposition of A to solve the least + squares problem of finding X to minimize the Euclidean norm of each + column of A*X-B, where A is N-by-N upper bidiagonal, and X and B + are N-by-NRHS. The solution X overwrites B. + + The singular values of A smaller than RCOND times the largest + singular value are treated as zero in solving the least squares + problem; in this case a minimum norm solution is returned. + The actual singular values are returned in D in ascending order. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': D and E define an upper bidiagonal matrix. + = 'L': D and E define a lower bidiagonal matrix. + + SMLSIZ (input) INTEGER + The maximum size of the subproblems at the bottom of the + computation tree. + + N (input) INTEGER + The dimension of the bidiagonal matrix. N >= 0. + + NRHS (input) INTEGER + The number of columns of B. NRHS must be at least 1. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry D contains the main diagonal of the bidiagonal + matrix. On exit, if INFO = 0, D contains its singular values. + + E (input) DOUBLE PRECISION array, dimension (N-1) + Contains the super-diagonal entries of the bidiagonal matrix. + On exit, E has been destroyed. + + B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) + On input, B contains the right hand sides of the least + squares problem. On output, B contains the solution X. + + LDB (input) INTEGER + The leading dimension of B in the calling subprogram. + LDB must be at least max(1,N). + + RCOND (input) DOUBLE PRECISION + The singular values of A less than or equal to RCOND times + the largest singular value are treated as zero in solving + the least squares problem. If RCOND is negative, + machine precision is used instead. + For example, if diag(S)*X=B were the least squares problem, + where diag(S) is a diagonal matrix of singular values, the + solution would be X(i) = B(i) / S(i) if S(i) is greater than + RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to + RCOND*max(S). + + RANK (output) INTEGER + The number of singular values of A greater than RCOND times + the largest singular value. + + WORK (workspace) DOUBLE PRECISION array, dimension at least + (9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2), + where NLVL = max(0, INT(log_2 (N/(SMLSIZ+1))) + 1). + + IWORK (workspace) INTEGER array, dimension at least + (3*N*NLVL + 11*N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an singular value while + working on the submatrix lying in rows and columns + INFO/(N+1) through MOD(INFO,N+1). + + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -3; + } else if (*nrhs < 1) { + *info = -4; + } else if (*ldb < 1 || *ldb < *n) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLALSD", &i__1); + return 0; + } + + eps = EPSILON; + +/* Set up the tolerance. */ + + if (*rcond <= 0. || *rcond >= 1.) { + *rcond = eps; + } + + *rank = 0; + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } else if (*n == 1) { + if (d__[1] == 0.) { + dlaset_("A", &c__1, nrhs, &c_b29, &c_b29, &b[b_offset], ldb); + } else { + *rank = 1; + dlascl_("G", &c__0, &c__0, &d__[1], &c_b15, &c__1, nrhs, &b[ + b_offset], ldb, info); + d__[1] = abs(d__[1]); + } + return 0; + } + +/* Rotate the matrix if it is lower bidiagonal. */ + + if (*(unsigned char *)uplo == 'L') { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (*nrhs == 1) { + drot_(&c__1, &b[i__ + b_dim1], &c__1, &b[i__ + 1 + b_dim1], & + c__1, &cs, &sn); + } else { + work[(i__ << 1) - 1] = cs; + work[i__ * 2] = sn; + } +/* L10: */ + } + if (*nrhs > 1) { + i__1 = *nrhs; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *n - 1; + for (j = 1; j <= i__2; ++j) { + cs = work[(j << 1) - 1]; + sn = work[j * 2]; + drot_(&c__1, &b[j + i__ * b_dim1], &c__1, &b[j + 1 + i__ * + b_dim1], &c__1, &cs, &sn); +/* L20: */ + } +/* L30: */ + } + } + } + +/* Scale. */ + + nm1 = *n - 1; + orgnrm = dlanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.) { + dlaset_("A", n, nrhs, &c_b29, &c_b29, &b[b_offset], ldb); + return 0; + } + + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, &c__1, &d__[1], n, info); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &nm1, &c__1, &e[1], &nm1, + info); + +/* + If N is smaller than the minimum divide size SMLSIZ, then solve + the problem with another solver. +*/ + + if (*n <= *smlsiz) { + nwork = *n * *n + 1; + dlaset_("A", n, n, &c_b29, &c_b15, &work[1], n); + dlasdq_("U", &c__0, n, n, &c__0, nrhs, &d__[1], &e[1], &work[1], n, & + work[1], n, &b[b_offset], ldb, &work[nwork], info); + if (*info != 0) { + return 0; + } + tol = *rcond * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (d__[i__] <= tol) { + dlaset_("A", &c__1, nrhs, &c_b29, &c_b29, &b[i__ + b_dim1], + ldb); + } else { + dlascl_("G", &c__0, &c__0, &d__[i__], &c_b15, &c__1, nrhs, &b[ + i__ + b_dim1], ldb, info); + ++(*rank); + } +/* L40: */ + } + dgemm_("T", "N", n, nrhs, n, &c_b15, &work[1], n, &b[b_offset], ldb, & + c_b29, &work[nwork], n); + dlacpy_("A", n, nrhs, &work[nwork], n, &b[b_offset], ldb); + +/* Unscale. */ + + dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, n, &c__1, &d__[1], n, + info); + dlasrt_("D", n, &d__[1], info); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, nrhs, &b[b_offset], + ldb, info); + + return 0; + } + +/* Book-keeping and setting up some constants. */ + + nlvl = (integer) (log((doublereal) (*n) / (doublereal) (*smlsiz + 1)) / + log(2.)) + 1; + + smlszp = *smlsiz + 1; + + u = 1; + vt = *smlsiz * *n + 1; + difl = vt + smlszp * *n; + difr = difl + nlvl * *n; + z__ = difr + (nlvl * *n << 1); + c__ = z__ + nlvl * *n; + s = c__ + *n; + poles = s + *n; + givnum = poles + (nlvl << 1) * *n; + bx = givnum + (nlvl << 1) * *n; + nwork = bx + *n * *nrhs; + + sizei = *n + 1; + k = sizei + *n; + givptr = k + *n; + perm = givptr + *n; + givcol = perm + nlvl * *n; + iwk = givcol + (nlvl * *n << 1); + + st = 1; + sqre = 0; + icmpq1 = 1; + icmpq2 = 0; + nsub = 0; + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) < eps) { + d__[i__] = d_sign(&eps, &d__[i__]); + } +/* L50: */ + } + + i__1 = nm1; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) { + ++nsub; + iwork[nsub] = st; + +/* + Subproblem found. First determine its size and then + apply divide and conquer on it. +*/ + + if (i__ < nm1) { + +/* A subproblem with E(I) small for I < NM1. */ + + nsize = i__ - st + 1; + iwork[sizei + nsub - 1] = nsize; + } else if ((d__1 = e[i__], abs(d__1)) >= eps) { + +/* A subproblem with E(NM1) not too small but I = NM1. */ + + nsize = *n - st + 1; + iwork[sizei + nsub - 1] = nsize; + } else { + +/* + A subproblem with E(NM1) small. This implies an + 1-by-1 subproblem at D(N), which is not solved + explicitly. +*/ + + nsize = i__ - st + 1; + iwork[sizei + nsub - 1] = nsize; + ++nsub; + iwork[nsub] = *n; + iwork[sizei + nsub - 1] = 1; + dcopy_(nrhs, &b[*n + b_dim1], ldb, &work[bx + nm1], n); + } + st1 = st - 1; + if (nsize == 1) { + +/* + This is a 1-by-1 subproblem and is not solved + explicitly. +*/ + + dcopy_(nrhs, &b[st + b_dim1], ldb, &work[bx + st1], n); + } else if (nsize <= *smlsiz) { + +/* This is a small subproblem and is solved by DLASDQ. */ + + dlaset_("A", &nsize, &nsize, &c_b29, &c_b15, &work[vt + st1], + n); + dlasdq_("U", &c__0, &nsize, &nsize, &c__0, nrhs, &d__[st], &e[ + st], &work[vt + st1], n, &work[nwork], n, &b[st + + b_dim1], ldb, &work[nwork], info); + if (*info != 0) { + return 0; + } + dlacpy_("A", &nsize, nrhs, &b[st + b_dim1], ldb, &work[bx + + st1], n); + } else { + +/* A large problem. Solve it using divide and conquer. */ + + dlasda_(&icmpq1, smlsiz, &nsize, &sqre, &d__[st], &e[st], & + work[u + st1], n, &work[vt + st1], &iwork[k + st1], & + work[difl + st1], &work[difr + st1], &work[z__ + st1], + &work[poles + st1], &iwork[givptr + st1], &iwork[ + givcol + st1], n, &iwork[perm + st1], &work[givnum + + st1], &work[c__ + st1], &work[s + st1], &work[nwork], + &iwork[iwk], info); + if (*info != 0) { + return 0; + } + bxst = bx + st1; + dlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &b[st + b_dim1], ldb, & + work[bxst], n, &work[u + st1], n, &work[vt + st1], & + iwork[k + st1], &work[difl + st1], &work[difr + st1], + &work[z__ + st1], &work[poles + st1], &iwork[givptr + + st1], &iwork[givcol + st1], n, &iwork[perm + st1], & + work[givnum + st1], &work[c__ + st1], &work[s + st1], + &work[nwork], &iwork[iwk], info); + if (*info != 0) { + return 0; + } + } + st = i__ + 1; + } +/* L60: */ + } + +/* Apply the singular values and treat the tiny ones as zero. */ + + tol = *rcond * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* + Some of the elements in D can be negative because 1-by-1 + subproblems were not solved explicitly. +*/ + + if ((d__1 = d__[i__], abs(d__1)) <= tol) { + dlaset_("A", &c__1, nrhs, &c_b29, &c_b29, &work[bx + i__ - 1], n); + } else { + ++(*rank); + dlascl_("G", &c__0, &c__0, &d__[i__], &c_b15, &c__1, nrhs, &work[ + bx + i__ - 1], n, info); + } + d__[i__] = (d__1 = d__[i__], abs(d__1)); +/* L70: */ + } + +/* Now apply back the right singular vectors. */ + + icmpq2 = 1; + i__1 = nsub; + for (i__ = 1; i__ <= i__1; ++i__) { + st = iwork[i__]; + st1 = st - 1; + nsize = iwork[sizei + i__ - 1]; + bxst = bx + st1; + if (nsize == 1) { + dcopy_(nrhs, &work[bxst], n, &b[st + b_dim1], ldb); + } else if (nsize <= *smlsiz) { + dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b15, &work[vt + st1], n, + &work[bxst], n, &c_b29, &b[st + b_dim1], ldb); + } else { + dlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &work[bxst], n, &b[st + + b_dim1], ldb, &work[u + st1], n, &work[vt + st1], &iwork[ + k + st1], &work[difl + st1], &work[difr + st1], &work[z__ + + st1], &work[poles + st1], &iwork[givptr + st1], &iwork[ + givcol + st1], n, &iwork[perm + st1], &work[givnum + st1], + &work[c__ + st1], &work[s + st1], &work[nwork], &iwork[ + iwk], info); + if (*info != 0) { + return 0; + } + } +/* L80: */ + } + +/* Unscale and sort the singular values. */ + + dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, n, &c__1, &d__[1], n, info); + dlasrt_("D", n, &d__[1], info); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, nrhs, &b[b_offset], ldb, + info); + + return 0; + +/* End of DLALSD */ + +} /* dlalsd_ */ + +/* Subroutine */ int dlamrg_(integer *n1, integer *n2, doublereal *a, integer + *dtrd1, integer *dtrd2, integer *index) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, ind1, ind2, n1sv, n2sv; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + DLAMRG will create a permutation list which will merge the elements + of A (which is composed of two independently sorted sets) into a + single set which is sorted in ascending order. + + Arguments + ========= + + N1 (input) INTEGER + N2 (input) INTEGER + These arguements contain the respective lengths of the two + sorted lists to be merged. + + A (input) DOUBLE PRECISION array, dimension (N1+N2) + The first N1 elements of A contain a list of numbers which + are sorted in either ascending or descending order. Likewise + for the final N2 elements. + + DTRD1 (input) INTEGER + DTRD2 (input) INTEGER + These are the strides to be taken through the array A. + Allowable strides are 1 and -1. They indicate whether a + subset of A is sorted in ascending (DTRDx = 1) or descending + (DTRDx = -1) order. + + INDEX (output) INTEGER array, dimension (N1+N2) + On exit this array will contain a permutation such that + if B( I ) = A( INDEX( I ) ) for I=1,N1+N2, then B will be + sorted in ascending order. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --index; + --a; + + /* Function Body */ + n1sv = *n1; + n2sv = *n2; + if (*dtrd1 > 0) { + ind1 = 1; + } else { + ind1 = *n1; + } + if (*dtrd2 > 0) { + ind2 = *n1 + 1; + } else { + ind2 = *n1 + *n2; + } + i__ = 1; +/* while ( (N1SV > 0) & (N2SV > 0) ) */ +L10: + if (n1sv > 0 && n2sv > 0) { + if (a[ind1] <= a[ind2]) { + index[i__] = ind1; + ++i__; + ind1 += *dtrd1; + --n1sv; + } else { + index[i__] = ind2; + ++i__; + ind2 += *dtrd2; + --n2sv; + } + goto L10; + } +/* end while */ + if (n1sv == 0) { + i__1 = n2sv; + for (n1sv = 1; n1sv <= i__1; ++n1sv) { + index[i__] = ind2; + ++i__; + ind2 += *dtrd2; +/* L20: */ + } + } else { +/* N2SV .EQ. 0 */ + i__1 = n1sv; + for (n2sv = 1; n2sv <= i__1; ++n2sv) { + index[i__] = ind1; + ++i__; + ind1 += *dtrd1; +/* L30: */ + } + } + + return 0; + +/* End of DLAMRG */ + +} /* dlamrg_ */ + +doublereal dlange_(char *norm, integer *m, integer *n, doublereal *a, integer + *lda, doublereal *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal ret_val, d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, j; + static doublereal sum, scale; + extern logical lsame_(char *, char *); + static doublereal value; + extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DLANGE returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + real matrix A. + + Description + =========== + + DLANGE returns the value + + DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in DLANGE as described + above. + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. When M = 0, + DLANGE is set to zero. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. When N = 0, + DLANGE is set to zero. + + A (input) DOUBLE PRECISION array, dimension (LDA,N) + The m by n matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(M,1). + + WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), + where LWORK >= M when NORM = 'I'; otherwise, WORK is not + referenced. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (min(*m,*n) == 0) { + value = 0.; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + value = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + value = max(d__2,d__3); +/* L10: */ + } +/* L20: */ + } + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1') { + +/* Find norm1(A). */ + + value = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +/* L30: */ + } + value = max(value,sum); +/* L40: */ + } + } else if (lsame_(norm, "I")) { + +/* Find normI(A). */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L50: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +/* L60: */ + } +/* L70: */ + } + value = 0.; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = work[i__]; + value = max(d__1,d__2); +/* L80: */ + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.; + sum = 1.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + dlassq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L90: */ + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of DLANGE */ + +} /* dlange_ */ + +doublereal dlanhs_(char *norm, integer *n, doublereal *a, integer *lda, + doublereal *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + doublereal ret_val, d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, j; + static doublereal sum, scale; + extern logical lsame_(char *, char *); + static doublereal value; + extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DLANHS returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + Hessenberg matrix A. + + Description + =========== + + DLANHS returns the value + + DLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in DLANHS as described + above. + + N (input) INTEGER + The order of the matrix A. N >= 0. When N = 0, DLANHS is + set to zero. + + A (input) DOUBLE PRECISION array, dimension (LDA,N) + The n by n upper Hessenberg matrix A; the part of A below the + first sub-diagonal is not referenced. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(N,1). + + WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), + where LWORK >= N when NORM = 'I'; otherwise, WORK is not + referenced. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (*n == 0) { + value = 0.; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + value = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + value = max(d__2,d__3); +/* L10: */ + } +/* L20: */ + } + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1') { + +/* Find norm1(A). */ + + value = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.; +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +/* L30: */ + } + value = max(value,sum); +/* L40: */ + } + } else if (lsame_(norm, "I")) { + +/* Find normI(A). */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L50: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +/* L60: */ + } +/* L70: */ + } + value = 0.; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = work[i__]; + value = max(d__1,d__2); +/* L80: */ + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.; + sum = 1.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + dlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L90: */ + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of DLANHS */ + +} /* dlanhs_ */ + +doublereal dlanst_(char *norm, integer *n, doublereal *d__, doublereal *e) +{ + /* System generated locals */ + integer i__1; + doublereal ret_val, d__1, d__2, d__3, d__4, d__5; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__; + static doublereal sum, scale; + extern logical lsame_(char *, char *); + static doublereal anorm; + extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DLANST returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + real symmetric tridiagonal matrix A. + + Description + =========== + + DLANST returns the value + + DLANST = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in DLANST as described + above. + + N (input) INTEGER + The order of the matrix A. N >= 0. When N = 0, DLANST is + set to zero. + + D (input) DOUBLE PRECISION array, dimension (N) + The diagonal elements of A. + + E (input) DOUBLE PRECISION array, dimension (N-1) + The (n-1) sub-diagonal or super-diagonal elements of A. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --e; + --d__; + + /* Function Body */ + if (*n <= 0) { + anorm = 0.; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + anorm = (d__1 = d__[*n], abs(d__1)); + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__2 = anorm, d__3 = (d__1 = d__[i__], abs(d__1)); + anorm = max(d__2,d__3); +/* Computing MAX */ + d__2 = anorm, d__3 = (d__1 = e[i__], abs(d__1)); + anorm = max(d__2,d__3); +/* L10: */ + } + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1' || lsame_(norm, "I")) { + +/* Find norm1(A). */ + + if (*n == 1) { + anorm = abs(d__[1]); + } else { +/* Computing MAX */ + d__3 = abs(d__[1]) + abs(e[1]), d__4 = (d__1 = e[*n - 1], abs( + d__1)) + (d__2 = d__[*n], abs(d__2)); + anorm = max(d__3,d__4); + i__1 = *n - 1; + for (i__ = 2; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__4 = anorm, d__5 = (d__1 = d__[i__], abs(d__1)) + (d__2 = e[ + i__], abs(d__2)) + (d__3 = e[i__ - 1], abs(d__3)); + anorm = max(d__4,d__5); +/* L20: */ + } + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.; + sum = 1.; + if (*n > 1) { + i__1 = *n - 1; + dlassq_(&i__1, &e[1], &c__1, &scale, &sum); + sum *= 2; + } + dlassq_(n, &d__[1], &c__1, &scale, &sum); + anorm = scale * sqrt(sum); + } + + ret_val = anorm; + return ret_val; + +/* End of DLANST */ + +} /* dlanst_ */ + +doublereal dlansy_(char *norm, char *uplo, integer *n, doublereal *a, integer + *lda, doublereal *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal ret_val, d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, j; + static doublereal sum, absa, scale; + extern logical lsame_(char *, char *); + static doublereal value; + extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DLANSY returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + real symmetric matrix A. + + Description + =========== + + DLANSY returns the value + + DLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in DLANSY as described + above. + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + symmetric matrix A is to be referenced. + = 'U': Upper triangular part of A is referenced + = 'L': Lower triangular part of A is referenced + + N (input) INTEGER + The order of the matrix A. N >= 0. When N = 0, DLANSY is + set to zero. + + A (input) DOUBLE PRECISION array, dimension (LDA,N) + The symmetric matrix A. If UPLO = 'U', the leading n by n + upper triangular part of A contains the upper triangular part + of the matrix A, and the strictly lower triangular part of A + is not referenced. If UPLO = 'L', the leading n by n lower + triangular part of A contains the lower triangular part of + the matrix A, and the strictly upper triangular part of A is + not referenced. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(N,1). + + WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), + where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, + WORK is not referenced. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (*n == 0) { + value = 0.; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + value = 0.; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs( + d__1)); + value = max(d__2,d__3); +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs( + d__1)); + value = max(d__2,d__3); +/* L30: */ + } +/* L40: */ + } + } + } else if (lsame_(norm, "I") || lsame_(norm, "O") || *(unsigned char *)norm == '1') { + +/* Find normI(A) ( = norm1(A), since A is symmetric). */ + + value = 0.; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + absa = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + sum += absa; + work[i__] += absa; +/* L50: */ + } + work[j] = sum + (d__1 = a[j + j * a_dim1], abs(d__1)); +/* L60: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = work[i__]; + value = max(d__1,d__2); +/* L70: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L80: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = work[j] + (d__1 = a[j + j * a_dim1], abs(d__1)); + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + absa = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + sum += absa; + work[i__] += absa; +/* L90: */ + } + value = max(value,sum); +/* L100: */ + } + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.; + sum = 1.; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + i__2 = j - 1; + dlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L110: */ + } + } else { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j; + dlassq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum); +/* L120: */ + } + } + sum *= 2; + i__1 = *lda + 1; + dlassq_(n, &a[a_offset], &i__1, &scale, &sum); + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of DLANSY */ + +} /* dlansy_ */ + +/* Subroutine */ int dlanv2_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *d__, doublereal *rt1r, doublereal *rt1i, doublereal *rt2r, + doublereal *rt2i, doublereal *cs, doublereal *sn) +{ + /* System generated locals */ + doublereal d__1, d__2; + + /* Builtin functions */ + double d_sign(doublereal *, doublereal *), sqrt(doublereal); + + /* Local variables */ + static doublereal p, z__, aa, bb, cc, dd, cs1, sn1, sab, sac, eps, tau, + temp, scale, bcmax, bcmis, sigma; + + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DLANV2 computes the Schur factorization of a real 2-by-2 nonsymmetric + matrix in standard form: + + [ A B ] = [ CS -SN ] [ AA BB ] [ CS SN ] + [ C D ] [ SN CS ] [ CC DD ] [-SN CS ] + + where either + 1) CC = 0 so that AA and DD are real eigenvalues of the matrix, or + 2) AA = DD and BB*CC < 0, so that AA + or - sqrt(BB*CC) are complex + conjugate eigenvalues. + + Arguments + ========= + + A (input/output) DOUBLE PRECISION + B (input/output) DOUBLE PRECISION + C (input/output) DOUBLE PRECISION + D (input/output) DOUBLE PRECISION + On entry, the elements of the input matrix. + On exit, they are overwritten by the elements of the + standardised Schur form. + + RT1R (output) DOUBLE PRECISION + RT1I (output) DOUBLE PRECISION + RT2R (output) DOUBLE PRECISION + RT2I (output) DOUBLE PRECISION + The real and imaginary parts of the eigenvalues. If the + eigenvalues are a complex conjugate pair, RT1I > 0. + + CS (output) DOUBLE PRECISION + SN (output) DOUBLE PRECISION + Parameters of the rotation matrix. + + Further Details + =============== + + Modified by V. Sima, Research Institute for Informatics, Bucharest, + Romania, to reduce the risk of cancellation errors, + when computing real eigenvalues, and to ensure, if possible, that + abs(RT1R) >= abs(RT2R). + + ===================================================================== +*/ + + + eps = PRECISION; + if (*c__ == 0.) { + *cs = 1.; + *sn = 0.; + goto L10; + + } else if (*b == 0.) { + +/* Swap rows and columns */ + + *cs = 0.; + *sn = 1.; + temp = *d__; + *d__ = *a; + *a = temp; + *b = -(*c__); + *c__ = 0.; + goto L10; + } else if (*a - *d__ == 0. && d_sign(&c_b15, b) != d_sign(&c_b15, c__)) { + *cs = 1.; + *sn = 0.; + goto L10; + } else { + + temp = *a - *d__; + p = temp * .5; +/* Computing MAX */ + d__1 = abs(*b), d__2 = abs(*c__); + bcmax = max(d__1,d__2); +/* Computing MIN */ + d__1 = abs(*b), d__2 = abs(*c__); + bcmis = min(d__1,d__2) * d_sign(&c_b15, b) * d_sign(&c_b15, c__); +/* Computing MAX */ + d__1 = abs(p); + scale = max(d__1,bcmax); + z__ = p / scale * p + bcmax / scale * bcmis; + +/* + If Z is of the order of the machine accuracy, postpone the + decision on the nature of eigenvalues +*/ + + if (z__ >= eps * 4.) { + +/* Real eigenvalues. Compute A and D. */ + + d__1 = sqrt(scale) * sqrt(z__); + z__ = p + d_sign(&d__1, &p); + *a = *d__ + z__; + *d__ -= bcmax / z__ * bcmis; + +/* Compute B and the rotation matrix */ + + tau = dlapy2_(c__, &z__); + *cs = z__ / tau; + *sn = *c__ / tau; + *b -= *c__; + *c__ = 0.; + } else { + +/* + Complex eigenvalues, or real (almost) equal eigenvalues. + Make diagonal elements equal. +*/ + + sigma = *b + *c__; + tau = dlapy2_(&sigma, &temp); + *cs = sqrt((abs(sigma) / tau + 1.) * .5); + *sn = -(p / (tau * *cs)) * d_sign(&c_b15, &sigma); + +/* + Compute [ AA BB ] = [ A B ] [ CS -SN ] + [ CC DD ] [ C D ] [ SN CS ] +*/ + + aa = *a * *cs + *b * *sn; + bb = -(*a) * *sn + *b * *cs; + cc = *c__ * *cs + *d__ * *sn; + dd = -(*c__) * *sn + *d__ * *cs; + +/* + Compute [ A B ] = [ CS SN ] [ AA BB ] + [ C D ] [-SN CS ] [ CC DD ] +*/ + + *a = aa * *cs + cc * *sn; + *b = bb * *cs + dd * *sn; + *c__ = -aa * *sn + cc * *cs; + *d__ = -bb * *sn + dd * *cs; + + temp = (*a + *d__) * .5; + *a = temp; + *d__ = temp; + + if (*c__ != 0.) { + if (*b != 0.) { + if (d_sign(&c_b15, b) == d_sign(&c_b15, c__)) { + +/* Real eigenvalues: reduce to upper triangular form */ + + sab = sqrt((abs(*b))); + sac = sqrt((abs(*c__))); + d__1 = sab * sac; + p = d_sign(&d__1, c__); + tau = 1. / sqrt((d__1 = *b + *c__, abs(d__1))); + *a = temp + p; + *d__ = temp - p; + *b -= *c__; + *c__ = 0.; + cs1 = sab * tau; + sn1 = sac * tau; + temp = *cs * cs1 - *sn * sn1; + *sn = *cs * sn1 + *sn * cs1; + *cs = temp; + } + } else { + *b = -(*c__); + *c__ = 0.; + temp = *cs; + *cs = -(*sn); + *sn = temp; + } + } + } + + } + +L10: + +/* Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I). */ + + *rt1r = *a; + *rt2r = *d__; + if (*c__ == 0.) { + *rt1i = 0.; + *rt2i = 0.; + } else { + *rt1i = sqrt((abs(*b))) * sqrt((abs(*c__))); + *rt2i = -(*rt1i); + } + return 0; + +/* End of DLANV2 */ + +} /* dlanv2_ */ + +doublereal dlapy2_(doublereal *x, doublereal *y) +{ + /* System generated locals */ + doublereal ret_val, d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal w, z__, xabs, yabs; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary + overflow. + + Arguments + ========= + + X (input) DOUBLE PRECISION + Y (input) DOUBLE PRECISION + X and Y specify the values x and y. + + ===================================================================== +*/ + + + xabs = abs(*x); + yabs = abs(*y); + w = max(xabs,yabs); + z__ = min(xabs,yabs); + if (z__ == 0.) { + ret_val = w; + } else { +/* Computing 2nd power */ + d__1 = z__ / w; + ret_val = w * sqrt(d__1 * d__1 + 1.); + } + return ret_val; + +/* End of DLAPY2 */ + +} /* dlapy2_ */ + +doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) +{ + /* System generated locals */ + doublereal ret_val, d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal w, xabs, yabs, zabs; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause + unnecessary overflow. + + Arguments + ========= + + X (input) DOUBLE PRECISION + Y (input) DOUBLE PRECISION + Z (input) DOUBLE PRECISION + X, Y and Z specify the values x, y and z. + + ===================================================================== +*/ + + + xabs = abs(*x); + yabs = abs(*y); + zabs = abs(*z__); +/* Computing MAX */ + d__1 = max(xabs,yabs); + w = max(d__1,zabs); + if (w == 0.) { + ret_val = 0.; + } else { +/* Computing 2nd power */ + d__1 = xabs / w; +/* Computing 2nd power */ + d__2 = yabs / w; +/* Computing 2nd power */ + d__3 = zabs / w; + ret_val = w * sqrt(d__1 * d__1 + d__2 * d__2 + d__3 * d__3); + } + return ret_val; + +/* End of DLAPY3 */ + +} /* dlapy3_ */ + +/* Subroutine */ int dlarf_(char *side, integer *m, integer *n, doublereal *v, + integer *incv, doublereal *tau, doublereal *c__, integer *ldc, + doublereal *work) +{ + /* System generated locals */ + integer c_dim1, c_offset; + doublereal d__1; + + /* Local variables */ + extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DLARF applies a real elementary reflector H to a real m by n matrix + C, from either the left or the right. H is represented in the form + + H = I - tau * v * v' + + where tau is a real scalar and v is a real vector. + + If tau = 0, then H is taken to be the unit matrix. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': form H * C + = 'R': form C * H + + M (input) INTEGER + The number of rows of the matrix C. + + N (input) INTEGER + The number of columns of the matrix C. + + V (input) DOUBLE PRECISION array, dimension + (1 + (M-1)*abs(INCV)) if SIDE = 'L' + or (1 + (N-1)*abs(INCV)) if SIDE = 'R' + The vector v in the representation of H. V is not used if + TAU = 0. + + INCV (input) INTEGER + The increment between elements of v. INCV <> 0. + + TAU (input) DOUBLE PRECISION + The value tau in the representation of H. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by the matrix H * C if SIDE = 'L', + or C * H if SIDE = 'R'. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) DOUBLE PRECISION array, dimension + (N) if SIDE = 'L' + or (M) if SIDE = 'R' + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + if (lsame_(side, "L")) { + +/* Form H * C */ + + if (*tau != 0.) { + +/* w := C' * v */ + + dgemv_("Transpose", m, n, &c_b15, &c__[c_offset], ldc, &v[1], + incv, &c_b29, &work[1], &c__1); + +/* C := C - v * w' */ + + d__1 = -(*tau); + dger_(m, n, &d__1, &v[1], incv, &work[1], &c__1, &c__[c_offset], + ldc); + } + } else { + +/* Form C * H */ + + if (*tau != 0.) { + +/* w := C * v */ + + dgemv_("No transpose", m, n, &c_b15, &c__[c_offset], ldc, &v[1], + incv, &c_b29, &work[1], &c__1); + +/* C := C - w * v' */ + + d__1 = -(*tau); + dger_(m, n, &d__1, &work[1], &c__1, &v[1], incv, &c__[c_offset], + ldc); + } + } + return 0; + +/* End of DLARF */ + +} /* dlarf_ */ + +/* Subroutine */ int dlarfb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, doublereal *v, integer * + ldv, doublereal *t, integer *ldt, doublereal *c__, integer *ldc, + doublereal *work, integer *ldwork) +{ + /* System generated locals */ + integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, + work_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + static char transt[1]; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DLARFB applies a real block reflector H or its transpose H' to a + real m by n matrix C, from either the left or the right. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply H or H' from the Left + = 'R': apply H or H' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply H (No transpose) + = 'T': apply H' (Transpose) + + DIRECT (input) CHARACTER*1 + Indicates how H is formed from a product of elementary + reflectors + = 'F': H = H(1) H(2) . . . H(k) (Forward) + = 'B': H = H(k) . . . H(2) H(1) (Backward) + + STOREV (input) CHARACTER*1 + Indicates how the vectors which define the elementary + reflectors are stored: + = 'C': Columnwise + = 'R': Rowwise + + M (input) INTEGER + The number of rows of the matrix C. + + N (input) INTEGER + The number of columns of the matrix C. + + K (input) INTEGER + The order of the matrix T (= the number of elementary + reflectors whose product defines the block reflector). + + V (input) DOUBLE PRECISION array, dimension + (LDV,K) if STOREV = 'C' + (LDV,M) if STOREV = 'R' and SIDE = 'L' + (LDV,N) if STOREV = 'R' and SIDE = 'R' + The matrix V. See further details. + + LDV (input) INTEGER + The leading dimension of the array V. + If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); + if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); + if STOREV = 'R', LDV >= K. + + T (input) DOUBLE PRECISION array, dimension (LDT,K) + The triangular k by k matrix T in the representation of the + block reflector. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= K. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by H*C or H'*C or C*H or C*H'. + + LDC (input) INTEGER + The leading dimension of the array C. LDA >= max(1,M). + + WORK (workspace) DOUBLE PRECISION array, dimension (LDWORK,K) + + LDWORK (input) INTEGER + The leading dimension of the array WORK. + If SIDE = 'L', LDWORK >= max(1,N); + if SIDE = 'R', LDWORK >= max(1,M). + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + work_dim1 = *ldwork; + work_offset = 1 + work_dim1; + work -= work_offset; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + return 0; + } + + if (lsame_(trans, "N")) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + + if (lsame_(storev, "C")) { + + if (lsame_(direct, "F")) { + +/* + Let V = ( V1 ) (first K rows) + ( V2 ) + where V1 is unit lower triangular. +*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) + + W := C1' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], + &c__1); +/* L10: */ + } + +/* W := W * V1 */ + + dtrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b15, + &v[v_offset], ldv, &work[work_offset], ldwork); + if (*m > *k) { + +/* W := W + C2'*V2 */ + + i__1 = *m - *k; + dgemm_("Transpose", "No transpose", n, k, &i__1, &c_b15, & + c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + v_dim1], + ldv, &c_b15, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + dtrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b15, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V * W' */ + + if (*m > *k) { + +/* C2 := C2 - V2 * W' */ + + i__1 = *m - *k; + dgemm_("No transpose", "Transpose", &i__1, n, k, &c_b151, + &v[*k + 1 + v_dim1], ldv, &work[work_offset], + ldwork, &c_b15, &c__[*k + 1 + c_dim1], ldc); + } + +/* W := W * V1' */ + + dtrmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b15, & + v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; +/* L20: */ + } +/* L30: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + W := C * V = (C1*V1 + C2*V2) (stored in WORK) + + W := C1 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L40: */ + } + +/* W := W * V1 */ + + dtrmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b15, + &v[v_offset], ldv, &work[work_offset], ldwork); + if (*n > *k) { + +/* W := W + C2 * V2 */ + + i__1 = *n - *k; + dgemm_("No transpose", "No transpose", m, k, &i__1, & + c_b15, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + + 1 + v_dim1], ldv, &c_b15, &work[work_offset], + ldwork); + } + +/* W := W * T or W * T' */ + + dtrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b15, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V' */ + + if (*n > *k) { + +/* C2 := C2 - W * V2' */ + + i__1 = *n - *k; + dgemm_("No transpose", "Transpose", m, &i__1, k, &c_b151, + &work[work_offset], ldwork, &v[*k + 1 + v_dim1], + ldv, &c_b15, &c__[(*k + 1) * c_dim1 + 1], ldc); + } + +/* W := W * V1' */ + + dtrmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b15, & + v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; +/* L50: */ + } +/* L60: */ + } + } + + } else { + +/* + Let V = ( V1 ) + ( V2 ) (last K rows) + where V2 is unit upper triangular. +*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) + + W := C2' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * + work_dim1 + 1], &c__1); +/* L70: */ + } + +/* W := W * V2 */ + + dtrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b15, + &v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], + ldwork); + if (*m > *k) { + +/* W := W + C1'*V1 */ + + i__1 = *m - *k; + dgemm_("Transpose", "No transpose", n, k, &i__1, &c_b15, & + c__[c_offset], ldc, &v[v_offset], ldv, &c_b15, & + work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + dtrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b15, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V * W' */ + + if (*m > *k) { + +/* C1 := C1 - V1 * W' */ + + i__1 = *m - *k; + dgemm_("No transpose", "Transpose", &i__1, n, k, &c_b151, + &v[v_offset], ldv, &work[work_offset], ldwork, & + c_b15, &c__[c_offset], ldc) + ; + } + +/* W := W * V2' */ + + dtrmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b15, & + v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], + ldwork); + +/* C2 := C2 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[*m - *k + j + i__ * c_dim1] -= work[i__ + j * + work_dim1]; +/* L80: */ + } +/* L90: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + W := C * V = (C1*V1 + C2*V2) (stored in WORK) + + W := C2 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ + j * work_dim1 + 1], &c__1); +/* L100: */ + } + +/* W := W * V2 */ + + dtrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b15, + &v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], + ldwork); + if (*n > *k) { + +/* W := W + C1 * V1 */ + + i__1 = *n - *k; + dgemm_("No transpose", "No transpose", m, k, &i__1, & + c_b15, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b15, &work[work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + dtrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b15, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V' */ + + if (*n > *k) { + +/* C1 := C1 - W * V1' */ + + i__1 = *n - *k; + dgemm_("No transpose", "Transpose", m, &i__1, k, &c_b151, + &work[work_offset], ldwork, &v[v_offset], ldv, & + c_b15, &c__[c_offset], ldc) + ; + } + +/* W := W * V2' */ + + dtrmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b15, & + v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], + ldwork); + +/* C2 := C2 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + (*n - *k + j) * c_dim1] -= work[i__ + j * + work_dim1]; +/* L110: */ + } +/* L120: */ + } + } + } + + } else if (lsame_(storev, "R")) { + + if (lsame_(direct, "F")) { + +/* + Let V = ( V1 V2 ) (V1: first K columns) + where V1 is unit upper triangular. +*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) + + W := C1' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], + &c__1); +/* L130: */ + } + +/* W := W * V1' */ + + dtrmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b15, & + v[v_offset], ldv, &work[work_offset], ldwork); + if (*m > *k) { + +/* W := W + C2'*V2' */ + + i__1 = *m - *k; + dgemm_("Transpose", "Transpose", n, k, &i__1, &c_b15, & + c__[*k + 1 + c_dim1], ldc, &v[(*k + 1) * v_dim1 + + 1], ldv, &c_b15, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + dtrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b15, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V' * W' */ + + if (*m > *k) { + +/* C2 := C2 - V2' * W' */ + + i__1 = *m - *k; + dgemm_("Transpose", "Transpose", &i__1, n, k, &c_b151, &v[ + (*k + 1) * v_dim1 + 1], ldv, &work[work_offset], + ldwork, &c_b15, &c__[*k + 1 + c_dim1], ldc); + } + +/* W := W * V1 */ + + dtrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b15, + &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; +/* L140: */ + } +/* L150: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + W := C * V' = (C1*V1' + C2*V2') (stored in WORK) + + W := C1 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L160: */ + } + +/* W := W * V1' */ + + dtrmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b15, & + v[v_offset], ldv, &work[work_offset], ldwork); + if (*n > *k) { + +/* W := W + C2 * V2' */ + + i__1 = *n - *k; + dgemm_("No transpose", "Transpose", m, k, &i__1, &c_b15, & + c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k + 1) * + v_dim1 + 1], ldv, &c_b15, &work[work_offset], + ldwork); + } + +/* W := W * T or W * T' */ + + dtrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b15, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V */ + + if (*n > *k) { + +/* C2 := C2 - W * V2 */ + + i__1 = *n - *k; + dgemm_("No transpose", "No transpose", m, &i__1, k, & + c_b151, &work[work_offset], ldwork, &v[(*k + 1) * + v_dim1 + 1], ldv, &c_b15, &c__[(*k + 1) * c_dim1 + + 1], ldc); + } + +/* W := W * V1 */ + + dtrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b15, + &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; +/* L170: */ + } +/* L180: */ + } + + } + + } else { + +/* + Let V = ( V1 V2 ) (V2: last K columns) + where V2 is unit lower triangular. +*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) + + W := C2' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * + work_dim1 + 1], &c__1); +/* L190: */ + } + +/* W := W * V2' */ + + dtrmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b15, & + v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[work_offset] + , ldwork); + if (*m > *k) { + +/* W := W + C1'*V1' */ + + i__1 = *m - *k; + dgemm_("Transpose", "Transpose", n, k, &i__1, &c_b15, & + c__[c_offset], ldc, &v[v_offset], ldv, &c_b15, & + work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + dtrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b15, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V' * W' */ + + if (*m > *k) { + +/* C1 := C1 - V1' * W' */ + + i__1 = *m - *k; + dgemm_("Transpose", "Transpose", &i__1, n, k, &c_b151, &v[ + v_offset], ldv, &work[work_offset], ldwork, & + c_b15, &c__[c_offset], ldc); + } + +/* W := W * V2 */ + + dtrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b15, + &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + +/* C2 := C2 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[*m - *k + j + i__ * c_dim1] -= work[i__ + j * + work_dim1]; +/* L200: */ + } +/* L210: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + W := C * V' = (C1*V1' + C2*V2') (stored in WORK) + + W := C2 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ + j * work_dim1 + 1], &c__1); +/* L220: */ + } + +/* W := W * V2' */ + + dtrmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b15, & + v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[work_offset] + , ldwork); + if (*n > *k) { + +/* W := W + C1 * V1' */ + + i__1 = *n - *k; + dgemm_("No transpose", "Transpose", m, k, &i__1, &c_b15, & + c__[c_offset], ldc, &v[v_offset], ldv, &c_b15, & + work[work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + dtrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b15, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V */ + + if (*n > *k) { + +/* C1 := C1 - W * V1 */ + + i__1 = *n - *k; + dgemm_("No transpose", "No transpose", m, &i__1, k, & + c_b151, &work[work_offset], ldwork, &v[v_offset], + ldv, &c_b15, &c__[c_offset], ldc); + } + +/* W := W * V2 */ + + dtrmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b15, + &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + (*n - *k + j) * c_dim1] -= work[i__ + j * + work_dim1]; +/* L230: */ + } +/* L240: */ + } + + } + + } + } + + return 0; + +/* End of DLARFB */ + +} /* dlarfb_ */ + +/* Subroutine */ int dlarfg_(integer *n, doublereal *alpha, doublereal *x, + integer *incx, doublereal *tau) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Builtin functions */ + double d_sign(doublereal *, doublereal *); + + /* Local variables */ + static integer j, knt; + static doublereal beta; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + static doublereal xnorm; + + static doublereal safmin, rsafmn; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + DLARFG generates a real elementary reflector H of order n, such + that + + H * ( alpha ) = ( beta ), H' * H = I. + ( x ) ( 0 ) + + where alpha and beta are scalars, and x is an (n-1)-element real + vector. H is represented in the form + + H = I - tau * ( 1 ) * ( 1 v' ) , + ( v ) + + where tau is a real scalar and v is a real (n-1)-element + vector. + + If the elements of x are all zero, then tau = 0 and H is taken to be + the unit matrix. + + Otherwise 1 <= tau <= 2. + + Arguments + ========= + + N (input) INTEGER + The order of the elementary reflector. + + ALPHA (input/output) DOUBLE PRECISION + On entry, the value alpha. + On exit, it is overwritten with the value beta. + + X (input/output) DOUBLE PRECISION array, dimension + (1+(N-2)*abs(INCX)) + On entry, the vector x. + On exit, it is overwritten with the vector v. + + INCX (input) INTEGER + The increment between elements of X. INCX > 0. + + TAU (output) DOUBLE PRECISION + The value tau. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n <= 1) { + *tau = 0.; + return 0; + } + + i__1 = *n - 1; + xnorm = dnrm2_(&i__1, &x[1], incx); + + if (xnorm == 0.) { + +/* H = I */ + + *tau = 0.; + } else { + +/* general case */ + + d__1 = dlapy2_(alpha, &xnorm); + beta = -d_sign(&d__1, alpha); + safmin = SAFEMINIMUM / EPSILON; + if (abs(beta) < safmin) { + +/* XNORM, BETA may be inaccurate; scale X and recompute them */ + + rsafmn = 1. / safmin; + knt = 0; +L10: + ++knt; + i__1 = *n - 1; + dscal_(&i__1, &rsafmn, &x[1], incx); + beta *= rsafmn; + *alpha *= rsafmn; + if (abs(beta) < safmin) { + goto L10; + } + +/* New BETA is at most 1, at least SAFMIN */ + + i__1 = *n - 1; + xnorm = dnrm2_(&i__1, &x[1], incx); + d__1 = dlapy2_(alpha, &xnorm); + beta = -d_sign(&d__1, alpha); + *tau = (beta - *alpha) / beta; + i__1 = *n - 1; + d__1 = 1. / (*alpha - beta); + dscal_(&i__1, &d__1, &x[1], incx); + +/* If ALPHA is subnormal, it may lose relative accuracy */ + + *alpha = beta; + i__1 = knt; + for (j = 1; j <= i__1; ++j) { + *alpha *= safmin; +/* L20: */ + } + } else { + *tau = (beta - *alpha) / beta; + i__1 = *n - 1; + d__1 = 1. / (*alpha - beta); + dscal_(&i__1, &d__1, &x[1], incx); + *alpha = beta; + } + } + + return 0; + +/* End of DLARFG */ + +} /* dlarfg_ */ + +/* Subroutine */ int dlarft_(char *direct, char *storev, integer *n, integer * + k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t, + integer *ldt) +{ + /* System generated locals */ + integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Local variables */ + static integer i__, j; + static doublereal vii; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), dtrmv_(char *, + char *, char *, integer *, doublereal *, integer *, doublereal *, + integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DLARFT forms the triangular factor T of a real block reflector H + of order n, which is defined as a product of k elementary reflectors. + + If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; + + If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. + + If STOREV = 'C', the vector which defines the elementary reflector + H(i) is stored in the i-th column of the array V, and + + H = I - V * T * V' + + If STOREV = 'R', the vector which defines the elementary reflector + H(i) is stored in the i-th row of the array V, and + + H = I - V' * T * V + + Arguments + ========= + + DIRECT (input) CHARACTER*1 + Specifies the order in which the elementary reflectors are + multiplied to form the block reflector: + = 'F': H = H(1) H(2) . . . H(k) (Forward) + = 'B': H = H(k) . . . H(2) H(1) (Backward) + + STOREV (input) CHARACTER*1 + Specifies how the vectors which define the elementary + reflectors are stored (see also Further Details): + = 'C': columnwise + = 'R': rowwise + + N (input) INTEGER + The order of the block reflector H. N >= 0. + + K (input) INTEGER + The order of the triangular factor T (= the number of + elementary reflectors). K >= 1. + + V (input/output) DOUBLE PRECISION array, dimension + (LDV,K) if STOREV = 'C' + (LDV,N) if STOREV = 'R' + The matrix V. See further details. + + LDV (input) INTEGER + The leading dimension of the array V. + If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i). + + T (output) DOUBLE PRECISION array, dimension (LDT,K) + The k by k triangular factor T of the block reflector. + If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is + lower triangular. The rest of the array is not used. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= K. + + Further Details + =============== + + The shape of the matrix V and the storage of the vectors which define + the H(i) is best illustrated by the following example with n = 5 and + k = 3. The elements equal to 1 are not stored; the corresponding + array elements are modified but restored on exit. The rest of the + array is not used. + + DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': + + V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) + ( v1 1 ) ( 1 v2 v2 v2 ) + ( v1 v2 1 ) ( 1 v3 v3 ) + ( v1 v2 v3 ) + ( v1 v2 v3 ) + + DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': + + V = ( v1 v2 v3 ) V = ( v1 v1 1 ) + ( v1 v2 v3 ) ( v2 v2 v2 1 ) + ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) + ( 1 v3 ) + ( 1 ) + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + --tau; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + + /* Function Body */ + if (*n == 0) { + return 0; + } + + if (lsame_(direct, "F")) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + if (tau[i__] == 0.) { + +/* H(i) = I */ + + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + t[j + i__ * t_dim1] = 0.; +/* L10: */ + } + } else { + +/* general case */ + + vii = v[i__ + i__ * v_dim1]; + v[i__ + i__ * v_dim1] = 1.; + if (lsame_(storev, "C")) { + +/* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */ + + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + d__1 = -tau[i__]; + dgemv_("Transpose", &i__2, &i__3, &d__1, &v[i__ + v_dim1], + ldv, &v[i__ + i__ * v_dim1], &c__1, &c_b29, &t[ + i__ * t_dim1 + 1], &c__1); + } else { + +/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */ + + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + d__1 = -tau[i__]; + dgemv_("No transpose", &i__2, &i__3, &d__1, &v[i__ * + v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, & + c_b29, &t[i__ * t_dim1 + 1], &c__1); + } + v[i__ + i__ * v_dim1] = vii; + +/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ + + i__2 = i__ - 1; + dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ + t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); + t[i__ + i__ * t_dim1] = tau[i__]; + } +/* L20: */ + } + } else { + for (i__ = *k; i__ >= 1; --i__) { + if (tau[i__] == 0.) { + +/* H(i) = I */ + + i__1 = *k; + for (j = i__; j <= i__1; ++j) { + t[j + i__ * t_dim1] = 0.; +/* L30: */ + } + } else { + +/* general case */ + + if (i__ < *k) { + if (lsame_(storev, "C")) { + vii = v[*n - *k + i__ + i__ * v_dim1]; + v[*n - *k + i__ + i__ * v_dim1] = 1.; + +/* + T(i+1:k,i) := + - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) +*/ + + i__1 = *n - *k + i__; + i__2 = *k - i__; + d__1 = -tau[i__]; + dgemv_("Transpose", &i__1, &i__2, &d__1, &v[(i__ + 1) + * v_dim1 + 1], ldv, &v[i__ * v_dim1 + 1], & + c__1, &c_b29, &t[i__ + 1 + i__ * t_dim1], & + c__1); + v[*n - *k + i__ + i__ * v_dim1] = vii; + } else { + vii = v[i__ + (*n - *k + i__) * v_dim1]; + v[i__ + (*n - *k + i__) * v_dim1] = 1.; + +/* + T(i+1:k,i) := + - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' +*/ + + i__1 = *k - i__; + i__2 = *n - *k + i__; + d__1 = -tau[i__]; + dgemv_("No transpose", &i__1, &i__2, &d__1, &v[i__ + + 1 + v_dim1], ldv, &v[i__ + v_dim1], ldv, & + c_b29, &t[i__ + 1 + i__ * t_dim1], &c__1); + v[i__ + (*n - *k + i__) * v_dim1] = vii; + } + +/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ + + i__1 = *k - i__; + dtrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ + + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * + t_dim1], &c__1) + ; + } + t[i__ + i__ * t_dim1] = tau[i__]; + } +/* L40: */ + } + } + return 0; + +/* End of DLARFT */ + +} /* dlarft_ */ + +/* Subroutine */ int dlarfx_(char *side, integer *m, integer *n, doublereal * + v, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work) +{ + /* System generated locals */ + integer c_dim1, c_offset, i__1; + doublereal d__1; + + /* Local variables */ + static integer j; + static doublereal t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, v5, + v6, v7, v8, v9, t10, v10, sum; + extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DLARFX applies a real elementary reflector H to a real m by n + matrix C, from either the left or the right. H is represented in the + form + + H = I - tau * v * v' + + where tau is a real scalar and v is a real vector. + + If tau = 0, then H is taken to be the unit matrix + + This version uses inline code if H has order < 11. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': form H * C + = 'R': form C * H + + M (input) INTEGER + The number of rows of the matrix C. + + N (input) INTEGER + The number of columns of the matrix C. + + V (input) DOUBLE PRECISION array, dimension (M) if SIDE = 'L' + or (N) if SIDE = 'R' + The vector v in the representation of H. + + TAU (input) DOUBLE PRECISION + The value tau in the representation of H. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by the matrix H * C if SIDE = 'L', + or C * H if SIDE = 'R'. + + LDC (input) INTEGER + The leading dimension of the array C. LDA >= (1,M). + + WORK (workspace) DOUBLE PRECISION array, dimension + (N) if SIDE = 'L' + or (M) if SIDE = 'R' + WORK is not referenced if H has order < 11. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + if (*tau == 0.) { + return 0; + } + if (lsame_(side, "L")) { + +/* Form H * C, where H has order m. */ + + switch (*m) { + case 1: goto L10; + case 2: goto L30; + case 3: goto L50; + case 4: goto L70; + case 5: goto L90; + case 6: goto L110; + case 7: goto L130; + case 8: goto L150; + case 9: goto L170; + case 10: goto L190; + } + +/* + Code for general M + + w := C'*v +*/ + + dgemv_("Transpose", m, n, &c_b15, &c__[c_offset], ldc, &v[1], &c__1, & + c_b29, &work[1], &c__1); + +/* C := C - tau * v * w' */ + + d__1 = -(*tau); + dger_(m, n, &d__1, &v[1], &c__1, &work[1], &c__1, &c__[c_offset], ldc) + ; + goto L410; +L10: + +/* Special code for 1 x 1 Householder */ + + t1 = 1. - *tau * v[1] * v[1]; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + c__[j * c_dim1 + 1] = t1 * c__[j * c_dim1 + 1]; +/* L20: */ + } + goto L410; +L30: + +/* Special code for 2 x 2 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; +/* L40: */ + } + goto L410; +L50: + +/* Special code for 3 x 3 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; +/* L60: */ + } + goto L410; +L70: + +/* Special code for 4 x 4 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; +/* L80: */ + } + goto L410; +L90: + +/* Special code for 5 x 5 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; +/* L100: */ + } + goto L410; +L110: + +/* Special code for 6 x 6 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; +/* L120: */ + } + goto L410; +L130: + +/* Special code for 7 x 7 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * + c_dim1 + 7]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; +/* L140: */ + } + goto L410; +L150: + +/* Special code for 8 x 8 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * + c_dim1 + 7] + v8 * c__[j * c_dim1 + 8]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + c__[j * c_dim1 + 8] -= sum * t8; +/* L160: */ + } + goto L410; +L170: + +/* Special code for 9 x 9 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * + c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j * + c_dim1 + 9]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + c__[j * c_dim1 + 8] -= sum * t8; + c__[j * c_dim1 + 9] -= sum * t9; +/* L180: */ + } + goto L410; +L190: + +/* Special code for 10 x 10 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + v10 = v[10]; + t10 = *tau * v10; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * + c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j * + c_dim1 + 9] + v10 * c__[j * c_dim1 + 10]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + c__[j * c_dim1 + 8] -= sum * t8; + c__[j * c_dim1 + 9] -= sum * t9; + c__[j * c_dim1 + 10] -= sum * t10; +/* L200: */ + } + goto L410; + } else { + +/* Form C * H, where H has order n. */ + + switch (*n) { + case 1: goto L210; + case 2: goto L230; + case 3: goto L250; + case 4: goto L270; + case 5: goto L290; + case 6: goto L310; + case 7: goto L330; + case 8: goto L350; + case 9: goto L370; + case 10: goto L390; + } + +/* + Code for general N + + w := C * v +*/ + + dgemv_("No transpose", m, n, &c_b15, &c__[c_offset], ldc, &v[1], & + c__1, &c_b29, &work[1], &c__1); + +/* C := C - tau * w * v' */ + + d__1 = -(*tau); + dger_(m, n, &d__1, &work[1], &c__1, &v[1], &c__1, &c__[c_offset], ldc) + ; + goto L410; +L210: + +/* Special code for 1 x 1 Householder */ + + t1 = 1. - *tau * v[1] * v[1]; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + c__[j + c_dim1] = t1 * c__[j + c_dim1]; +/* L220: */ + } + goto L410; +L230: + +/* Special code for 2 x 2 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; +/* L240: */ + } + goto L410; +L250: + +/* Special code for 3 x 3 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; +/* L260: */ + } + goto L410; +L270: + +/* Special code for 4 x 4 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; +/* L280: */ + } + goto L410; +L290: + +/* Special code for 5 x 5 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; +/* L300: */ + } + goto L410; +L310: + +/* Special code for 6 x 6 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; +/* L320: */ + } + goto L410; +L330: + +/* Special code for 7 x 7 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; +/* L340: */ + } + goto L410; +L350: + +/* Special code for 8 x 8 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + c__[j + (c_dim1 << 3)] -= sum * t8; +/* L360: */ + } + goto L410; +L370: + +/* Special code for 9 x 9 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)] + v9 * c__[ + j + c_dim1 * 9]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + c__[j + (c_dim1 << 3)] -= sum * t8; + c__[j + c_dim1 * 9] -= sum * t9; +/* L380: */ + } + goto L410; +L390: + +/* Special code for 10 x 10 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + v10 = v[10]; + t10 = *tau * v10; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)] + v9 * c__[ + j + c_dim1 * 9] + v10 * c__[j + c_dim1 * 10]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + c__[j + (c_dim1 << 3)] -= sum * t8; + c__[j + c_dim1 * 9] -= sum * t9; + c__[j + c_dim1 * 10] -= sum * t10; +/* L400: */ + } + goto L410; + } +L410: + return 0; + +/* End of DLARFX */ + +} /* dlarfx_ */ + +/* Subroutine */ int dlartg_(doublereal *f, doublereal *g, doublereal *cs, + doublereal *sn, doublereal *r__) +{ + /* Initialized data */ + + static logical first = TRUE_; + + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Builtin functions */ + double log(doublereal), pow_di(doublereal *, integer *), sqrt(doublereal); + + /* Local variables */ + static integer i__; + static doublereal f1, g1, eps, scale; + static integer count; + static doublereal safmn2, safmx2; + + static doublereal safmin; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + DLARTG generate a plane rotation so that + + [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1. + [ -SN CS ] [ G ] [ 0 ] + + This is a slower, more accurate version of the BLAS1 routine DROTG, + with the following other differences: + F and G are unchanged on return. + If G=0, then CS=1 and SN=0. + If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any + floating point operations (saves work in DBDSQR when + there are zeros on the diagonal). + + If F exceeds G in magnitude, CS will be positive. + + Arguments + ========= + + F (input) DOUBLE PRECISION + The first component of vector to be rotated. + + G (input) DOUBLE PRECISION + The second component of vector to be rotated. + + CS (output) DOUBLE PRECISION + The cosine of the rotation. + + SN (output) DOUBLE PRECISION + The sine of the rotation. + + R (output) DOUBLE PRECISION + The nonzero component of the rotated vector. + + ===================================================================== +*/ + + + if (first) { + first = FALSE_; + safmin = SAFEMINIMUM; + eps = EPSILON; + d__1 = BASE; + i__1 = (integer) (log(safmin / eps) / log(BASE) / + 2.); + safmn2 = pow_di(&d__1, &i__1); + safmx2 = 1. / safmn2; + } + if (*g == 0.) { + *cs = 1.; + *sn = 0.; + *r__ = *f; + } else if (*f == 0.) { + *cs = 0.; + *sn = 1.; + *r__ = *g; + } else { + f1 = *f; + g1 = *g; +/* Computing MAX */ + d__1 = abs(f1), d__2 = abs(g1); + scale = max(d__1,d__2); + if (scale >= safmx2) { + count = 0; +L10: + ++count; + f1 *= safmn2; + g1 *= safmn2; +/* Computing MAX */ + d__1 = abs(f1), d__2 = abs(g1); + scale = max(d__1,d__2); + if (scale >= safmx2) { + goto L10; + } +/* Computing 2nd power */ + d__1 = f1; +/* Computing 2nd power */ + d__2 = g1; + *r__ = sqrt(d__1 * d__1 + d__2 * d__2); + *cs = f1 / *r__; + *sn = g1 / *r__; + i__1 = count; + for (i__ = 1; i__ <= i__1; ++i__) { + *r__ *= safmx2; +/* L20: */ + } + } else if (scale <= safmn2) { + count = 0; +L30: + ++count; + f1 *= safmx2; + g1 *= safmx2; +/* Computing MAX */ + d__1 = abs(f1), d__2 = abs(g1); + scale = max(d__1,d__2); + if (scale <= safmn2) { + goto L30; + } +/* Computing 2nd power */ + d__1 = f1; +/* Computing 2nd power */ + d__2 = g1; + *r__ = sqrt(d__1 * d__1 + d__2 * d__2); + *cs = f1 / *r__; + *sn = g1 / *r__; + i__1 = count; + for (i__ = 1; i__ <= i__1; ++i__) { + *r__ *= safmn2; +/* L40: */ + } + } else { +/* Computing 2nd power */ + d__1 = f1; +/* Computing 2nd power */ + d__2 = g1; + *r__ = sqrt(d__1 * d__1 + d__2 * d__2); + *cs = f1 / *r__; + *sn = g1 / *r__; + } + if (abs(*f) > abs(*g) && *cs < 0.) { + *cs = -(*cs); + *sn = -(*sn); + *r__ = -(*r__); + } + } + return 0; + +/* End of DLARTG */ + +} /* dlartg_ */ + +/* Subroutine */ int dlas2_(doublereal *f, doublereal *g, doublereal *h__, + doublereal *ssmin, doublereal *ssmax) +{ + /* System generated locals */ + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal c__, fa, ga, ha, as, at, au, fhmn, fhmx; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + DLAS2 computes the singular values of the 2-by-2 matrix + [ F G ] + [ 0 H ]. + On return, SSMIN is the smaller singular value and SSMAX is the + larger singular value. + + Arguments + ========= + + F (input) DOUBLE PRECISION + The (1,1) element of the 2-by-2 matrix. + + G (input) DOUBLE PRECISION + The (1,2) element of the 2-by-2 matrix. + + H (input) DOUBLE PRECISION + The (2,2) element of the 2-by-2 matrix. + + SSMIN (output) DOUBLE PRECISION + The smaller singular value. + + SSMAX (output) DOUBLE PRECISION + The larger singular value. + + Further Details + =============== + + Barring over/underflow, all output quantities are correct to within + a few units in the last place (ulps), even in the absence of a guard + digit in addition/subtraction. + + In IEEE arithmetic, the code works correctly if one matrix element is + infinite. + + Overflow will not occur unless the largest singular value itself + overflows, or is within a few ulps of overflow. (On machines with + partial overflow, like the Cray, overflow may occur if the largest + singular value is within a factor of 2 of overflow.) + + Underflow is harmless if underflow is gradual. Otherwise, results + may correspond to a matrix modified by perturbations of size near + the underflow threshold. + + ==================================================================== +*/ + + + fa = abs(*f); + ga = abs(*g); + ha = abs(*h__); + fhmn = min(fa,ha); + fhmx = max(fa,ha); + if (fhmn == 0.) { + *ssmin = 0.; + if (fhmx == 0.) { + *ssmax = ga; + } else { +/* Computing 2nd power */ + d__1 = min(fhmx,ga) / max(fhmx,ga); + *ssmax = max(fhmx,ga) * sqrt(d__1 * d__1 + 1.); + } + } else { + if (ga < fhmx) { + as = fhmn / fhmx + 1.; + at = (fhmx - fhmn) / fhmx; +/* Computing 2nd power */ + d__1 = ga / fhmx; + au = d__1 * d__1; + c__ = 2. / (sqrt(as * as + au) + sqrt(at * at + au)); + *ssmin = fhmn * c__; + *ssmax = fhmx / c__; + } else { + au = fhmx / ga; + if (au == 0.) { + +/* + Avoid possible harmful underflow if exponent range + asymmetric (true SSMIN may not underflow even if + AU underflows) +*/ + + *ssmin = fhmn * fhmx / ga; + *ssmax = ga; + } else { + as = fhmn / fhmx + 1.; + at = (fhmx - fhmn) / fhmx; +/* Computing 2nd power */ + d__1 = as * au; +/* Computing 2nd power */ + d__2 = at * au; + c__ = 1. / (sqrt(d__1 * d__1 + 1.) + sqrt(d__2 * d__2 + 1.)); + *ssmin = fhmn * c__ * au; + *ssmin += *ssmin; + *ssmax = ga / (c__ + c__); + } + } + } + return 0; + +/* End of DLAS2 */ + +} /* dlas2_ */ + +/* Subroutine */ int dlascl_(char *type__, integer *kl, integer *ku, + doublereal *cfrom, doublereal *cto, integer *m, integer *n, + doublereal *a, integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + + /* Local variables */ + static integer i__, j, k1, k2, k3, k4; + static doublereal mul, cto1; + static logical done; + static doublereal ctoc; + extern logical lsame_(char *, char *); + static integer itype; + static doublereal cfrom1; + + static doublereal cfromc; + extern /* Subroutine */ int xerbla_(char *, integer *); + static doublereal bignum, smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DLASCL multiplies the M by N real matrix A by the real scalar + CTO/CFROM. This is done without over/underflow as long as the final + result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that + A may be full, upper triangular, lower triangular, upper Hessenberg, + or banded. + + Arguments + ========= + + TYPE (input) CHARACTER*1 + TYPE indices the storage type of the input matrix. + = 'G': A is a full matrix. + = 'L': A is a lower triangular matrix. + = 'U': A is an upper triangular matrix. + = 'H': A is an upper Hessenberg matrix. + = 'B': A is a symmetric band matrix with lower bandwidth KL + and upper bandwidth KU and with the only the lower + half stored. + = 'Q': A is a symmetric band matrix with lower bandwidth KL + and upper bandwidth KU and with the only the upper + half stored. + = 'Z': A is a band matrix with lower bandwidth KL and upper + bandwidth KU. + + KL (input) INTEGER + The lower bandwidth of A. Referenced only if TYPE = 'B', + 'Q' or 'Z'. + + KU (input) INTEGER + The upper bandwidth of A. Referenced only if TYPE = 'B', + 'Q' or 'Z'. + + CFROM (input) DOUBLE PRECISION + CTO (input) DOUBLE PRECISION + The matrix A is multiplied by CTO/CFROM. A(I,J) is computed + without over/underflow if the final result CTO*A(I,J)/CFROM + can be represented without over/underflow. CFROM must be + nonzero. + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,M) + The matrix to be multiplied by CTO/CFROM. See TYPE for the + storage type. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + INFO (output) INTEGER + 0 - successful exit + <0 - if INFO = -i, the i-th argument had an illegal value. + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + + if (lsame_(type__, "G")) { + itype = 0; + } else if (lsame_(type__, "L")) { + itype = 1; + } else if (lsame_(type__, "U")) { + itype = 2; + } else if (lsame_(type__, "H")) { + itype = 3; + } else if (lsame_(type__, "B")) { + itype = 4; + } else if (lsame_(type__, "Q")) { + itype = 5; + } else if (lsame_(type__, "Z")) { + itype = 6; + } else { + itype = -1; + } + + if (itype == -1) { + *info = -1; + } else if (*cfrom == 0.) { + *info = -4; + } else if (*m < 0) { + *info = -6; + } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { + *info = -7; + } else if (itype <= 3 && *lda < max(1,*m)) { + *info = -9; + } else if (itype >= 4) { +/* Computing MAX */ + i__1 = *m - 1; + if (*kl < 0 || *kl > max(i__1,0)) { + *info = -2; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *n - 1; + if (*ku < 0 || *ku > max(i__1,0) || (itype == 4 || itype == 5) && + *kl != *ku) { + *info = -3; + } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < * + ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) { + *info = -9; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASCL", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *m == 0) { + return 0; + } + +/* Get machine parameters */ + + smlnum = SAFEMINIMUM; + bignum = 1. / smlnum; + + cfromc = *cfrom; + ctoc = *cto; + +L10: + cfrom1 = cfromc * smlnum; + cto1 = ctoc / bignum; + if (abs(cfrom1) > abs(ctoc) && ctoc != 0.) { + mul = smlnum; + done = FALSE_; + cfromc = cfrom1; + } else if (abs(cto1) > abs(cfromc)) { + mul = bignum; + done = FALSE_; + ctoc = cto1; + } else { + mul = ctoc / cfromc; + done = TRUE_; + } + + if (itype == 0) { + +/* Full matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L20: */ + } +/* L30: */ + } + + } else if (itype == 1) { + +/* Lower triangular matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L40: */ + } +/* L50: */ + } + + } else if (itype == 2) { + +/* Upper triangular matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L60: */ + } +/* L70: */ + } + + } else if (itype == 3) { + +/* Upper Hessenberg matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j + 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L80: */ + } +/* L90: */ + } + + } else if (itype == 4) { + +/* Lower half of a symmetric band matrix */ + + k3 = *kl + 1; + k4 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = k3, i__4 = k4 - j; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L100: */ + } +/* L110: */ + } + + } else if (itype == 5) { + +/* Upper half of a symmetric band matrix */ + + k1 = *ku + 2; + k3 = *ku + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = k1 - j; + i__3 = k3; + for (i__ = max(i__2,1); i__ <= i__3; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L120: */ + } +/* L130: */ + } + + } else if (itype == 6) { + +/* Band matrix */ + + k1 = *kl + *ku + 2; + k2 = *kl + 1; + k3 = (*kl << 1) + *ku + 1; + k4 = *kl + *ku + 1 + *m; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__3 = k1 - j; +/* Computing MIN */ + i__4 = k3, i__5 = k4 - j; + i__2 = min(i__4,i__5); + for (i__ = max(i__3,k2); i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L140: */ + } +/* L150: */ + } + + } + + if (! done) { + goto L10; + } + + return 0; + +/* End of DLASCL */ + +} /* dlascl_ */ + +/* Subroutine */ int dlasd0_(integer *n, integer *sqre, doublereal *d__, + doublereal *e, doublereal *u, integer *ldu, doublereal *vt, integer * + ldvt, integer *smlsiz, integer *iwork, doublereal *work, integer * + info) +{ + /* System generated locals */ + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + + /* Local variables */ + static integer i__, j, m, i1, ic, lf, nd, ll, nl, nr, im1, ncc, nlf, nrf, + iwk, lvl, ndb1, nlp1, nrp1; + static doublereal beta; + static integer idxq, nlvl; + static doublereal alpha; + static integer inode, ndiml, idxqc, ndimr, itemp, sqrei; + extern /* Subroutine */ int dlasd1_(integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *, integer *, doublereal *, + integer *), dlasdq_(char *, integer *, integer *, integer *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *), dlasdt_(integer *, integer *, + integer *, integer *, integer *, integer *, integer *), xerbla_( + char *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + Using a divide and conquer approach, DLASD0 computes the singular + value decomposition (SVD) of a real upper bidiagonal N-by-M + matrix B with diagonal D and offdiagonal E, where M = N + SQRE. + The algorithm computes orthogonal matrices U and VT such that + B = U * S * VT. The singular values S are overwritten on D. + + A related subroutine, DLASDA, computes only the singular values, + and optionally, the singular vectors in compact form. + + Arguments + ========= + + N (input) INTEGER + On entry, the row dimension of the upper bidiagonal matrix. + This is also the dimension of the main diagonal array D. + + SQRE (input) INTEGER + Specifies the column dimension of the bidiagonal matrix. + = 0: The bidiagonal matrix has column dimension M = N; + = 1: The bidiagonal matrix has column dimension M = N+1; + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry D contains the main diagonal of the bidiagonal + matrix. + On exit D, if INFO = 0, contains its singular values. + + E (input) DOUBLE PRECISION array, dimension (M-1) + Contains the subdiagonal entries of the bidiagonal matrix. + On exit, E has been destroyed. + + U (output) DOUBLE PRECISION array, dimension at least (LDQ, N) + On exit, U contains the left singular vectors. + + LDU (input) INTEGER + On entry, leading dimension of U. + + VT (output) DOUBLE PRECISION array, dimension at least (LDVT, M) + On exit, VT' contains the right singular vectors. + + LDVT (input) INTEGER + On entry, leading dimension of VT. + + SMLSIZ (input) INTEGER + On entry, maximum size of the subproblems at the + bottom of the computation tree. + + IWORK INTEGER work array. + Dimension must be at least (8 * N) + + WORK DOUBLE PRECISION work array. + Dimension must be at least (3 * M**2 + 2 * M) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --iwork; + --work; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -1; + } else if (*sqre < 0 || *sqre > 1) { + *info = -2; + } + + m = *n + *sqre; + + if (*ldu < *n) { + *info = -6; + } else if (*ldvt < m) { + *info = -8; + } else if (*smlsiz < 3) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASD0", &i__1); + return 0; + } + +/* If the input matrix is too small, call DLASDQ to find the SVD. */ + + if (*n <= *smlsiz) { + dlasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset], + ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], info); + return 0; + } + +/* Set up the computation tree. */ + + inode = 1; + ndiml = inode + *n; + ndimr = ndiml + *n; + idxq = ndimr + *n; + iwk = idxq + *n; + dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], + smlsiz); + +/* + For the nodes on bottom level of the tree, solve + their subproblems by DLASDQ. +*/ + + ndb1 = (nd + 1) / 2; + ncc = 0; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + +/* + IC : center row of each node + NL : number of rows of left subproblem + NR : number of rows of right subproblem + NLF: starting row of the left subproblem + NRF: starting row of the right subproblem +*/ + + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nlp1 = nl + 1; + nr = iwork[ndimr + i1]; + nrp1 = nr + 1; + nlf = ic - nl; + nrf = ic + 1; + sqrei = 1; + dlasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], &vt[ + nlf + nlf * vt_dim1], ldvt, &u[nlf + nlf * u_dim1], ldu, &u[ + nlf + nlf * u_dim1], ldu, &work[1], info); + if (*info != 0) { + return 0; + } + itemp = idxq + nlf - 2; + i__2 = nl; + for (j = 1; j <= i__2; ++j) { + iwork[itemp + j] = j; +/* L10: */ + } + if (i__ == nd) { + sqrei = *sqre; + } else { + sqrei = 1; + } + nrp1 = nr + sqrei; + dlasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], &vt[ + nrf + nrf * vt_dim1], ldvt, &u[nrf + nrf * u_dim1], ldu, &u[ + nrf + nrf * u_dim1], ldu, &work[1], info); + if (*info != 0) { + return 0; + } + itemp = idxq + ic; + i__2 = nr; + for (j = 1; j <= i__2; ++j) { + iwork[itemp + j - 1] = j; +/* L20: */ + } +/* L30: */ + } + +/* Now conquer each subproblem bottom-up. */ + + for (lvl = nlvl; lvl >= 1; --lvl) { + +/* + Find the first node LF and last node LL on the + current level LVL. +*/ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__1 = lvl - 1; + lf = pow_ii(&c__2, &i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; + for (i__ = lf; i__ <= i__1; ++i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + if (*sqre == 0 && i__ == ll) { + sqrei = *sqre; + } else { + sqrei = 1; + } + idxqc = idxq + nlf - 1; + alpha = d__[ic]; + beta = e[ic]; + dlasd1_(&nl, &nr, &sqrei, &d__[nlf], &alpha, &beta, &u[nlf + nlf * + u_dim1], ldu, &vt[nlf + nlf * vt_dim1], ldvt, &iwork[ + idxqc], &iwork[iwk], &work[1], info); + if (*info != 0) { + return 0; + } +/* L40: */ + } +/* L50: */ + } + + return 0; + +/* End of DLASD0 */ + +} /* dlasd0_ */ + +/* Subroutine */ int dlasd1_(integer *nl, integer *nr, integer *sqre, + doublereal *d__, doublereal *alpha, doublereal *beta, doublereal *u, + integer *ldu, doublereal *vt, integer *ldvt, integer *idxq, integer * + iwork, doublereal *work, integer *info) +{ + /* System generated locals */ + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + static integer i__, k, m, n, n1, n2, iq, iz, iu2, ldq, idx, ldu2, ivt2, + idxc, idxp, ldvt2; + extern /* Subroutine */ int dlasd2_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *, + integer *, integer *, integer *, integer *, integer *), dlasd3_( + integer *, integer *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, doublereal *, integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *), + dlamrg_(integer *, integer *, doublereal *, integer *, integer *, + integer *); + static integer isigma; + extern /* Subroutine */ int xerbla_(char *, integer *); + static doublereal orgnrm; + static integer coltyp; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B, + where N = NL + NR + 1 and M = N + SQRE. DLASD1 is called from DLASD0. + + A related subroutine DLASD7 handles the case in which the singular + values (and the singular vectors in factored form) are desired. + + DLASD1 computes the SVD as follows: + + ( D1(in) 0 0 0 ) + B = U(in) * ( Z1' a Z2' b ) * VT(in) + ( 0 0 D2(in) 0 ) + + = U(out) * ( D(out) 0) * VT(out) + + where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M + with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros + elsewhere; and the entry b is empty if SQRE = 0. + + The left singular vectors of the original matrix are stored in U, and + the transpose of the right singular vectors are stored in VT, and the + singular values are in D. The algorithm consists of three stages: + + The first stage consists of deflating the size of the problem + when there are multiple singular values or when there are zeros in + the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. This stage is + performed by the routine DLASD2. + + The second stage consists of calculating the updated + singular values. This is done by finding the square roots of the + roots of the secular equation via the routine DLASD4 (as called + by DLASD3). This routine also calculates the singular vectors of + the current problem. + + The final stage consists of computing the updated singular vectors + directly using the updated singular values. The singular vectors + for the current problem are multiplied with the singular vectors + from the overall problem. + + Arguments + ========= + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has row dimension N = NL + NR + 1, + and column dimension M = N + SQRE. + + D (input/output) DOUBLE PRECISION array, + dimension (N = NL+NR+1). + On entry D(1:NL,1:NL) contains the singular values of the + upper block; and D(NL+2:N) contains the singular values of + the lower block. On exit D(1:N) contains the singular values + of the modified matrix. + + ALPHA (input) DOUBLE PRECISION + Contains the diagonal element associated with the added row. + + BETA (input) DOUBLE PRECISION + Contains the off-diagonal element associated with the added + row. + + U (input/output) DOUBLE PRECISION array, dimension(LDU,N) + On entry U(1:NL, 1:NL) contains the left singular vectors of + the upper block; U(NL+2:N, NL+2:N) contains the left singular + vectors of the lower block. On exit U contains the left + singular vectors of the bidiagonal matrix. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= max( 1, N ). + + VT (input/output) DOUBLE PRECISION array, dimension(LDVT,M) + where M = N + SQRE. + On entry VT(1:NL+1, 1:NL+1)' contains the right singular + vectors of the upper block; VT(NL+2:M, NL+2:M)' contains + the right singular vectors of the lower block. On exit + VT' contains the right singular vectors of the + bidiagonal matrix. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= max( 1, M ). + + IDXQ (output) INTEGER array, dimension(N) + This contains the permutation which will reintegrate the + subproblem just solved back into sorted order, i.e. + D( IDXQ( I = 1, N ) ) will be in ascending order. + + IWORK (workspace) INTEGER array, dimension( 4 * N ) + + WORK (workspace) DOUBLE PRECISION array, dimension( 3*M**2 + 2*M ) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --idxq; + --iwork; + --work; + + /* Function Body */ + *info = 0; + + if (*nl < 1) { + *info = -1; + } else if (*nr < 1) { + *info = -2; + } else if (*sqre < 0 || *sqre > 1) { + *info = -3; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASD1", &i__1); + return 0; + } + + n = *nl + *nr + 1; + m = n + *sqre; + +/* + The following values are for bookkeeping purposes only. They are + integer pointers which indicate the portion of the workspace + used by a particular array in DLASD2 and DLASD3. +*/ + + ldu2 = n; + ldvt2 = m; + + iz = 1; + isigma = iz + m; + iu2 = isigma + n; + ivt2 = iu2 + ldu2 * n; + iq = ivt2 + ldvt2 * m; + + idx = 1; + idxc = idx + n; + coltyp = idxc + n; + idxp = coltyp + n; + +/* + Scale. + + Computing MAX +*/ + d__1 = abs(*alpha), d__2 = abs(*beta); + orgnrm = max(d__1,d__2); + d__[*nl + 1] = 0.; + i__1 = n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) > orgnrm) { + orgnrm = (d__1 = d__[i__], abs(d__1)); + } +/* L10: */ + } + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &n, &c__1, &d__[1], &n, info); + *alpha /= orgnrm; + *beta /= orgnrm; + +/* Deflate singular values. */ + + dlasd2_(nl, nr, sqre, &k, &d__[1], &work[iz], alpha, beta, &u[u_offset], + ldu, &vt[vt_offset], ldvt, &work[isigma], &work[iu2], &ldu2, & + work[ivt2], &ldvt2, &iwork[idxp], &iwork[idx], &iwork[idxc], & + idxq[1], &iwork[coltyp], info); + +/* Solve Secular Equation and update singular vectors. */ + + ldq = k; + dlasd3_(nl, nr, sqre, &k, &d__[1], &work[iq], &ldq, &work[isigma], &u[ + u_offset], ldu, &work[iu2], &ldu2, &vt[vt_offset], ldvt, &work[ + ivt2], &ldvt2, &iwork[idxc], &iwork[coltyp], &work[iz], info); + if (*info != 0) { + return 0; + } + +/* Unscale. */ + + dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, &n, &c__1, &d__[1], &n, info); + +/* Prepare the IDXQ sorting permutation. */ + + n1 = k; + n2 = n - k; + dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); + + return 0; + +/* End of DLASD1 */ + +} /* dlasd1_ */ + +/* Subroutine */ int dlasd2_(integer *nl, integer *nr, integer *sqre, integer + *k, doublereal *d__, doublereal *z__, doublereal *alpha, doublereal * + beta, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, + doublereal *dsigma, doublereal *u2, integer *ldu2, doublereal *vt2, + integer *ldvt2, integer *idxp, integer *idx, integer *idxc, integer * + idxq, integer *coltyp, integer *info) +{ + /* System generated locals */ + integer u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, + vt2_dim1, vt2_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + static doublereal c__; + static integer i__, j, m, n; + static doublereal s; + static integer k2; + static doublereal z1; + static integer ct, jp; + static doublereal eps, tau, tol; + static integer psm[4], nlp1, nlp2, idxi, idxj; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + static integer ctot[4], idxjp; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer jprev; + + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), dlaset_(char *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *), xerbla_(char *, + integer *); + static doublereal hlftol; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + October 31, 1999 + + + Purpose + ======= + + DLASD2 merges the two sets of singular values together into a single + sorted set. Then it tries to deflate the size of the problem. + There are two ways in which deflation can occur: when two or more + singular values are close together or if there is a tiny entry in the + Z vector. For each such occurrence the order of the related secular + equation problem is reduced by one. + + DLASD2 is called from DLASD1. + + Arguments + ========= + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has N = NL + NR + 1 rows and + M = N + SQRE >= N columns. + + K (output) INTEGER + Contains the dimension of the non-deflated matrix, + This is the order of the related secular equation. 1 <= K <=N. + + D (input/output) DOUBLE PRECISION array, dimension(N) + On entry D contains the singular values of the two submatrices + to be combined. On exit D contains the trailing (N-K) updated + singular values (those which were deflated) sorted into + increasing order. + + ALPHA (input) DOUBLE PRECISION + Contains the diagonal element associated with the added row. + + BETA (input) DOUBLE PRECISION + Contains the off-diagonal element associated with the added + row. + + U (input/output) DOUBLE PRECISION array, dimension(LDU,N) + On entry U contains the left singular vectors of two + submatrices in the two square blocks with corners at (1,1), + (NL, NL), and (NL+2, NL+2), (N,N). + On exit U contains the trailing (N-K) updated left singular + vectors (those which were deflated) in its last N-K columns. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= N. + + Z (output) DOUBLE PRECISION array, dimension(N) + On exit Z contains the updating row vector in the secular + equation. + + DSIGMA (output) DOUBLE PRECISION array, dimension (N) + Contains a copy of the diagonal elements (K-1 singular values + and one zero) in the secular equation. + + U2 (output) DOUBLE PRECISION array, dimension(LDU2,N) + Contains a copy of the first K-1 left singular vectors which + will be used by DLASD3 in a matrix multiply (DGEMM) to solve + for the new left singular vectors. U2 is arranged into four + blocks. The first block contains a column with 1 at NL+1 and + zero everywhere else; the second block contains non-zero + entries only at and above NL; the third contains non-zero + entries only below NL+1; and the fourth is dense. + + LDU2 (input) INTEGER + The leading dimension of the array U2. LDU2 >= N. + + VT (input/output) DOUBLE PRECISION array, dimension(LDVT,M) + On entry VT' contains the right singular vectors of two + submatrices in the two square blocks with corners at (1,1), + (NL+1, NL+1), and (NL+2, NL+2), (M,M). + On exit VT' contains the trailing (N-K) updated right singular + vectors (those which were deflated) in its last N-K columns. + In case SQRE =1, the last row of VT spans the right null + space. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= M. + + VT2 (output) DOUBLE PRECISION array, dimension(LDVT2,N) + VT2' contains a copy of the first K right singular vectors + which will be used by DLASD3 in a matrix multiply (DGEMM) to + solve for the new right singular vectors. VT2 is arranged into + three blocks. The first block contains a row that corresponds + to the special 0 diagonal element in SIGMA; the second block + contains non-zeros only at and before NL +1; the third block + contains non-zeros only at and after NL +2. + + LDVT2 (input) INTEGER + The leading dimension of the array VT2. LDVT2 >= M. + + IDXP (workspace) INTEGER array, dimension(N) + This will contain the permutation used to place deflated + values of D at the end of the array. On output IDXP(2:K) + points to the nondeflated D-values and IDXP(K+1:N) + points to the deflated singular values. + + IDX (workspace) INTEGER array, dimension(N) + This will contain the permutation used to sort the contents of + D into ascending order. + + IDXC (output) INTEGER array, dimension(N) + This will contain the permutation used to arrange the columns + of the deflated U matrix into three groups: the first group + contains non-zero entries only at and above NL, the second + contains non-zero entries only below NL+2, and the third is + dense. + + COLTYP (workspace/output) INTEGER array, dimension(N) + As workspace, this will contain a label which will indicate + which of the following types a column in the U2 matrix or a + row in the VT2 matrix is: + 1 : non-zero in the upper half only + 2 : non-zero in the lower half only + 3 : dense + 4 : deflated + + On exit, it is an array of dimension 4, with COLTYP(I) being + the dimension of the I-th type columns. + + IDXQ (input) INTEGER array, dimension(N) + This contains the permutation which separately sorts the two + sub-problems in D into ascending order. Note that entries in + the first hlaf of this permutation must first be moved one + position backward; and entries in the second half + must first have NL+1 added to their values. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --z__; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --dsigma; + u2_dim1 = *ldu2; + u2_offset = 1 + u2_dim1; + u2 -= u2_offset; + vt2_dim1 = *ldvt2; + vt2_offset = 1 + vt2_dim1; + vt2 -= vt2_offset; + --idxp; + --idx; + --idxc; + --idxq; + --coltyp; + + /* Function Body */ + *info = 0; + + if (*nl < 1) { + *info = -1; + } else if (*nr < 1) { + *info = -2; + } else if (*sqre != 1 && *sqre != 0) { + *info = -3; + } + + n = *nl + *nr + 1; + m = n + *sqre; + + if (*ldu < n) { + *info = -10; + } else if (*ldvt < m) { + *info = -12; + } else if (*ldu2 < n) { + *info = -15; + } else if (*ldvt2 < m) { + *info = -17; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASD2", &i__1); + return 0; + } + + nlp1 = *nl + 1; + nlp2 = *nl + 2; + +/* + Generate the first part of the vector Z; and move the singular + values in the first part of D one position backward. +*/ + + z1 = *alpha * vt[nlp1 + nlp1 * vt_dim1]; + z__[1] = z1; + for (i__ = *nl; i__ >= 1; --i__) { + z__[i__ + 1] = *alpha * vt[i__ + nlp1 * vt_dim1]; + d__[i__ + 1] = d__[i__]; + idxq[i__ + 1] = idxq[i__] + 1; +/* L10: */ + } + +/* Generate the second part of the vector Z. */ + + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + z__[i__] = *beta * vt[i__ + nlp2 * vt_dim1]; +/* L20: */ + } + +/* Initialize some reference arrays. */ + + i__1 = nlp1; + for (i__ = 2; i__ <= i__1; ++i__) { + coltyp[i__] = 1; +/* L30: */ + } + i__1 = n; + for (i__ = nlp2; i__ <= i__1; ++i__) { + coltyp[i__] = 2; +/* L40: */ + } + +/* Sort the singular values into increasing order */ + + i__1 = n; + for (i__ = nlp2; i__ <= i__1; ++i__) { + idxq[i__] += nlp1; +/* L50: */ + } + +/* + DSIGMA, IDXC, IDXC, and the first column of U2 + are used as storage space. +*/ + + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + dsigma[i__] = d__[idxq[i__]]; + u2[i__ + u2_dim1] = z__[idxq[i__]]; + idxc[i__] = coltyp[idxq[i__]]; +/* L60: */ + } + + dlamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); + + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + idxi = idx[i__] + 1; + d__[i__] = dsigma[idxi]; + z__[i__] = u2[idxi + u2_dim1]; + coltyp[i__] = idxc[idxi]; +/* L70: */ + } + +/* Calculate the allowable deflation tolerance */ + + eps = EPSILON; +/* Computing MAX */ + d__1 = abs(*alpha), d__2 = abs(*beta); + tol = max(d__1,d__2); +/* Computing MAX */ + d__2 = (d__1 = d__[n], abs(d__1)); + tol = eps * 8. * max(d__2,tol); + +/* + There are 2 kinds of deflation -- first a value in the z-vector + is small, second two (or more) singular values are very close + together (their difference is small). + + If the value in the z-vector is small, we simply permute the + array so that the corresponding singular value is moved to the + end. + + If two values in the D-vector are close, we perform a two-sided + rotation designed to make one of the corresponding z-vector + entries zero, and then permute the array so that the deflated + singular value is moved to the end. + + If there are multiple singular values then the problem deflates. + Here the number of equal singular values are found. As each equal + singular value is found, an elementary reflector is computed to + rotate the corresponding singular subspace so that the + corresponding components of Z are zero in this new basis. +*/ + + *k = 1; + k2 = n + 1; + i__1 = n; + for (j = 2; j <= i__1; ++j) { + if ((d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + idxp[k2] = j; + coltyp[j] = 4; + if (j == n) { + goto L120; + } + } else { + jprev = j; + goto L90; + } +/* L80: */ + } +L90: + j = jprev; +L100: + ++j; + if (j > n) { + goto L110; + } + if ((d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + idxp[k2] = j; + coltyp[j] = 4; + } else { + +/* Check if singular values are close enough to allow deflation. */ + + if ((d__1 = d__[j] - d__[jprev], abs(d__1)) <= tol) { + +/* Deflation is possible. */ + + s = z__[jprev]; + c__ = z__[j]; + +/* + Find sqrt(a**2+b**2) without overflow or + destructive underflow. +*/ + + tau = dlapy2_(&c__, &s); + c__ /= tau; + s = -s / tau; + z__[j] = tau; + z__[jprev] = 0.; + +/* + Apply back the Givens rotation to the left and right + singular vector matrices. +*/ + + idxjp = idxq[idx[jprev] + 1]; + idxj = idxq[idx[j] + 1]; + if (idxjp <= nlp1) { + --idxjp; + } + if (idxj <= nlp1) { + --idxj; + } + drot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], & + c__1, &c__, &s); + drot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, & + c__, &s); + if (coltyp[j] != coltyp[jprev]) { + coltyp[j] = 3; + } + coltyp[jprev] = 4; + --k2; + idxp[k2] = jprev; + jprev = j; + } else { + ++(*k); + u2[*k + u2_dim1] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + jprev = j; + } + } + goto L100; +L110: + +/* Record the last singular value. */ + + ++(*k); + u2[*k + u2_dim1] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + +L120: + +/* + Count up the total number of the various types of columns, then + form a permutation which positions the four column types into + four groups of uniform structure (although one or more of these + groups may be empty). +*/ + + for (j = 1; j <= 4; ++j) { + ctot[j - 1] = 0; +/* L130: */ + } + i__1 = n; + for (j = 2; j <= i__1; ++j) { + ct = coltyp[j]; + ++ctot[ct - 1]; +/* L140: */ + } + +/* PSM(*) = Position in SubMatrix (of types 1 through 4) */ + + psm[0] = 2; + psm[1] = ctot[0] + 2; + psm[2] = psm[1] + ctot[1]; + psm[3] = psm[2] + ctot[2]; + +/* + Fill out the IDXC array so that the permutation which it induces + will place all type-1 columns first, all type-2 columns next, + then all type-3's, and finally all type-4's, starting from the + second column. This applies similarly to the rows of VT. +*/ + + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + ct = coltyp[jp]; + idxc[psm[ct - 1]] = j; + ++psm[ct - 1]; +/* L150: */ + } + +/* + Sort the singular values and corresponding singular vectors into + DSIGMA, U2, and VT2 respectively. The singular values/vectors + which were not deflated go into the first K slots of DSIGMA, U2, + and VT2 respectively, while those which were deflated go into the + last N - K slots, except that the first column/row will be treated + separately. +*/ + + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + dsigma[j] = d__[jp]; + idxj = idxq[idx[idxp[idxc[j]]] + 1]; + if (idxj <= nlp1) { + --idxj; + } + dcopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1); + dcopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2); +/* L160: */ + } + +/* Determine DSIGMA(1), DSIGMA(2) and Z(1) */ + + dsigma[1] = 0.; + hlftol = tol / 2.; + if (abs(dsigma[2]) <= hlftol) { + dsigma[2] = hlftol; + } + if (m > n) { + z__[1] = dlapy2_(&z1, &z__[m]); + if (z__[1] <= tol) { + c__ = 1.; + s = 0.; + z__[1] = tol; + } else { + c__ = z1 / z__[1]; + s = z__[m] / z__[1]; + } + } else { + if (abs(z1) <= tol) { + z__[1] = tol; + } else { + z__[1] = z1; + } + } + +/* Move the rest of the updating row to Z. */ + + i__1 = *k - 1; + dcopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1); + +/* + Determine the first column of U2, the first row of VT2 and the + last row of VT. +*/ + + dlaset_("A", &n, &c__1, &c_b29, &c_b29, &u2[u2_offset], ldu2); + u2[nlp1 + u2_dim1] = 1.; + if (m > n) { + i__1 = nlp1; + for (i__ = 1; i__ <= i__1; ++i__) { + vt[m + i__ * vt_dim1] = -s * vt[nlp1 + i__ * vt_dim1]; + vt2[i__ * vt2_dim1 + 1] = c__ * vt[nlp1 + i__ * vt_dim1]; +/* L170: */ + } + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + vt2[i__ * vt2_dim1 + 1] = s * vt[m + i__ * vt_dim1]; + vt[m + i__ * vt_dim1] = c__ * vt[m + i__ * vt_dim1]; +/* L180: */ + } + } else { + dcopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2); + } + if (m > n) { + dcopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2); + } + +/* + The deflated singular values and their corresponding vectors go + into the back of D, U, and V respectively. +*/ + + if (n > *k) { + i__1 = n - *k; + dcopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); + i__1 = n - *k; + dlacpy_("A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1) + * u_dim1 + 1], ldu); + i__1 = n - *k; + dlacpy_("A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 + + vt_dim1], ldvt); + } + +/* Copy CTOT into COLTYP for referencing in DLASD3. */ + + for (j = 1; j <= 4; ++j) { + coltyp[j] = ctot[j - 1]; +/* L190: */ + } + + return 0; + +/* End of DLASD2 */ + +} /* dlasd2_ */ + +/* Subroutine */ int dlasd3_(integer *nl, integer *nr, integer *sqre, integer + *k, doublereal *d__, doublereal *q, integer *ldq, doublereal *dsigma, + doublereal *u, integer *ldu, doublereal *u2, integer *ldu2, + doublereal *vt, integer *ldvt, doublereal *vt2, integer *ldvt2, + integer *idxc, integer *ctot, doublereal *z__, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, + vt_offset, vt2_dim1, vt2_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + static integer i__, j, m, n, jc; + static doublereal rho; + static integer nlp1, nlp2, nrp1; + static doublereal temp; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer ctemp; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer ktemp; + extern doublereal dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int dlasd4_(integer *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *), dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlacpy_(char *, integer *, integer + *, doublereal *, integer *, doublereal *, integer *), + xerbla_(char *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + October 31, 1999 + + + Purpose + ======= + + DLASD3 finds all the square roots of the roots of the secular + equation, as defined by the values in D and Z. It makes the + appropriate calls to DLASD4 and then updates the singular + vectors by matrix multiplication. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + DLASD3 is called from DLASD1. + + Arguments + ========= + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has N = NL + NR + 1 rows and + M = N + SQRE >= N columns. + + K (input) INTEGER + The size of the secular equation, 1 =< K = < N. + + D (output) DOUBLE PRECISION array, dimension(K) + On exit the square roots of the roots of the secular equation, + in ascending order. + + Q (workspace) DOUBLE PRECISION array, + dimension at least (LDQ,K). + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= K. + + DSIGMA (input) DOUBLE PRECISION array, dimension(K) + The first K elements of this array contain the old roots + of the deflated updating problem. These are the poles + of the secular equation. + + U (input) DOUBLE PRECISION array, dimension (LDU, N) + The last N - K columns of this matrix contain the deflated + left singular vectors. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= N. + + U2 (input) DOUBLE PRECISION array, dimension (LDU2, N) + The first K columns of this matrix contain the non-deflated + left singular vectors for the split problem. + + LDU2 (input) INTEGER + The leading dimension of the array U2. LDU2 >= N. + + VT (input) DOUBLE PRECISION array, dimension (LDVT, M) + The last M - K columns of VT' contain the deflated + right singular vectors. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= N. + + VT2 (input) DOUBLE PRECISION array, dimension (LDVT2, N) + The first K columns of VT2' contain the non-deflated + right singular vectors for the split problem. + + LDVT2 (input) INTEGER + The leading dimension of the array VT2. LDVT2 >= N. + + IDXC (input) INTEGER array, dimension ( N ) + The permutation used to arrange the columns of U (and rows of + VT) into three groups: the first group contains non-zero + entries only at and above (or before) NL +1; the second + contains non-zero entries only at and below (or after) NL+2; + and the third is dense. The first column of U and the row of + VT are treated separately, however. + + The rows of the singular vectors found by DLASD4 + must be likewise permuted before the matrix multiplies can + take place. + + CTOT (input) INTEGER array, dimension ( 4 ) + A count of the total number of the various types of columns + in U (or rows in VT), as described in IDXC. The fourth column + type is any column which has been deflated. + + Z (input) DOUBLE PRECISION array, dimension (K) + The first K elements of this array contain the components + of the deflation-adjusted updating row vector. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --dsigma; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + u2_dim1 = *ldu2; + u2_offset = 1 + u2_dim1; + u2 -= u2_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + vt2_dim1 = *ldvt2; + vt2_offset = 1 + vt2_dim1; + vt2 -= vt2_offset; + --idxc; + --ctot; + --z__; + + /* Function Body */ + *info = 0; + + if (*nl < 1) { + *info = -1; + } else if (*nr < 1) { + *info = -2; + } else if (*sqre != 1 && *sqre != 0) { + *info = -3; + } + + n = *nl + *nr + 1; + m = n + *sqre; + nlp1 = *nl + 1; + nlp2 = *nl + 2; + + if (*k < 1 || *k > n) { + *info = -4; + } else if (*ldq < *k) { + *info = -7; + } else if (*ldu < n) { + *info = -10; + } else if (*ldu2 < n) { + *info = -12; + } else if (*ldvt < m) { + *info = -14; + } else if (*ldvt2 < m) { + *info = -16; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASD3", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*k == 1) { + d__[1] = abs(z__[1]); + dcopy_(&m, &vt2[vt2_dim1 + 1], ldvt2, &vt[vt_dim1 + 1], ldvt); + if (z__[1] > 0.) { + dcopy_(&n, &u2[u2_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1); + } else { + i__1 = n; + for (i__ = 1; i__ <= i__1; ++i__) { + u[i__ + u_dim1] = -u2[i__ + u2_dim1]; +/* L10: */ + } + } + return 0; + } + +/* + Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can + be computed with high relative accuracy (barring over/underflow). + This is a problem on machines without a guard digit in + add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). + The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), + which on any of these machines zeros out the bottommost + bit of DSIGMA(I) if it is 1; this makes the subsequent + subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation + occurs. On binary machines with a guard digit (almost all + machines) it does not change DSIGMA(I) at all. On hexadecimal + and decimal machines with a guard digit, it slightly + changes the bottommost bits of DSIGMA(I). It does not account + for hexadecimal or decimal machines without guard digits + (we know of none). We use a subroutine call to compute + 2*DLAMBDA(I) to prevent optimizing compilers from eliminating + this code. +*/ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + dsigma[i__] = dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; +/* L20: */ + } + +/* Keep a copy of Z. */ + + dcopy_(k, &z__[1], &c__1, &q[q_offset], &c__1); + +/* Normalize Z. */ + + rho = dnrm2_(k, &z__[1], &c__1); + dlascl_("G", &c__0, &c__0, &rho, &c_b15, k, &c__1, &z__[1], k, info); + rho *= rho; + +/* Find the new singular values. */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dlasd4_(k, &j, &dsigma[1], &z__[1], &u[j * u_dim1 + 1], &rho, &d__[j], + &vt[j * vt_dim1 + 1], info); + +/* If the zero finder fails, the computation is terminated. */ + + if (*info != 0) { + return 0; + } +/* L30: */ + } + +/* Compute updated Z. */ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + z__[i__] = u[i__ + *k * u_dim1] * vt[i__ + *k * vt_dim1]; + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[ + i__] - dsigma[j]) / (dsigma[i__] + dsigma[j]); +/* L40: */ + } + i__2 = *k - 1; + for (j = i__; j <= i__2; ++j) { + z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[ + i__] - dsigma[j + 1]) / (dsigma[i__] + dsigma[j + 1]); +/* L50: */ + } + d__2 = sqrt((d__1 = z__[i__], abs(d__1))); + z__[i__] = d_sign(&d__2, &q[i__ + q_dim1]); +/* L60: */ + } + +/* + Compute left singular vectors of the modified diagonal matrix, + and store related information for the right singular vectors. +*/ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + vt[i__ * vt_dim1 + 1] = z__[1] / u[i__ * u_dim1 + 1] / vt[i__ * + vt_dim1 + 1]; + u[i__ * u_dim1 + 1] = -1.; + i__2 = *k; + for (j = 2; j <= i__2; ++j) { + vt[j + i__ * vt_dim1] = z__[j] / u[j + i__ * u_dim1] / vt[j + i__ + * vt_dim1]; + u[j + i__ * u_dim1] = dsigma[j] * vt[j + i__ * vt_dim1]; +/* L70: */ + } + temp = dnrm2_(k, &u[i__ * u_dim1 + 1], &c__1); + q[i__ * q_dim1 + 1] = u[i__ * u_dim1 + 1] / temp; + i__2 = *k; + for (j = 2; j <= i__2; ++j) { + jc = idxc[j]; + q[j + i__ * q_dim1] = u[jc + i__ * u_dim1] / temp; +/* L80: */ + } +/* L90: */ + } + +/* Update the left singular vector matrix. */ + + if (*k == 2) { + dgemm_("N", "N", &n, k, k, &c_b15, &u2[u2_offset], ldu2, &q[q_offset], + ldq, &c_b29, &u[u_offset], ldu); + goto L100; + } + if (ctot[1] > 0) { + dgemm_("N", "N", nl, k, &ctot[1], &c_b15, &u2[(u2_dim1 << 1) + 1], + ldu2, &q[q_dim1 + 2], ldq, &c_b29, &u[u_dim1 + 1], ldu); + if (ctot[3] > 0) { + ktemp = ctot[1] + 2 + ctot[2]; + dgemm_("N", "N", nl, k, &ctot[3], &c_b15, &u2[ktemp * u2_dim1 + 1] + , ldu2, &q[ktemp + q_dim1], ldq, &c_b15, &u[u_dim1 + 1], + ldu); + } + } else if (ctot[3] > 0) { + ktemp = ctot[1] + 2 + ctot[2]; + dgemm_("N", "N", nl, k, &ctot[3], &c_b15, &u2[ktemp * u2_dim1 + 1], + ldu2, &q[ktemp + q_dim1], ldq, &c_b29, &u[u_dim1 + 1], ldu); + } else { + dlacpy_("F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu); + } + dcopy_(k, &q[q_dim1 + 1], ldq, &u[nlp1 + u_dim1], ldu); + ktemp = ctot[1] + 2; + ctemp = ctot[2] + ctot[3]; + dgemm_("N", "N", nr, k, &ctemp, &c_b15, &u2[nlp2 + ktemp * u2_dim1], ldu2, + &q[ktemp + q_dim1], ldq, &c_b29, &u[nlp2 + u_dim1], ldu); + +/* Generate the right singular vectors. */ + +L100: + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = dnrm2_(k, &vt[i__ * vt_dim1 + 1], &c__1); + q[i__ + q_dim1] = vt[i__ * vt_dim1 + 1] / temp; + i__2 = *k; + for (j = 2; j <= i__2; ++j) { + jc = idxc[j]; + q[i__ + j * q_dim1] = vt[jc + i__ * vt_dim1] / temp; +/* L110: */ + } +/* L120: */ + } + +/* Update the right singular vector matrix. */ + + if (*k == 2) { + dgemm_("N", "N", k, &m, k, &c_b15, &q[q_offset], ldq, &vt2[vt2_offset] + , ldvt2, &c_b29, &vt[vt_offset], ldvt); + return 0; + } + ktemp = ctot[1] + 1; + dgemm_("N", "N", k, &nlp1, &ktemp, &c_b15, &q[q_dim1 + 1], ldq, &vt2[ + vt2_dim1 + 1], ldvt2, &c_b29, &vt[vt_dim1 + 1], ldvt); + ktemp = ctot[1] + 2 + ctot[2]; + if (ktemp <= *ldvt2) { + dgemm_("N", "N", k, &nlp1, &ctot[3], &c_b15, &q[ktemp * q_dim1 + 1], + ldq, &vt2[ktemp + vt2_dim1], ldvt2, &c_b15, &vt[vt_dim1 + 1], + ldvt); + } + + ktemp = ctot[1] + 1; + nrp1 = *nr + *sqre; + if (ktemp > 1) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + q[i__ + ktemp * q_dim1] = q[i__ + q_dim1]; +/* L130: */ + } + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + vt2[ktemp + i__ * vt2_dim1] = vt2[i__ * vt2_dim1 + 1]; +/* L140: */ + } + } + ctemp = ctot[2] + 1 + ctot[3]; + dgemm_("N", "N", k, &nrp1, &ctemp, &c_b15, &q[ktemp * q_dim1 + 1], ldq, & + vt2[ktemp + nlp2 * vt2_dim1], ldvt2, &c_b29, &vt[nlp2 * vt_dim1 + + 1], ldvt); + + return 0; + +/* End of DLASD3 */ + +} /* dlasd3_ */ + +/* Subroutine */ int dlasd4_(integer *n, integer *i__, doublereal *d__, + doublereal *z__, doublereal *delta, doublereal *rho, doublereal * + sigma, doublereal *work, integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal a, b, c__; + static integer j; + static doublereal w, dd[3]; + static integer ii; + static doublereal dw, zz[3]; + static integer ip1; + static doublereal eta, phi, eps, tau, psi; + static integer iim1, iip1; + static doublereal dphi, dpsi; + static integer iter; + static doublereal temp, prew, sg2lb, sg2ub, temp1, temp2, dtiim, delsq, + dtiip; + static integer niter; + static doublereal dtisq; + static logical swtch; + static doublereal dtnsq; + extern /* Subroutine */ int dlaed6_(integer *, logical *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *) + , dlasd5_(integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + static doublereal delsq2, dtnsq1; + static logical swtch3; + + static logical orgati; + static doublereal erretm, dtipsq, rhoinv; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + October 31, 1999 + + + Purpose + ======= + + This subroutine computes the square root of the I-th updated + eigenvalue of a positive symmetric rank-one modification to + a positive diagonal matrix whose entries are given as the squares + of the corresponding entries in the array d, and that + + 0 <= D(i) < D(j) for i < j + + and that RHO > 0. This is arranged by the calling routine, and is + no loss in generality. The rank-one modified system is thus + + diag( D ) * diag( D ) + RHO * Z * Z_transpose. + + where we assume the Euclidean norm of Z is 1. + + The method consists of approximating the rational functions in the + secular equation by simpler interpolating rational functions. + + Arguments + ========= + + N (input) INTEGER + The length of all arrays. + + I (input) INTEGER + The index of the eigenvalue to be computed. 1 <= I <= N. + + D (input) DOUBLE PRECISION array, dimension ( N ) + The original eigenvalues. It is assumed that they are in + order, 0 <= D(I) < D(J) for I < J. + + Z (input) DOUBLE PRECISION array, dimension ( N ) + The components of the updating vector. + + DELTA (output) DOUBLE PRECISION array, dimension ( N ) + If N .ne. 1, DELTA contains (D(j) - sigma_I) in its j-th + component. If N = 1, then DELTA(1) = 1. The vector DELTA + contains the information necessary to construct the + (singular) eigenvectors. + + RHO (input) DOUBLE PRECISION + The scalar in the symmetric updating formula. + + SIGMA (output) DOUBLE PRECISION + The computed lambda_I, the I-th updated eigenvalue. + + WORK (workspace) DOUBLE PRECISION array, dimension ( N ) + If N .ne. 1, WORK contains (D(j) + sigma_I) in its j-th + component. If N = 1, then WORK( 1 ) = 1. + + INFO (output) INTEGER + = 0: successful exit + > 0: if INFO = 1, the updating process failed. + + Internal Parameters + =================== + + Logical variable ORGATI (origin-at-i?) is used for distinguishing + whether D(i) or D(i+1) is treated as the origin. + + ORGATI = .true. origin at i + ORGATI = .false. origin at i+1 + + Logical variable SWTCH3 (switch-for-3-poles?) is for noting + if we are working with THREE poles! + + MAXIT is the maximum number of iterations allowed for each + eigenvalue. + + Further Details + =============== + + Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Since this routine is called in an inner loop, we do no argument + checking. + + Quick return for N=1 and 2. +*/ + + /* Parameter adjustments */ + --work; + --delta; + --z__; + --d__; + + /* Function Body */ + *info = 0; + if (*n == 1) { + +/* Presumably, I=1 upon entry */ + + *sigma = sqrt(d__[1] * d__[1] + *rho * z__[1] * z__[1]); + delta[1] = 1.; + work[1] = 1.; + return 0; + } + if (*n == 2) { + dlasd5_(i__, &d__[1], &z__[1], &delta[1], rho, sigma, &work[1]); + return 0; + } + +/* Compute machine epsilon */ + + eps = EPSILON; + rhoinv = 1. / *rho; + +/* The case I = N */ + + if (*i__ == *n) { + +/* Initialize some basic variables */ + + ii = *n - 1; + niter = 1; + +/* Calculate initial guess */ + + temp = *rho / 2.; + +/* + If ||Z||_2 is not one, then TEMP should be set to + RHO * ||Z||_2^2 / TWO +*/ + + temp1 = temp / (d__[*n] + sqrt(d__[*n] * d__[*n] + temp)); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] = d__[j] + d__[*n] + temp1; + delta[j] = d__[j] - d__[*n] - temp1; +/* L10: */ + } + + psi = 0.; + i__1 = *n - 2; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / (delta[j] * work[j]); +/* L20: */ + } + + c__ = rhoinv + psi; + w = c__ + z__[ii] * z__[ii] / (delta[ii] * work[ii]) + z__[*n] * z__[* + n] / (delta[*n] * work[*n]); + + if (w <= 0.) { + temp1 = sqrt(d__[*n] * d__[*n] + *rho); + temp = z__[*n - 1] * z__[*n - 1] / ((d__[*n - 1] + temp1) * (d__[* + n] - d__[*n - 1] + *rho / (d__[*n] + temp1))) + z__[*n] * + z__[*n] / *rho; + +/* + The following TAU is to approximate + SIGMA_n^2 - D( N )*D( N ) +*/ + + if (c__ <= temp) { + tau = *rho; + } else { + delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]); + a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[* + n]; + b = z__[*n] * z__[*n] * delsq; + if (a < 0.) { + tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); + } + } + +/* + It can be proved that + D(N)^2+RHO/2 <= SIGMA_n^2 < D(N)^2+TAU <= D(N)^2+RHO +*/ + + } else { + delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]); + a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]; + b = z__[*n] * z__[*n] * delsq; + +/* + The following TAU is to approximate + SIGMA_n^2 - D( N )*D( N ) +*/ + + if (a < 0.) { + tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); + } + +/* + It can be proved that + D(N)^2 < D(N)^2+TAU < SIGMA(N)^2 < D(N)^2+RHO/2 +*/ + + } + +/* The following ETA is to approximate SIGMA_n - D( N ) */ + + eta = tau / (d__[*n] + sqrt(d__[*n] * d__[*n] + tau)); + + *sigma = d__[*n] + eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - eta; + work[j] = d__[j] + d__[*i__] + eta; +/* L30: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (delta[j] * work[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L40: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / (delta[*n] * work[*n]); + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + + dphi); + + w = rhoinv + phi + psi; + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + goto L240; + } + +/* Calculate the new step */ + + ++niter; + dtnsq1 = work[*n - 1] * delta[*n - 1]; + dtnsq = work[*n] * delta[*n]; + c__ = w - dtnsq1 * dpsi - dtnsq * dphi; + a = (dtnsq + dtnsq1) * w - dtnsq * dtnsq1 * (dpsi + dphi); + b = dtnsq * dtnsq1 * w; + if (c__ < 0.) { + c__ = abs(c__); + } + if (c__ == 0.) { + eta = *rho - *sigma * *sigma; + } else if (a >= 0.) { + eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ + * 2.); + } else { + eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) + ); + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta > 0.) { + eta = -w / (dpsi + dphi); + } + temp = eta - dtnsq; + if (temp > *rho) { + eta = *rho + dtnsq; + } + + tau += eta; + eta /= *sigma + sqrt(eta + *sigma * *sigma); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; + work[j] += eta; +/* L50: */ + } + + *sigma += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L60: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / (work[*n] * delta[*n]); + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + + dphi); + + w = rhoinv + phi + psi; + +/* Main loop to update the values of the array DELTA */ + + iter = niter + 1; + + for (niter = iter; niter <= 20; ++niter) { + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + goto L240; + } + +/* Calculate the new step */ + + dtnsq1 = work[*n - 1] * delta[*n - 1]; + dtnsq = work[*n] * delta[*n]; + c__ = w - dtnsq1 * dpsi - dtnsq * dphi; + a = (dtnsq + dtnsq1) * w - dtnsq1 * dtnsq * (dpsi + dphi); + b = dtnsq1 * dtnsq * w; + if (a >= 0.) { + eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } else { + eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta > 0.) { + eta = -w / (dpsi + dphi); + } + temp = eta - dtnsq; + if (temp <= 0.) { + eta /= 2.; + } + + tau += eta; + eta /= *sigma + sqrt(eta + *sigma * *sigma); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; + work[j] += eta; +/* L70: */ + } + + *sigma += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L80: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / (work[*n] * delta[*n]); + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * ( + dpsi + dphi); + + w = rhoinv + phi + psi; +/* L90: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + goto L240; + +/* End for the case I = N */ + + } else { + +/* The case for I < N */ + + niter = 1; + ip1 = *i__ + 1; + +/* Calculate initial guess */ + + delsq = (d__[ip1] - d__[*i__]) * (d__[ip1] + d__[*i__]); + delsq2 = delsq / 2.; + temp = delsq2 / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + delsq2)); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] = d__[j] + d__[*i__] + temp; + delta[j] = d__[j] - d__[*i__] - temp; +/* L100: */ + } + + psi = 0.; + i__1 = *i__ - 1; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / (work[j] * delta[j]); +/* L110: */ + } + + phi = 0.; + i__1 = *i__ + 2; + for (j = *n; j >= i__1; --j) { + phi += z__[j] * z__[j] / (work[j] * delta[j]); +/* L120: */ + } + c__ = rhoinv + psi + phi; + w = c__ + z__[*i__] * z__[*i__] / (work[*i__] * delta[*i__]) + z__[ + ip1] * z__[ip1] / (work[ip1] * delta[ip1]); + + if (w > 0.) { + +/* + d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2 + + We choose d(i) as origin. +*/ + + orgati = TRUE_; + sg2lb = 0.; + sg2ub = delsq2; + a = c__ * delsq + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; + b = z__[*i__] * z__[*i__] * delsq; + if (a > 0.) { + tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); + } else { + tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } + +/* + TAU now is an estimation of SIGMA^2 - D( I )^2. The + following, however, is the corresponding estimation of + SIGMA - D( I ). +*/ + + eta = tau / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + tau)); + } else { + +/* + (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2 + + We choose d(i+1) as origin. +*/ + + orgati = FALSE_; + sg2lb = -delsq2; + sg2ub = 0.; + a = c__ * delsq - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; + b = z__[ip1] * z__[ip1] * delsq; + if (a < 0.) { + tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs( + d__1)))); + } else { + tau = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) / + (c__ * 2.); + } + +/* + TAU now is an estimation of SIGMA^2 - D( IP1 )^2. The + following, however, is the corresponding estimation of + SIGMA - D( IP1 ). +*/ + + eta = tau / (d__[ip1] + sqrt((d__1 = d__[ip1] * d__[ip1] + tau, + abs(d__1)))); + } + + if (orgati) { + ii = *i__; + *sigma = d__[*i__] + eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] = d__[j] + d__[*i__] + eta; + delta[j] = d__[j] - d__[*i__] - eta; +/* L130: */ + } + } else { + ii = *i__ + 1; + *sigma = d__[ip1] + eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] = d__[j] + d__[ip1] + eta; + delta[j] = d__[j] - d__[ip1] - eta; +/* L140: */ + } + } + iim1 = ii - 1; + iip1 = ii + 1; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L150: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / (work[j] * delta[j]); + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L160: */ + } + + w = rhoinv + phi + psi; + +/* + W is the value of the secular function with + its ii-th element removed. +*/ + + swtch3 = FALSE_; + if (orgati) { + if (w < 0.) { + swtch3 = TRUE_; + } + } else { + if (w > 0.) { + swtch3 = TRUE_; + } + } + if (ii == 1 || ii == *n) { + swtch3 = FALSE_; + } + + temp = z__[ii] / (work[ii] * delta[ii]); + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w += temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + + abs(tau) * dw; + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + goto L240; + } + + if (w <= 0.) { + sg2lb = max(sg2lb,tau); + } else { + sg2ub = min(sg2ub,tau); + } + +/* Calculate the new step */ + + ++niter; + if (! swtch3) { + dtipsq = work[ip1] * delta[ip1]; + dtisq = work[*i__] * delta[*i__]; + if (orgati) { +/* Computing 2nd power */ + d__1 = z__[*i__] / dtisq; + c__ = w - dtipsq * dw + delsq * (d__1 * d__1); + } else { +/* Computing 2nd power */ + d__1 = z__[ip1] / dtipsq; + c__ = w - dtisq * dw - delsq * (d__1 * d__1); + } + a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; + b = dtipsq * dtisq * w; + if (c__ == 0.) { + if (a == 0.) { + if (orgati) { + a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * (dpsi + + dphi); + } else { + a = z__[ip1] * z__[ip1] + dtisq * dtisq * (dpsi + + dphi); + } + } + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + dtiim = work[iim1] * delta[iim1]; + dtiip = work[iip1] * delta[iip1]; + temp = rhoinv + psi + phi; + if (orgati) { + temp1 = z__[iim1] / dtiim; + temp1 *= temp1; + c__ = temp - dtiip * (dpsi + dphi) - (d__[iim1] - d__[iip1]) * + (d__[iim1] + d__[iip1]) * temp1; + zz[0] = z__[iim1] * z__[iim1]; + if (dpsi < temp1) { + zz[2] = dtiip * dtiip * dphi; + } else { + zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi); + } + } else { + temp1 = z__[iip1] / dtiip; + temp1 *= temp1; + c__ = temp - dtiim * (dpsi + dphi) - (d__[iip1] - d__[iim1]) * + (d__[iim1] + d__[iip1]) * temp1; + if (dphi < temp1) { + zz[0] = dtiim * dtiim * dpsi; + } else { + zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1)); + } + zz[2] = z__[iip1] * z__[iip1]; + } + zz[1] = z__[ii] * z__[ii]; + dd[0] = dtiim; + dd[1] = delta[ii] * work[ii]; + dd[2] = dtiip; + dlaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); + if (*info != 0) { + goto L240; + } + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta >= 0.) { + eta = -w / dw; + } + if (orgati) { + temp1 = work[*i__] * delta[*i__]; + temp = eta - temp1; + } else { + temp1 = work[ip1] * delta[ip1]; + temp = eta - temp1; + } + if (temp > sg2ub || temp < sg2lb) { + if (w < 0.) { + eta = (sg2ub - tau) / 2.; + } else { + eta = (sg2lb - tau) / 2.; + } + } + + tau += eta; + eta /= *sigma + sqrt(*sigma * *sigma + eta); + + prew = w; + + *sigma += eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] += eta; + delta[j] -= eta; +/* L170: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L180: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / (work[j] * delta[j]); + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L190: */ + } + + temp = z__[ii] / (work[ii] * delta[ii]); + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + + abs(tau) * dw; + + if (w <= 0.) { + sg2lb = max(sg2lb,tau); + } else { + sg2ub = min(sg2ub,tau); + } + + swtch = FALSE_; + if (orgati) { + if (-w > abs(prew) / 10.) { + swtch = TRUE_; + } + } else { + if (w > abs(prew) / 10.) { + swtch = TRUE_; + } + } + +/* Main loop to update the values of the array DELTA and WORK */ + + iter = niter + 1; + + for (niter = iter; niter <= 20; ++niter) { + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + goto L240; + } + +/* Calculate the new step */ + + if (! swtch3) { + dtipsq = work[ip1] * delta[ip1]; + dtisq = work[*i__] * delta[*i__]; + if (! swtch) { + if (orgati) { +/* Computing 2nd power */ + d__1 = z__[*i__] / dtisq; + c__ = w - dtipsq * dw + delsq * (d__1 * d__1); + } else { +/* Computing 2nd power */ + d__1 = z__[ip1] / dtipsq; + c__ = w - dtisq * dw - delsq * (d__1 * d__1); + } + } else { + temp = z__[ii] / (work[ii] * delta[ii]); + if (orgati) { + dpsi += temp * temp; + } else { + dphi += temp * temp; + } + c__ = w - dtisq * dpsi - dtipsq * dphi; + } + a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; + b = dtipsq * dtisq * w; + if (c__ == 0.) { + if (a == 0.) { + if (! swtch) { + if (orgati) { + a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * + (dpsi + dphi); + } else { + a = z__[ip1] * z__[ip1] + dtisq * dtisq * ( + dpsi + dphi); + } + } else { + a = dtisq * dtisq * dpsi + dtipsq * dtipsq * dphi; + } + } + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) + / (c__ * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, + abs(d__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + dtiim = work[iim1] * delta[iim1]; + dtiip = work[iip1] * delta[iip1]; + temp = rhoinv + psi + phi; + if (swtch) { + c__ = temp - dtiim * dpsi - dtiip * dphi; + zz[0] = dtiim * dtiim * dpsi; + zz[2] = dtiip * dtiip * dphi; + } else { + if (orgati) { + temp1 = z__[iim1] / dtiim; + temp1 *= temp1; + temp2 = (d__[iim1] - d__[iip1]) * (d__[iim1] + d__[ + iip1]) * temp1; + c__ = temp - dtiip * (dpsi + dphi) - temp2; + zz[0] = z__[iim1] * z__[iim1]; + if (dpsi < temp1) { + zz[2] = dtiip * dtiip * dphi; + } else { + zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi); + } + } else { + temp1 = z__[iip1] / dtiip; + temp1 *= temp1; + temp2 = (d__[iip1] - d__[iim1]) * (d__[iim1] + d__[ + iip1]) * temp1; + c__ = temp - dtiim * (dpsi + dphi) - temp2; + if (dphi < temp1) { + zz[0] = dtiim * dtiim * dpsi; + } else { + zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1)); + } + zz[2] = z__[iip1] * z__[iip1]; + } + } + dd[0] = dtiim; + dd[1] = delta[ii] * work[ii]; + dd[2] = dtiip; + dlaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); + if (*info != 0) { + goto L240; + } + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta >= 0.) { + eta = -w / dw; + } + if (orgati) { + temp1 = work[*i__] * delta[*i__]; + temp = eta - temp1; + } else { + temp1 = work[ip1] * delta[ip1]; + temp = eta - temp1; + } + if (temp > sg2ub || temp < sg2lb) { + if (w < 0.) { + eta = (sg2ub - tau) / 2.; + } else { + eta = (sg2lb - tau) / 2.; + } + } + + tau += eta; + eta /= *sigma + sqrt(*sigma * *sigma + eta); + + *sigma += eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] += eta; + delta[j] -= eta; +/* L200: */ + } + + prew = w; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L210: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / (work[j] * delta[j]); + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L220: */ + } + + temp = z__[ii] / (work[ii] * delta[ii]); + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + + abs(tau) * dw; + if (w * prew > 0. && abs(w) > abs(prew) / 10.) { + swtch = ! swtch; + } + + if (w <= 0.) { + sg2lb = max(sg2lb,tau); + } else { + sg2ub = min(sg2ub,tau); + } + +/* L230: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + + } + +L240: + return 0; + +/* End of DLASD4 */ + +} /* dlasd4_ */ + +/* Subroutine */ int dlasd5_(integer *i__, doublereal *d__, doublereal *z__, + doublereal *delta, doublereal *rho, doublereal *dsigma, doublereal * + work) +{ + /* System generated locals */ + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal b, c__, w, del, tau, delsq; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + June 30, 1999 + + + Purpose + ======= + + This subroutine computes the square root of the I-th eigenvalue + of a positive symmetric rank-one modification of a 2-by-2 diagonal + matrix + + diag( D ) * diag( D ) + RHO * Z * transpose(Z) . + + The diagonal entries in the array D are assumed to satisfy + + 0 <= D(i) < D(j) for i < j . + + We also assume RHO > 0 and that the Euclidean norm of the vector + Z is one. + + Arguments + ========= + + I (input) INTEGER + The index of the eigenvalue to be computed. I = 1 or I = 2. + + D (input) DOUBLE PRECISION array, dimension ( 2 ) + The original eigenvalues. We assume 0 <= D(1) < D(2). + + Z (input) DOUBLE PRECISION array, dimension ( 2 ) + The components of the updating vector. + + DELTA (output) DOUBLE PRECISION array, dimension ( 2 ) + Contains (D(j) - lambda_I) in its j-th component. + The vector DELTA contains the information necessary + to construct the eigenvectors. + + RHO (input) DOUBLE PRECISION + The scalar in the symmetric updating formula. + + DSIGMA (output) DOUBLE PRECISION + The computed lambda_I, the I-th updated eigenvalue. + + WORK (workspace) DOUBLE PRECISION array, dimension ( 2 ) + WORK contains (D(j) + sigma_I) in its j-th component. + + Further Details + =============== + + Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --work; + --delta; + --z__; + --d__; + + /* Function Body */ + del = d__[2] - d__[1]; + delsq = del * (d__[2] + d__[1]); + if (*i__ == 1) { + w = *rho * 4. * (z__[2] * z__[2] / (d__[1] + d__[2] * 3.) - z__[1] * + z__[1] / (d__[1] * 3. + d__[2])) / del + 1.; + if (w > 0.) { + b = delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[1] * z__[1] * delsq; + +/* + B > ZERO, always + + The following TAU is DSIGMA * DSIGMA - D( 1 ) * D( 1 ) +*/ + + tau = c__ * 2. / (b + sqrt((d__1 = b * b - c__ * 4., abs(d__1)))); + +/* The following TAU is DSIGMA - D( 1 ) */ + + tau /= d__[1] + sqrt(d__[1] * d__[1] + tau); + *dsigma = d__[1] + tau; + delta[1] = -tau; + delta[2] = del - tau; + work[1] = d__[1] * 2. + tau; + work[2] = d__[1] + tau + d__[2]; +/* + DELTA( 1 ) = -Z( 1 ) / TAU + DELTA( 2 ) = Z( 2 ) / ( DEL-TAU ) +*/ + } else { + b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * delsq; + +/* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */ + + if (b > 0.) { + tau = c__ * -2. / (b + sqrt(b * b + c__ * 4.)); + } else { + tau = (b - sqrt(b * b + c__ * 4.)) / 2.; + } + +/* The following TAU is DSIGMA - D( 2 ) */ + + tau /= d__[2] + sqrt((d__1 = d__[2] * d__[2] + tau, abs(d__1))); + *dsigma = d__[2] + tau; + delta[1] = -(del + tau); + delta[2] = -tau; + work[1] = d__[1] + tau + d__[2]; + work[2] = d__[2] * 2. + tau; +/* + DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) + DELTA( 2 ) = -Z( 2 ) / TAU +*/ + } +/* + TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) + DELTA( 1 ) = DELTA( 1 ) / TEMP + DELTA( 2 ) = DELTA( 2 ) / TEMP +*/ + } else { + +/* Now I=2 */ + + b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * delsq; + +/* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */ + + if (b > 0.) { + tau = (b + sqrt(b * b + c__ * 4.)) / 2.; + } else { + tau = c__ * 2. / (-b + sqrt(b * b + c__ * 4.)); + } + +/* The following TAU is DSIGMA - D( 2 ) */ + + tau /= d__[2] + sqrt(d__[2] * d__[2] + tau); + *dsigma = d__[2] + tau; + delta[1] = -(del + tau); + delta[2] = -tau; + work[1] = d__[1] + tau + d__[2]; + work[2] = d__[2] * 2. + tau; +/* + DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) + DELTA( 2 ) = -Z( 2 ) / TAU + TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) + DELTA( 1 ) = DELTA( 1 ) / TEMP + DELTA( 2 ) = DELTA( 2 ) / TEMP +*/ + } + return 0; + +/* End of DLASD5 */ + +} /* dlasd5_ */ + +/* Subroutine */ int dlasd6_(integer *icompq, integer *nl, integer *nr, + integer *sqre, doublereal *d__, doublereal *vf, doublereal *vl, + doublereal *alpha, doublereal *beta, integer *idxq, integer *perm, + integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, + integer *ldgnum, doublereal *poles, doublereal *difl, doublereal * + difr, doublereal *z__, integer *k, doublereal *c__, doublereal *s, + doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, + poles_dim1, poles_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + static integer i__, m, n, n1, n2, iw, idx, idxc, idxp, ivfw, ivlw; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dlasd7_(integer *, integer *, integer *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, integer *, + integer *, integer *, integer *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), dlasd8_( + integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, integer *), dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlamrg_(integer *, integer *, + doublereal *, integer *, integer *, integer *); + static integer isigma; + extern /* Subroutine */ int xerbla_(char *, integer *); + static doublereal orgnrm; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DLASD6 computes the SVD of an updated upper bidiagonal matrix B + obtained by merging two smaller ones by appending a row. This + routine is used only for the problem which requires all singular + values and optionally singular vector matrices in factored form. + B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE. + A related subroutine, DLASD1, handles the case in which all singular + values and singular vectors of the bidiagonal matrix are desired. + + DLASD6 computes the SVD as follows: + + ( D1(in) 0 0 0 ) + B = U(in) * ( Z1' a Z2' b ) * VT(in) + ( 0 0 D2(in) 0 ) + + = U(out) * ( D(out) 0) * VT(out) + + where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M + with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros + elsewhere; and the entry b is empty if SQRE = 0. + + The singular values of B can be computed using D1, D2, the first + components of all the right singular vectors of the lower block, and + the last components of all the right singular vectors of the upper + block. These components are stored and updated in VF and VL, + respectively, in DLASD6. Hence U and VT are not explicitly + referenced. + + The singular values are stored in D. The algorithm consists of two + stages: + + The first stage consists of deflating the size of the problem + when there are multiple singular values or if there is a zero + in the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. This stage is + performed by the routine DLASD7. + + The second stage consists of calculating the updated + singular values. This is done by finding the roots of the + secular equation via the routine DLASD4 (as called by DLASD8). + This routine also updates VF and VL and computes the distances + between the updated singular values and the old singular + values. + + DLASD6 is called from DLASDA. + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed in + factored form: + = 0: Compute singular values only. + = 1: Compute singular vectors in factored form as well. + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has row dimension N = NL + NR + 1, + and column dimension M = N + SQRE. + + D (input/output) DOUBLE PRECISION array, dimension ( NL+NR+1 ). + On entry D(1:NL,1:NL) contains the singular values of the + upper block, and D(NL+2:N) contains the singular values + of the lower block. On exit D(1:N) contains the singular + values of the modified matrix. + + VF (input/output) DOUBLE PRECISION array, dimension ( M ) + On entry, VF(1:NL+1) contains the first components of all + right singular vectors of the upper block; and VF(NL+2:M) + contains the first components of all right singular vectors + of the lower block. On exit, VF contains the first components + of all right singular vectors of the bidiagonal matrix. + + VL (input/output) DOUBLE PRECISION array, dimension ( M ) + On entry, VL(1:NL+1) contains the last components of all + right singular vectors of the upper block; and VL(NL+2:M) + contains the last components of all right singular vectors of + the lower block. On exit, VL contains the last components of + all right singular vectors of the bidiagonal matrix. + + ALPHA (input) DOUBLE PRECISION + Contains the diagonal element associated with the added row. + + BETA (input) DOUBLE PRECISION + Contains the off-diagonal element associated with the added + row. + + IDXQ (output) INTEGER array, dimension ( N ) + This contains the permutation which will reintegrate the + subproblem just solved back into sorted order, i.e. + D( IDXQ( I = 1, N ) ) will be in ascending order. + + PERM (output) INTEGER array, dimension ( N ) + The permutations (from deflation and sorting) to be applied + to each block. Not referenced if ICOMPQ = 0. + + GIVPTR (output) INTEGER + The number of Givens rotations which took place in this + subproblem. Not referenced if ICOMPQ = 0. + + GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. Not referenced if ICOMPQ = 0. + + LDGCOL (input) INTEGER + leading dimension of GIVCOL, must be at least N. + + GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) + Each number indicates the C or S value to be used in the + corresponding Givens rotation. Not referenced if ICOMPQ = 0. + + LDGNUM (input) INTEGER + The leading dimension of GIVNUM and POLES, must be at least N. + + POLES (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) + On exit, POLES(1,*) is an array containing the new singular + values obtained from solving the secular equation, and + POLES(2,*) is an array containing the poles in the secular + equation. Not referenced if ICOMPQ = 0. + + DIFL (output) DOUBLE PRECISION array, dimension ( N ) + On exit, DIFL(I) is the distance between I-th updated + (undeflated) singular value and the I-th (undeflated) old + singular value. + + DIFR (output) DOUBLE PRECISION array, + dimension ( LDGNUM, 2 ) if ICOMPQ = 1 and + dimension ( N ) if ICOMPQ = 0. + On exit, DIFR(I, 1) is the distance between I-th updated + (undeflated) singular value and the I+1-th (undeflated) old + singular value. + + If ICOMPQ = 1, DIFR(1:K,2) is an array containing the + normalizing factors for the right singular vector matrix. + + See DLASD8 for details on DIFL and DIFR. + + Z (output) DOUBLE PRECISION array, dimension ( M ) + The first elements of this array contain the components + of the deflation-adjusted updating row vector. + + K (output) INTEGER + Contains the dimension of the non-deflated matrix, + This is the order of the related secular equation. 1 <= K <=N. + + C (output) DOUBLE PRECISION + C contains garbage if SQRE =0 and the C-value of a Givens + rotation related to the right null space if SQRE = 1. + + S (output) DOUBLE PRECISION + S contains garbage if SQRE =0 and the S-value of a Givens + rotation related to the right null space if SQRE = 1. + + WORK (workspace) DOUBLE PRECISION array, dimension ( 4 * M ) + + IWORK (workspace) INTEGER array, dimension ( 3 * N ) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --vf; + --vl; + --idxq; + --perm; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + poles_dim1 = *ldgnum; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + givnum_dim1 = *ldgnum; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + --difl; + --difr; + --z__; + --work; + --iwork; + + /* Function Body */ + *info = 0; + n = *nl + *nr + 1; + m = n + *sqre; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*nl < 1) { + *info = -2; + } else if (*nr < 1) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } else if (*ldgcol < n) { + *info = -14; + } else if (*ldgnum < n) { + *info = -16; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASD6", &i__1); + return 0; + } + +/* + The following values are for bookkeeping purposes only. They are + integer pointers which indicate the portion of the workspace + used by a particular array in DLASD7 and DLASD8. +*/ + + isigma = 1; + iw = isigma + n; + ivfw = iw + m; + ivlw = ivfw + m; + + idx = 1; + idxc = idx + n; + idxp = idxc + n; + +/* + Scale. + + Computing MAX +*/ + d__1 = abs(*alpha), d__2 = abs(*beta); + orgnrm = max(d__1,d__2); + d__[*nl + 1] = 0.; + i__1 = n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) > orgnrm) { + orgnrm = (d__1 = d__[i__], abs(d__1)); + } +/* L10: */ + } + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &n, &c__1, &d__[1], &n, info); + *alpha /= orgnrm; + *beta /= orgnrm; + +/* Sort and Deflate singular values. */ + + dlasd7_(icompq, nl, nr, sqre, k, &d__[1], &z__[1], &work[iw], &vf[1], & + work[ivfw], &vl[1], &work[ivlw], alpha, beta, &work[isigma], & + iwork[idx], &iwork[idxp], &idxq[1], &perm[1], givptr, &givcol[ + givcol_offset], ldgcol, &givnum[givnum_offset], ldgnum, c__, s, + info); + +/* Solve Secular Equation, compute DIFL, DIFR, and update VF, VL. */ + + dlasd8_(icompq, k, &d__[1], &z__[1], &vf[1], &vl[1], &difl[1], &difr[1], + ldgnum, &work[isigma], &work[iw], info); + +/* Save the poles if ICOMPQ = 1. */ + + if (*icompq == 1) { + dcopy_(k, &d__[1], &c__1, &poles[poles_dim1 + 1], &c__1); + dcopy_(k, &work[isigma], &c__1, &poles[(poles_dim1 << 1) + 1], &c__1); + } + +/* Unscale. */ + + dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, &n, &c__1, &d__[1], &n, info); + +/* Prepare the IDXQ sorting permutation. */ + + n1 = *k; + n2 = n - *k; + dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); + + return 0; + +/* End of DLASD6 */ + +} /* dlasd6_ */ + +/* Subroutine */ int dlasd7_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *k, doublereal *d__, doublereal *z__, + doublereal *zw, doublereal *vf, doublereal *vfw, doublereal *vl, + doublereal *vlw, doublereal *alpha, doublereal *beta, doublereal * + dsigma, integer *idx, integer *idxp, integer *idxq, integer *perm, + integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, + integer *ldgnum, doublereal *c__, doublereal *s, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + static integer i__, j, m, n, k2; + static doublereal z1; + static integer jp; + static doublereal eps, tau, tol; + static integer nlp1, nlp2, idxi, idxj; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + static integer idxjp; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer jprev; + + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), xerbla_(char *, integer *); + static doublereal hlftol; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + June 30, 1999 + + + Purpose + ======= + + DLASD7 merges the two sets of singular values together into a single + sorted set. Then it tries to deflate the size of the problem. There + are two ways in which deflation can occur: when two or more singular + values are close together or if there is a tiny entry in the Z + vector. For each such occurrence the order of the related + secular equation problem is reduced by one. + + DLASD7 is called from DLASD6. + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed + in compact form, as follows: + = 0: Compute singular values only. + = 1: Compute singular vectors of upper + bidiagonal matrix in compact form. + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has + N = NL + NR + 1 rows and + M = N + SQRE >= N columns. + + K (output) INTEGER + Contains the dimension of the non-deflated matrix, this is + the order of the related secular equation. 1 <= K <=N. + + D (input/output) DOUBLE PRECISION array, dimension ( N ) + On entry D contains the singular values of the two submatrices + to be combined. On exit D contains the trailing (N-K) updated + singular values (those which were deflated) sorted into + increasing order. + + Z (output) DOUBLE PRECISION array, dimension ( M ) + On exit Z contains the updating row vector in the secular + equation. + + ZW (workspace) DOUBLE PRECISION array, dimension ( M ) + Workspace for Z. + + VF (input/output) DOUBLE PRECISION array, dimension ( M ) + On entry, VF(1:NL+1) contains the first components of all + right singular vectors of the upper block; and VF(NL+2:M) + contains the first components of all right singular vectors + of the lower block. On exit, VF contains the first components + of all right singular vectors of the bidiagonal matrix. + + VFW (workspace) DOUBLE PRECISION array, dimension ( M ) + Workspace for VF. + + VL (input/output) DOUBLE PRECISION array, dimension ( M ) + On entry, VL(1:NL+1) contains the last components of all + right singular vectors of the upper block; and VL(NL+2:M) + contains the last components of all right singular vectors + of the lower block. On exit, VL contains the last components + of all right singular vectors of the bidiagonal matrix. + + VLW (workspace) DOUBLE PRECISION array, dimension ( M ) + Workspace for VL. + + ALPHA (input) DOUBLE PRECISION + Contains the diagonal element associated with the added row. + + BETA (input) DOUBLE PRECISION + Contains the off-diagonal element associated with the added + row. + + DSIGMA (output) DOUBLE PRECISION array, dimension ( N ) + Contains a copy of the diagonal elements (K-1 singular values + and one zero) in the secular equation. + + IDX (workspace) INTEGER array, dimension ( N ) + This will contain the permutation used to sort the contents of + D into ascending order. + + IDXP (workspace) INTEGER array, dimension ( N ) + This will contain the permutation used to place deflated + values of D at the end of the array. On output IDXP(2:K) + points to the nondeflated D-values and IDXP(K+1:N) + points to the deflated singular values. + + IDXQ (input) INTEGER array, dimension ( N ) + This contains the permutation which separately sorts the two + sub-problems in D into ascending order. Note that entries in + the first half of this permutation must first be moved one + position backward; and entries in the second half + must first have NL+1 added to their values. + + PERM (output) INTEGER array, dimension ( N ) + The permutations (from deflation and sorting) to be applied + to each singular block. Not referenced if ICOMPQ = 0. + + GIVPTR (output) INTEGER + The number of Givens rotations which took place in this + subproblem. Not referenced if ICOMPQ = 0. + + GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. Not referenced if ICOMPQ = 0. + + LDGCOL (input) INTEGER + The leading dimension of GIVCOL, must be at least N. + + GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) + Each number indicates the C or S value to be used in the + corresponding Givens rotation. Not referenced if ICOMPQ = 0. + + LDGNUM (input) INTEGER + The leading dimension of GIVNUM, must be at least N. + + C (output) DOUBLE PRECISION + C contains garbage if SQRE =0 and the C-value of a Givens + rotation related to the right null space if SQRE = 1. + + S (output) DOUBLE PRECISION + S contains garbage if SQRE =0 and the S-value of a Givens + rotation related to the right null space if SQRE = 1. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --z__; + --zw; + --vf; + --vfw; + --vl; + --vlw; + --dsigma; + --idx; + --idxp; + --idxq; + --perm; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + givnum_dim1 = *ldgnum; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + + /* Function Body */ + *info = 0; + n = *nl + *nr + 1; + m = n + *sqre; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*nl < 1) { + *info = -2; + } else if (*nr < 1) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } else if (*ldgcol < n) { + *info = -22; + } else if (*ldgnum < n) { + *info = -24; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASD7", &i__1); + return 0; + } + + nlp1 = *nl + 1; + nlp2 = *nl + 2; + if (*icompq == 1) { + *givptr = 0; + } + +/* + Generate the first part of the vector Z and move the singular + values in the first part of D one position backward. +*/ + + z1 = *alpha * vl[nlp1]; + vl[nlp1] = 0.; + tau = vf[nlp1]; + for (i__ = *nl; i__ >= 1; --i__) { + z__[i__ + 1] = *alpha * vl[i__]; + vl[i__] = 0.; + vf[i__ + 1] = vf[i__]; + d__[i__ + 1] = d__[i__]; + idxq[i__ + 1] = idxq[i__] + 1; +/* L10: */ + } + vf[1] = tau; + +/* Generate the second part of the vector Z. */ + + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + z__[i__] = *beta * vf[i__]; + vf[i__] = 0.; +/* L20: */ + } + +/* Sort the singular values into increasing order */ + + i__1 = n; + for (i__ = nlp2; i__ <= i__1; ++i__) { + idxq[i__] += nlp1; +/* L30: */ + } + +/* DSIGMA, IDXC, IDXC, and ZW are used as storage space. */ + + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + dsigma[i__] = d__[idxq[i__]]; + zw[i__] = z__[idxq[i__]]; + vfw[i__] = vf[idxq[i__]]; + vlw[i__] = vl[idxq[i__]]; +/* L40: */ + } + + dlamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); + + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + idxi = idx[i__] + 1; + d__[i__] = dsigma[idxi]; + z__[i__] = zw[idxi]; + vf[i__] = vfw[idxi]; + vl[i__] = vlw[idxi]; +/* L50: */ + } + +/* Calculate the allowable deflation tolerence */ + + eps = EPSILON; +/* Computing MAX */ + d__1 = abs(*alpha), d__2 = abs(*beta); + tol = max(d__1,d__2); +/* Computing MAX */ + d__2 = (d__1 = d__[n], abs(d__1)); + tol = eps * 64. * max(d__2,tol); + +/* + There are 2 kinds of deflation -- first a value in the z-vector + is small, second two (or more) singular values are very close + together (their difference is small). + + If the value in the z-vector is small, we simply permute the + array so that the corresponding singular value is moved to the + end. + + If two values in the D-vector are close, we perform a two-sided + rotation designed to make one of the corresponding z-vector + entries zero, and then permute the array so that the deflated + singular value is moved to the end. + + If there are multiple singular values then the problem deflates. + Here the number of equal singular values are found. As each equal + singular value is found, an elementary reflector is computed to + rotate the corresponding singular subspace so that the + corresponding components of Z are zero in this new basis. +*/ + + *k = 1; + k2 = n + 1; + i__1 = n; + for (j = 2; j <= i__1; ++j) { + if ((d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + idxp[k2] = j; + if (j == n) { + goto L100; + } + } else { + jprev = j; + goto L70; + } +/* L60: */ + } +L70: + j = jprev; +L80: + ++j; + if (j > n) { + goto L90; + } + if ((d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + idxp[k2] = j; + } else { + +/* Check if singular values are close enough to allow deflation. */ + + if ((d__1 = d__[j] - d__[jprev], abs(d__1)) <= tol) { + +/* Deflation is possible. */ + + *s = z__[jprev]; + *c__ = z__[j]; + +/* + Find sqrt(a**2+b**2) without overflow or + destructive underflow. +*/ + + tau = dlapy2_(c__, s); + z__[j] = tau; + z__[jprev] = 0.; + *c__ /= tau; + *s = -(*s) / tau; + +/* Record the appropriate Givens rotation */ + + if (*icompq == 1) { + ++(*givptr); + idxjp = idxq[idx[jprev] + 1]; + idxj = idxq[idx[j] + 1]; + if (idxjp <= nlp1) { + --idxjp; + } + if (idxj <= nlp1) { + --idxj; + } + givcol[*givptr + (givcol_dim1 << 1)] = idxjp; + givcol[*givptr + givcol_dim1] = idxj; + givnum[*givptr + (givnum_dim1 << 1)] = *c__; + givnum[*givptr + givnum_dim1] = *s; + } + drot_(&c__1, &vf[jprev], &c__1, &vf[j], &c__1, c__, s); + drot_(&c__1, &vl[jprev], &c__1, &vl[j], &c__1, c__, s); + --k2; + idxp[k2] = jprev; + jprev = j; + } else { + ++(*k); + zw[*k] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + jprev = j; + } + } + goto L80; +L90: + +/* Record the last singular value. */ + + ++(*k); + zw[*k] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + +L100: + +/* + Sort the singular values into DSIGMA. The singular values which + were not deflated go into the first K slots of DSIGMA, except + that DSIGMA(1) is treated separately. +*/ + + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + dsigma[j] = d__[jp]; + vfw[j] = vf[jp]; + vlw[j] = vl[jp]; +/* L110: */ + } + if (*icompq == 1) { + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + perm[j] = idxq[idx[jp] + 1]; + if (perm[j] <= nlp1) { + --perm[j]; + } +/* L120: */ + } + } + +/* + The deflated singular values go back into the last N - K slots of + D. +*/ + + i__1 = n - *k; + dcopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); + +/* + Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and + VL(M). +*/ + + dsigma[1] = 0.; + hlftol = tol / 2.; + if (abs(dsigma[2]) <= hlftol) { + dsigma[2] = hlftol; + } + if (m > n) { + z__[1] = dlapy2_(&z1, &z__[m]); + if (z__[1] <= tol) { + *c__ = 1.; + *s = 0.; + z__[1] = tol; + } else { + *c__ = z1 / z__[1]; + *s = -z__[m] / z__[1]; + } + drot_(&c__1, &vf[m], &c__1, &vf[1], &c__1, c__, s); + drot_(&c__1, &vl[m], &c__1, &vl[1], &c__1, c__, s); + } else { + if (abs(z1) <= tol) { + z__[1] = tol; + } else { + z__[1] = z1; + } + } + +/* Restore Z, VF, and VL. */ + + i__1 = *k - 1; + dcopy_(&i__1, &zw[2], &c__1, &z__[2], &c__1); + i__1 = n - 1; + dcopy_(&i__1, &vfw[2], &c__1, &vf[2], &c__1); + i__1 = n - 1; + dcopy_(&i__1, &vlw[2], &c__1, &vl[2], &c__1); + + return 0; + +/* End of DLASD7 */ + +} /* dlasd7_ */ + +/* Subroutine */ int dlasd8_(integer *icompq, integer *k, doublereal *d__, + doublereal *z__, doublereal *vf, doublereal *vl, doublereal *difl, + doublereal *difr, integer *lddifr, doublereal *dsigma, doublereal * + work, integer *info) +{ + /* System generated locals */ + integer difr_dim1, difr_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + static integer i__, j; + static doublereal dj, rho; + static integer iwk1, iwk2, iwk3; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + static doublereal temp; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + static integer iwk2i, iwk3i; + static doublereal diflj, difrj, dsigj; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + extern doublereal dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int dlasd4_(integer *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *), dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlaset_(char *, integer *, integer + *, doublereal *, doublereal *, doublereal *, integer *), + xerbla_(char *, integer *); + static doublereal dsigjp; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + June 30, 1999 + + + Purpose + ======= + + DLASD8 finds the square roots of the roots of the secular equation, + as defined by the values in DSIGMA and Z. It makes the appropriate + calls to DLASD4, and stores, for each element in D, the distance + to its two nearest poles (elements in DSIGMA). It also updates + the arrays VF and VL, the first and last components of all the + right singular vectors of the original bidiagonal matrix. + + DLASD8 is called from DLASD6. + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed in + factored form in the calling routine: + = 0: Compute singular values only. + = 1: Compute singular vectors in factored form as well. + + K (input) INTEGER + The number of terms in the rational function to be solved + by DLASD4. K >= 1. + + D (output) DOUBLE PRECISION array, dimension ( K ) + On output, D contains the updated singular values. + + Z (input) DOUBLE PRECISION array, dimension ( K ) + The first K elements of this array contain the components + of the deflation-adjusted updating row vector. + + VF (input/output) DOUBLE PRECISION array, dimension ( K ) + On entry, VF contains information passed through DBEDE8. + On exit, VF contains the first K components of the first + components of all right singular vectors of the bidiagonal + matrix. + + VL (input/output) DOUBLE PRECISION array, dimension ( K ) + On entry, VL contains information passed through DBEDE8. + On exit, VL contains the first K components of the last + components of all right singular vectors of the bidiagonal + matrix. + + DIFL (output) DOUBLE PRECISION array, dimension ( K ) + On exit, DIFL(I) = D(I) - DSIGMA(I). + + DIFR (output) DOUBLE PRECISION array, + dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and + dimension ( K ) if ICOMPQ = 0. + On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not + defined and will not be referenced. + + If ICOMPQ = 1, DIFR(1:K,2) is an array containing the + normalizing factors for the right singular vector matrix. + + LDDIFR (input) INTEGER + The leading dimension of DIFR, must be at least K. + + DSIGMA (input) DOUBLE PRECISION array, dimension ( K ) + The first K elements of this array contain the old roots + of the deflated updating problem. These are the poles + of the secular equation. + + WORK (workspace) DOUBLE PRECISION array, dimension at least 3 * K + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --z__; + --vf; + --vl; + --difl; + difr_dim1 = *lddifr; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + --dsigma; + --work; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*k < 1) { + *info = -2; + } else if (*lddifr < *k) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASD8", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*k == 1) { + d__[1] = abs(z__[1]); + difl[1] = d__[1]; + if (*icompq == 1) { + difl[2] = 1.; + difr[(difr_dim1 << 1) + 1] = 1.; + } + return 0; + } + +/* + Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can + be computed with high relative accuracy (barring over/underflow). + This is a problem on machines without a guard digit in + add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). + The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), + which on any of these machines zeros out the bottommost + bit of DSIGMA(I) if it is 1; this makes the subsequent + subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation + occurs. On binary machines with a guard digit (almost all + machines) it does not change DSIGMA(I) at all. On hexadecimal + and decimal machines with a guard digit, it slightly + changes the bottommost bits of DSIGMA(I). It does not account + for hexadecimal or decimal machines without guard digits + (we know of none). We use a subroutine call to compute + 2*DLAMBDA(I) to prevent optimizing compilers from eliminating + this code. +*/ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + dsigma[i__] = dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; +/* L10: */ + } + +/* Book keeping. */ + + iwk1 = 1; + iwk2 = iwk1 + *k; + iwk3 = iwk2 + *k; + iwk2i = iwk2 - 1; + iwk3i = iwk3 - 1; + +/* Normalize Z. */ + + rho = dnrm2_(k, &z__[1], &c__1); + dlascl_("G", &c__0, &c__0, &rho, &c_b15, k, &c__1, &z__[1], k, info); + rho *= rho; + +/* Initialize WORK(IWK3). */ + + dlaset_("A", k, &c__1, &c_b15, &c_b15, &work[iwk3], k); + +/* + Compute the updated singular values, the arrays DIFL, DIFR, + and the updated Z. +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dlasd4_(k, &j, &dsigma[1], &z__[1], &work[iwk1], &rho, &d__[j], &work[ + iwk2], info); + +/* If the root finder fails, the computation is terminated. */ + + if (*info != 0) { + return 0; + } + work[iwk3i + j] = work[iwk3i + j] * work[j] * work[iwk2i + j]; + difl[j] = -work[j]; + difr[j + difr_dim1] = -work[j + 1]; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + + i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ + j]); +/* L20: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + + i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ + j]); +/* L30: */ + } +/* L40: */ + } + +/* Compute updated Z. */ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + d__2 = sqrt((d__1 = work[iwk3i + i__], abs(d__1))); + z__[i__] = d_sign(&d__2, &z__[i__]); +/* L50: */ + } + +/* Update VF and VL. */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + diflj = difl[j]; + dj = d__[j]; + dsigj = -dsigma[j]; + if (j < *k) { + difrj = -difr[j + difr_dim1]; + dsigjp = -dsigma[j + 1]; + } + work[j] = -z__[j] / diflj / (dsigma[j] + dj); + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] = z__[i__] / (dlamc3_(&dsigma[i__], &dsigj) - diflj) / ( + dsigma[i__] + dj); +/* L60: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + work[i__] = z__[i__] / (dlamc3_(&dsigma[i__], &dsigjp) + difrj) / + (dsigma[i__] + dj); +/* L70: */ + } + temp = dnrm2_(k, &work[1], &c__1); + work[iwk2i + j] = ddot_(k, &work[1], &c__1, &vf[1], &c__1) / temp; + work[iwk3i + j] = ddot_(k, &work[1], &c__1, &vl[1], &c__1) / temp; + if (*icompq == 1) { + difr[j + (difr_dim1 << 1)] = temp; + } +/* L80: */ + } + + dcopy_(k, &work[iwk2], &c__1, &vf[1], &c__1); + dcopy_(k, &work[iwk3], &c__1, &vl[1], &c__1); + + return 0; + +/* End of DLASD8 */ + +} /* dlasd8_ */ + +/* Subroutine */ int dlasda_(integer *icompq, integer *smlsiz, integer *n, + integer *sqre, doublereal *d__, doublereal *e, doublereal *u, integer + *ldu, doublereal *vt, integer *k, doublereal *difl, doublereal *difr, + doublereal *z__, doublereal *poles, integer *givptr, integer *givcol, + integer *ldgcol, integer *perm, doublereal *givnum, doublereal *c__, + doublereal *s, doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1, + difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, + poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, + z_dim1, z_offset, i__1, i__2; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + + /* Local variables */ + static integer i__, j, m, i1, ic, lf, nd, ll, nl, vf, nr, vl, im1, ncc, + nlf, nrf, vfi, iwk, vli, lvl, nru, ndb1, nlp1, lvl2, nrp1; + static doublereal beta; + static integer idxq, nlvl; + static doublereal alpha; + static integer inode, ndiml, ndimr, idxqi, itemp; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer sqrei; + extern /* Subroutine */ int dlasd6_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *); + static integer nwork1, nwork2; + extern /* Subroutine */ int dlasdq_(char *, integer *, integer *, integer + *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *), dlasdt_(integer *, integer *, + integer *, integer *, integer *, integer *, integer *), dlaset_( + char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *), xerbla_(char *, integer *); + static integer smlszp; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + Using a divide and conquer approach, DLASDA computes the singular + value decomposition (SVD) of a real upper bidiagonal N-by-M matrix + B with diagonal D and offdiagonal E, where M = N + SQRE. The + algorithm computes the singular values in the SVD B = U * S * VT. + The orthogonal matrices U and VT are optionally computed in + compact form. + + A related subroutine, DLASD0, computes the singular values and + the singular vectors in explicit form. + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed + in compact form, as follows + = 0: Compute singular values only. + = 1: Compute singular vectors of upper bidiagonal + matrix in compact form. + + SMLSIZ (input) INTEGER + The maximum size of the subproblems at the bottom of the + computation tree. + + N (input) INTEGER + The row dimension of the upper bidiagonal matrix. This is + also the dimension of the main diagonal array D. + + SQRE (input) INTEGER + Specifies the column dimension of the bidiagonal matrix. + = 0: The bidiagonal matrix has column dimension M = N; + = 1: The bidiagonal matrix has column dimension M = N + 1. + + D (input/output) DOUBLE PRECISION array, dimension ( N ) + On entry D contains the main diagonal of the bidiagonal + matrix. On exit D, if INFO = 0, contains its singular values. + + E (input) DOUBLE PRECISION array, dimension ( M-1 ) + Contains the subdiagonal entries of the bidiagonal matrix. + On exit, E has been destroyed. + + U (output) DOUBLE PRECISION array, + dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced + if ICOMPQ = 0. If ICOMPQ = 1, on exit, U contains the left + singular vector matrices of all subproblems at the bottom + level. + + LDU (input) INTEGER, LDU = > N. + The leading dimension of arrays U, VT, DIFL, DIFR, POLES, + GIVNUM, and Z. + + VT (output) DOUBLE PRECISION array, + dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced + if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT' contains the right + singular vector matrices of all subproblems at the bottom + level. + + K (output) INTEGER array, + dimension ( N ) if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0. + If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th + secular equation on the computation tree. + + DIFL (output) DOUBLE PRECISION array, dimension ( LDU, NLVL ), + where NLVL = floor(log_2 (N/SMLSIZ))). + + DIFR (output) DOUBLE PRECISION array, + dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and + dimension ( N ) if ICOMPQ = 0. + If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1) + record distances between singular values on the I-th + level and singular values on the (I -1)-th level, and + DIFR(1:N, 2 * I ) contains the normalizing factors for + the right singular vector matrix. See DLASD8 for details. + + Z (output) DOUBLE PRECISION array, + dimension ( LDU, NLVL ) if ICOMPQ = 1 and + dimension ( N ) if ICOMPQ = 0. + The first K elements of Z(1, I) contain the components of + the deflation-adjusted updating row vector for subproblems + on the I-th level. + + POLES (output) DOUBLE PRECISION array, + dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced + if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and + POLES(1, 2*I) contain the new and old singular values + involved in the secular equations on the I-th level. + + GIVPTR (output) INTEGER array, + dimension ( N ) if ICOMPQ = 1, and not referenced if + ICOMPQ = 0. If ICOMPQ = 1, on exit, GIVPTR( I ) records + the number of Givens rotations performed on the I-th + problem on the computation tree. + + GIVCOL (output) INTEGER array, + dimension ( LDGCOL, 2 * NLVL ) if ICOMPQ = 1, and not + referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, + GIVCOL(1, 2 *I - 1) and GIVCOL(1, 2 *I) record the locations + of Givens rotations performed on the I-th level on the + computation tree. + + LDGCOL (input) INTEGER, LDGCOL = > N. + The leading dimension of arrays GIVCOL and PERM. + + PERM (output) INTEGER array, + dimension ( LDGCOL, NLVL ) if ICOMPQ = 1, and not referenced + if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records + permutations done on the I-th level of the computation tree. + + GIVNUM (output) DOUBLE PRECISION array, + dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not + referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, + GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S- + values of Givens rotations performed on the I-th level on + the computation tree. + + C (output) DOUBLE PRECISION array, + dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. + If ICOMPQ = 1 and the I-th subproblem is not square, on exit, + C( I ) contains the C-value of a Givens rotation related to + the right null space of the I-th subproblem. + + S (output) DOUBLE PRECISION array, dimension ( N ) if + ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1 + and the I-th subproblem is not square, on exit, S( I ) + contains the S-value of a Givens rotation related to + the right null space of the I-th subproblem. + + WORK (workspace) DOUBLE PRECISION array, dimension + (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)). + + IWORK (workspace) INTEGER array. + Dimension must be at least (7 * N). + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + givnum_dim1 = *ldu; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + poles_dim1 = *ldu; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + z_dim1 = *ldu; + z_offset = 1 + z_dim1; + z__ -= z_offset; + difr_dim1 = *ldu; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + difl_dim1 = *ldu; + difl_offset = 1 + difl_dim1; + difl -= difl_offset; + vt_dim1 = *ldu; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + --k; + --givptr; + perm_dim1 = *ldgcol; + perm_offset = 1 + perm_dim1; + perm -= perm_offset; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + --c__; + --s; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*smlsiz < 3) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } else if (*ldu < *n + *sqre) { + *info = -8; + } else if (*ldgcol < *n) { + *info = -17; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASDA", &i__1); + return 0; + } + + m = *n + *sqre; + +/* If the input matrix is too small, call DLASDQ to find the SVD. */ + + if (*n <= *smlsiz) { + if (*icompq == 0) { + dlasdq_("U", sqre, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ + vt_offset], ldu, &u[u_offset], ldu, &u[u_offset], ldu, & + work[1], info); + } else { + dlasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset] + , ldu, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], + info); + } + return 0; + } + +/* Book-keeping and set up the computation tree. */ + + inode = 1; + ndiml = inode + *n; + ndimr = ndiml + *n; + idxq = ndimr + *n; + iwk = idxq + *n; + + ncc = 0; + nru = 0; + + smlszp = *smlsiz + 1; + vf = 1; + vl = vf + m; + nwork1 = vl + m; + nwork2 = nwork1 + smlszp * smlszp; + + dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], + smlsiz); + +/* + for the nodes on bottom level of the tree, solve + their subproblems by DLASDQ. +*/ + + ndb1 = (nd + 1) / 2; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + +/* + IC : center row of each node + NL : number of rows of left subproblem + NR : number of rows of right subproblem + NLF: starting row of the left subproblem + NRF: starting row of the right subproblem +*/ + + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nlp1 = nl + 1; + nr = iwork[ndimr + i1]; + nlf = ic - nl; + nrf = ic + 1; + idxqi = idxq + nlf - 2; + vfi = vf + nlf - 1; + vli = vl + nlf - 1; + sqrei = 1; + if (*icompq == 0) { + dlaset_("A", &nlp1, &nlp1, &c_b29, &c_b15, &work[nwork1], &smlszp); + dlasdq_("U", &sqrei, &nl, &nlp1, &nru, &ncc, &d__[nlf], &e[nlf], & + work[nwork1], &smlszp, &work[nwork2], &nl, &work[nwork2], + &nl, &work[nwork2], info); + itemp = nwork1 + nl * smlszp; + dcopy_(&nlp1, &work[nwork1], &c__1, &work[vfi], &c__1); + dcopy_(&nlp1, &work[itemp], &c__1, &work[vli], &c__1); + } else { + dlaset_("A", &nl, &nl, &c_b29, &c_b15, &u[nlf + u_dim1], ldu); + dlaset_("A", &nlp1, &nlp1, &c_b29, &c_b15, &vt[nlf + vt_dim1], + ldu); + dlasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], & + vt[nlf + vt_dim1], ldu, &u[nlf + u_dim1], ldu, &u[nlf + + u_dim1], ldu, &work[nwork1], info); + dcopy_(&nlp1, &vt[nlf + vt_dim1], &c__1, &work[vfi], &c__1); + dcopy_(&nlp1, &vt[nlf + nlp1 * vt_dim1], &c__1, &work[vli], &c__1) + ; + } + if (*info != 0) { + return 0; + } + i__2 = nl; + for (j = 1; j <= i__2; ++j) { + iwork[idxqi + j] = j; +/* L10: */ + } + if (i__ == nd && *sqre == 0) { + sqrei = 0; + } else { + sqrei = 1; + } + idxqi += nlp1; + vfi += nlp1; + vli += nlp1; + nrp1 = nr + sqrei; + if (*icompq == 0) { + dlaset_("A", &nrp1, &nrp1, &c_b29, &c_b15, &work[nwork1], &smlszp); + dlasdq_("U", &sqrei, &nr, &nrp1, &nru, &ncc, &d__[nrf], &e[nrf], & + work[nwork1], &smlszp, &work[nwork2], &nr, &work[nwork2], + &nr, &work[nwork2], info); + itemp = nwork1 + (nrp1 - 1) * smlszp; + dcopy_(&nrp1, &work[nwork1], &c__1, &work[vfi], &c__1); + dcopy_(&nrp1, &work[itemp], &c__1, &work[vli], &c__1); + } else { + dlaset_("A", &nr, &nr, &c_b29, &c_b15, &u[nrf + u_dim1], ldu); + dlaset_("A", &nrp1, &nrp1, &c_b29, &c_b15, &vt[nrf + vt_dim1], + ldu); + dlasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], & + vt[nrf + vt_dim1], ldu, &u[nrf + u_dim1], ldu, &u[nrf + + u_dim1], ldu, &work[nwork1], info); + dcopy_(&nrp1, &vt[nrf + vt_dim1], &c__1, &work[vfi], &c__1); + dcopy_(&nrp1, &vt[nrf + nrp1 * vt_dim1], &c__1, &work[vli], &c__1) + ; + } + if (*info != 0) { + return 0; + } + i__2 = nr; + for (j = 1; j <= i__2; ++j) { + iwork[idxqi + j] = j; +/* L20: */ + } +/* L30: */ + } + +/* Now conquer each subproblem bottom-up. */ + + j = pow_ii(&c__2, &nlvl); + for (lvl = nlvl; lvl >= 1; --lvl) { + lvl2 = (lvl << 1) - 1; + +/* + Find the first node LF and last node LL on + the current level LVL. +*/ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__1 = lvl - 1; + lf = pow_ii(&c__2, &i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; + for (i__ = lf; i__ <= i__1; ++i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + nrf = ic + 1; + if (i__ == ll) { + sqrei = *sqre; + } else { + sqrei = 1; + } + vfi = vf + nlf - 1; + vli = vl + nlf - 1; + idxqi = idxq + nlf - 1; + alpha = d__[ic]; + beta = e[ic]; + if (*icompq == 0) { + dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & + work[vli], &alpha, &beta, &iwork[idxqi], &perm[ + perm_offset], &givptr[1], &givcol[givcol_offset], + ldgcol, &givnum[givnum_offset], ldu, &poles[ + poles_offset], &difl[difl_offset], &difr[difr_offset], + &z__[z_offset], &k[1], &c__[1], &s[1], &work[nwork1], + &iwork[iwk], info); + } else { + --j; + dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & + work[vli], &alpha, &beta, &iwork[idxqi], &perm[nlf + + lvl * perm_dim1], &givptr[j], &givcol[nlf + lvl2 * + givcol_dim1], ldgcol, &givnum[nlf + lvl2 * + givnum_dim1], ldu, &poles[nlf + lvl2 * poles_dim1], & + difl[nlf + lvl * difl_dim1], &difr[nlf + lvl2 * + difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[j], + &s[j], &work[nwork1], &iwork[iwk], info); + } + if (*info != 0) { + return 0; + } +/* L40: */ + } +/* L50: */ + } + + return 0; + +/* End of DLASDA */ + +} /* dlasda_ */ + +/* Subroutine */ int dlasdq_(char *uplo, integer *sqre, integer *n, integer * + ncvt, integer *nru, integer *ncc, doublereal *d__, doublereal *e, + doublereal *vt, integer *ldvt, doublereal *u, integer *ldu, + doublereal *c__, integer *ldc, doublereal *work, integer *info) +{ + /* System generated locals */ + integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2; + + /* Local variables */ + static integer i__, j; + static doublereal r__, cs, sn; + static integer np1, isub; + static doublereal smin; + static integer sqre1; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *), dswap_(integer *, doublereal *, integer * + , doublereal *, integer *); + static integer iuplo; + extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *), xerbla_(char *, + integer *), dbdsqr_(char *, integer *, integer *, integer + *, integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + static logical rotate; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + DLASDQ computes the singular value decomposition (SVD) of a real + (upper or lower) bidiagonal matrix with diagonal D and offdiagonal + E, accumulating the transformations if desired. Letting B denote + the input bidiagonal matrix, the algorithm computes orthogonal + matrices Q and P such that B = Q * S * P' (P' denotes the transpose + of P). The singular values S are overwritten on D. + + The input matrix U is changed to U * Q if desired. + The input matrix VT is changed to P' * VT if desired. + The input matrix C is changed to Q' * C if desired. + + See "Computing Small Singular Values of Bidiagonal Matrices With + Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, + LAPACK Working Note #3, for a detailed description of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + On entry, UPLO specifies whether the input bidiagonal matrix + is upper or lower bidiagonal, and wether it is square are + not. + UPLO = 'U' or 'u' B is upper bidiagonal. + UPLO = 'L' or 'l' B is lower bidiagonal. + + SQRE (input) INTEGER + = 0: then the input matrix is N-by-N. + = 1: then the input matrix is N-by-(N+1) if UPLU = 'U' and + (N+1)-by-N if UPLU = 'L'. + + The bidiagonal matrix has + N = NL + NR + 1 rows and + M = N + SQRE >= N columns. + + N (input) INTEGER + On entry, N specifies the number of rows and columns + in the matrix. N must be at least 0. + + NCVT (input) INTEGER + On entry, NCVT specifies the number of columns of + the matrix VT. NCVT must be at least 0. + + NRU (input) INTEGER + On entry, NRU specifies the number of rows of + the matrix U. NRU must be at least 0. + + NCC (input) INTEGER + On entry, NCC specifies the number of columns of + the matrix C. NCC must be at least 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, D contains the diagonal entries of the + bidiagonal matrix whose SVD is desired. On normal exit, + D contains the singular values in ascending order. + + E (input/output) DOUBLE PRECISION array. + dimension is (N-1) if SQRE = 0 and N if SQRE = 1. + On entry, the entries of E contain the offdiagonal entries + of the bidiagonal matrix whose SVD is desired. On normal + exit, E will contain 0. If the algorithm does not converge, + D and E will contain the diagonal and superdiagonal entries + of a bidiagonal matrix orthogonally equivalent to the one + given as input. + + VT (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT) + On entry, contains a matrix which on exit has been + premultiplied by P', dimension N-by-NCVT if SQRE = 0 + and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0). + + LDVT (input) INTEGER + On entry, LDVT specifies the leading dimension of VT as + declared in the calling (sub) program. LDVT must be at + least 1. If NCVT is nonzero LDVT must also be at least N. + + U (input/output) DOUBLE PRECISION array, dimension (LDU, N) + On entry, contains a matrix which on exit has been + postmultiplied by Q, dimension NRU-by-N if SQRE = 0 + and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0). + + LDU (input) INTEGER + On entry, LDU specifies the leading dimension of U as + declared in the calling (sub) program. LDU must be at + least max( 1, NRU ) . + + C (input/output) DOUBLE PRECISION array, dimension (LDC, NCC) + On entry, contains an N-by-NCC matrix which on exit + has been premultiplied by Q' dimension N-by-NCC if SQRE = 0 + and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0). + + LDC (input) INTEGER + On entry, LDC specifies the leading dimension of C as + declared in the calling (sub) program. LDC must be at + least 1. If NCC is nonzero, LDC must also be at least N. + + WORK (workspace) DOUBLE PRECISION array, dimension (4*N) + Workspace. Only referenced if one of NCVT, NRU, or NCC is + nonzero, and if N is at least 2. + + INFO (output) INTEGER + On exit, a value of 0 indicates a successful exit. + If INFO < 0, argument number -INFO is illegal. + If INFO > 0, the algorithm did not converge, and INFO + specifies how many superdiagonals did not converge. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + iuplo = 0; + if (lsame_(uplo, "U")) { + iuplo = 1; + } + if (lsame_(uplo, "L")) { + iuplo = 2; + } + if (iuplo == 0) { + *info = -1; + } else if (*sqre < 0 || *sqre > 1) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ncvt < 0) { + *info = -4; + } else if (*nru < 0) { + *info = -5; + } else if (*ncc < 0) { + *info = -6; + } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { + *info = -10; + } else if (*ldu < max(1,*nru)) { + *info = -12; + } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { + *info = -14; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASDQ", &i__1); + return 0; + } + if (*n == 0) { + return 0; + } + +/* ROTATE is true if any singular vectors desired, false otherwise */ + + rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; + np1 = *n + 1; + sqre1 = *sqre; + +/* + If matrix non-square upper bidiagonal, rotate to be lower + bidiagonal. The rotations are on the right. +*/ + + if (iuplo == 1 && sqre1 == 1) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (rotate) { + work[i__] = cs; + work[*n + i__] = sn; + } +/* L10: */ + } + dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__); + d__[*n] = r__; + e[*n] = 0.; + if (rotate) { + work[*n] = cs; + work[*n + *n] = sn; + } + iuplo = 2; + sqre1 = 0; + +/* Update singular vectors if desired. */ + + if (*ncvt > 0) { + dlasr_("L", "V", "F", &np1, ncvt, &work[1], &work[np1], &vt[ + vt_offset], ldvt); + } + } + +/* + If matrix lower bidiagonal, rotate to be upper bidiagonal + by applying Givens rotations on the left. +*/ + + if (iuplo == 2) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (rotate) { + work[i__] = cs; + work[*n + i__] = sn; + } +/* L20: */ + } + +/* + If matrix (N+1)-by-N lower bidiagonal, one additional + rotation is needed. +*/ + + if (sqre1 == 1) { + dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__); + d__[*n] = r__; + if (rotate) { + work[*n] = cs; + work[*n + *n] = sn; + } + } + +/* Update singular vectors if desired. */ + + if (*nru > 0) { + if (sqre1 == 0) { + dlasr_("R", "V", "F", nru, n, &work[1], &work[np1], &u[ + u_offset], ldu); + } else { + dlasr_("R", "V", "F", nru, &np1, &work[1], &work[np1], &u[ + u_offset], ldu); + } + } + if (*ncc > 0) { + if (sqre1 == 0) { + dlasr_("L", "V", "F", n, ncc, &work[1], &work[np1], &c__[ + c_offset], ldc); + } else { + dlasr_("L", "V", "F", &np1, ncc, &work[1], &work[np1], &c__[ + c_offset], ldc); + } + } + } + +/* + Call DBDSQR to compute the SVD of the reduced real + N-by-N upper bidiagonal matrix. +*/ + + dbdsqr_("U", n, ncvt, nru, ncc, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[ + u_offset], ldu, &c__[c_offset], ldc, &work[1], info); + +/* + Sort the singular values into ascending order (insertion sort on + singular values, but only one transposition per singular vector) +*/ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Scan for smallest D(I). */ + + isub = i__; + smin = d__[i__]; + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + if (d__[j] < smin) { + isub = j; + smin = d__[j]; + } +/* L30: */ + } + if (isub != i__) { + +/* Swap singular values and vectors. */ + + d__[isub] = d__[i__]; + d__[i__] = smin; + if (*ncvt > 0) { + dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[i__ + vt_dim1], + ldvt); + } + if (*nru > 0) { + dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[i__ * u_dim1 + 1] + , &c__1); + } + if (*ncc > 0) { + dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[i__ + c_dim1], ldc) + ; + } + } +/* L40: */ + } + + return 0; + +/* End of DLASDQ */ + +} /* dlasdq_ */ + +/* Subroutine */ int dlasdt_(integer *n, integer *lvl, integer *nd, integer * + inode, integer *ndiml, integer *ndimr, integer *msub) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Builtin functions */ + double log(doublereal); + + /* Local variables */ + static integer i__, il, ir, maxn; + static doublereal temp; + static integer nlvl, llst, ncrnt; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DLASDT creates a tree of subproblems for bidiagonal divide and + conquer. + + Arguments + ========= + + N (input) INTEGER + On entry, the number of diagonal elements of the + bidiagonal matrix. + + LVL (output) INTEGER + On exit, the number of levels on the computation tree. + + ND (output) INTEGER + On exit, the number of nodes on the tree. + + INODE (output) INTEGER array, dimension ( N ) + On exit, centers of subproblems. + + NDIML (output) INTEGER array, dimension ( N ) + On exit, row dimensions of left children. + + NDIMR (output) INTEGER array, dimension ( N ) + On exit, row dimensions of right children. + + MSUB (input) INTEGER. + On entry, the maximum row dimension each subproblem at the + bottom of the tree can be of. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Find the number of levels on the tree. +*/ + + /* Parameter adjustments */ + --ndimr; + --ndiml; + --inode; + + /* Function Body */ + maxn = max(1,*n); + temp = log((doublereal) maxn / (doublereal) (*msub + 1)) / log(2.); + *lvl = (integer) temp + 1; + + i__ = *n / 2; + inode[1] = i__ + 1; + ndiml[1] = i__; + ndimr[1] = *n - i__ - 1; + il = 0; + ir = 1; + llst = 1; + i__1 = *lvl - 1; + for (nlvl = 1; nlvl <= i__1; ++nlvl) { + +/* + Constructing the tree at (NLVL+1)-st level. The number of + nodes created on this level is LLST * 2. +*/ + + i__2 = llst - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + il += 2; + ir += 2; + ncrnt = llst + i__; + ndiml[il] = ndiml[ncrnt] / 2; + ndimr[il] = ndiml[ncrnt] - ndiml[il] - 1; + inode[il] = inode[ncrnt] - ndimr[il] - 1; + ndiml[ir] = ndimr[ncrnt] / 2; + ndimr[ir] = ndimr[ncrnt] - ndiml[ir] - 1; + inode[ir] = inode[ncrnt] + ndiml[ir] + 1; +/* L10: */ + } + llst <<= 1; +/* L20: */ + } + *nd = (llst << 1) - 1; + + return 0; + +/* End of DLASDT */ + +} /* dlasdt_ */ + +/* Subroutine */ int dlaset_(char *uplo, integer *m, integer *n, doublereal * + alpha, doublereal *beta, doublereal *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DLASET initializes an m-by-n matrix A to BETA on the diagonal and + ALPHA on the offdiagonals. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies the part of the matrix A to be set. + = 'U': Upper triangular part is set; the strictly lower + triangular part of A is not changed. + = 'L': Lower triangular part is set; the strictly upper + triangular part of A is not changed. + Otherwise: All of the matrix A is set. + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + ALPHA (input) DOUBLE PRECISION + The constant to which the offdiagonal elements are to be set. + + BETA (input) DOUBLE PRECISION + The constant to which the diagonal elements are to be set. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On exit, the leading m-by-n submatrix of A is set as follows: + + if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n, + if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n, + otherwise, A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j, + + and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (lsame_(uplo, "U")) { + +/* + Set the strictly upper triangular or trapezoidal part of the + array to ALPHA. +*/ + + i__1 = *n; + for (j = 2; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j - 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = *alpha; +/* L10: */ + } +/* L20: */ + } + + } else if (lsame_(uplo, "L")) { + +/* + Set the strictly lower triangular or trapezoidal part of the + array to ALPHA. +*/ + + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = *alpha; +/* L30: */ + } +/* L40: */ + } + + } else { + +/* Set the leading m-by-n submatrix to ALPHA. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = *alpha; +/* L50: */ + } +/* L60: */ + } + } + +/* Set the first min(M,N) diagonal elements to BETA. */ + + i__1 = min(*m,*n); + for (i__ = 1; i__ <= i__1; ++i__) { + a[i__ + i__ * a_dim1] = *beta; +/* L70: */ + } + + return 0; + +/* End of DLASET */ + +} /* dlaset_ */ + +/* Subroutine */ int dlasq1_(integer *n, doublereal *d__, doublereal *e, + doublereal *work, integer *info) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__; + static doublereal eps; + extern /* Subroutine */ int dlas2_(doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *); + static doublereal scale; + static integer iinfo; + static doublereal sigmn; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static doublereal sigmx; + extern /* Subroutine */ int dlasq2_(integer *, doublereal *, integer *); + + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + static doublereal safmin; + extern /* Subroutine */ int xerbla_(char *, integer *), dlasrt_( + char *, integer *, doublereal *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + DLASQ1 computes the singular values of a real N-by-N bidiagonal + matrix with diagonal D and off-diagonal E. The singular values + are computed to high relative accuracy, in the absence of + denormalization, underflow and overflow. The algorithm was first + presented in + + "Accurate singular values and differential qd algorithms" by K. V. + Fernando and B. N. Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230, + 1994, + + and the present implementation is described in "An implementation of + the dqds Algorithm (Positive Case)", LAPACK Working Note. + + Arguments + ========= + + N (input) INTEGER + The number of rows and columns in the matrix. N >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, D contains the diagonal elements of the + bidiagonal matrix whose SVD is desired. On normal exit, + D contains the singular values in decreasing order. + + E (input/output) DOUBLE PRECISION array, dimension (N) + On entry, elements E(1:N-1) contain the off-diagonal elements + of the bidiagonal matrix whose SVD is desired. + On exit, E is overwritten. + + WORK (workspace) DOUBLE PRECISION array, dimension (4*N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: the algorithm failed + = 1, a split was marked by a positive value in E + = 2, current block of Z not diagonalized after 30*N + iterations (in inner while loop) + = 3, termination criterion of outer while loop not met + (program created more than N unreduced blocks) + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --work; + --e; + --d__; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -2; + i__1 = -(*info); + xerbla_("DLASQ1", &i__1); + return 0; + } else if (*n == 0) { + return 0; + } else if (*n == 1) { + d__[1] = abs(d__[1]); + return 0; + } else if (*n == 2) { + dlas2_(&d__[1], &e[1], &d__[2], &sigmn, &sigmx); + d__[1] = sigmx; + d__[2] = sigmn; + return 0; + } + +/* Estimate the largest singular value. */ + + sigmx = 0.; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = (d__1 = d__[i__], abs(d__1)); +/* Computing MAX */ + d__2 = sigmx, d__3 = (d__1 = e[i__], abs(d__1)); + sigmx = max(d__2,d__3); +/* L10: */ + } + d__[*n] = (d__1 = d__[*n], abs(d__1)); + +/* Early return if SIGMX is zero (matrix is already diagonal). */ + + if (sigmx == 0.) { + dlasrt_("D", n, &d__[1], &iinfo); + return 0; + } + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = sigmx, d__2 = d__[i__]; + sigmx = max(d__1,d__2); +/* L20: */ + } + +/* + Copy D and E into WORK (in the Z format) and scale (squaring the + input data makes scaling by a power of the radix pointless). +*/ + + eps = PRECISION; + safmin = SAFEMINIMUM; + scale = sqrt(eps / safmin); + dcopy_(n, &d__[1], &c__1, &work[1], &c__2); + i__1 = *n - 1; + dcopy_(&i__1, &e[1], &c__1, &work[2], &c__2); + i__1 = (*n << 1) - 1; + i__2 = (*n << 1) - 1; + dlascl_("G", &c__0, &c__0, &sigmx, &scale, &i__1, &c__1, &work[1], &i__2, + &iinfo); + +/* Compute the q's and e's. */ + + i__1 = (*n << 1) - 1; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing 2nd power */ + d__1 = work[i__]; + work[i__] = d__1 * d__1; +/* L30: */ + } + work[*n * 2] = 0.; + + dlasq2_(n, &work[1], info); + + if (*info == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = sqrt(work[i__]); +/* L40: */ + } + dlascl_("G", &c__0, &c__0, &scale, &sigmx, n, &c__1, &d__[1], n, & + iinfo); + } + + return 0; + +/* End of DLASQ1 */ + +} /* dlasq1_ */ + +/* Subroutine */ int dlasq2_(integer *n, doublereal *z__, integer *info) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal d__, e; + static integer k; + static doublereal s, t; + static integer i0, i4, n0, pp; + static doublereal eps, tol; + static integer ipn4; + static doublereal tol2; + static logical ieee; + static integer nbig; + static doublereal dmin__, emin, emax; + static integer ndiv, iter; + static doublereal qmin, temp, qmax, zmax; + static integer splt, nfail; + static doublereal desig, trace, sigma; + static integer iinfo; + extern /* Subroutine */ int dlasq3_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + integer *, integer *, integer *, logical *); + + static integer iwhila, iwhilb; + static doublereal oldemn, safmin; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, + integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + DLASQ2 computes all the eigenvalues of the symmetric positive + definite tridiagonal matrix associated with the qd array Z to high + relative accuracy are computed to high relative accuracy, in the + absence of denormalization, underflow and overflow. + + To see the relation of Z to the tridiagonal matrix, let L be a + unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and + let U be an upper bidiagonal matrix with 1's above and diagonal + Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the + symmetric tridiagonal to which it is similar. + + Note : DLASQ2 defines a logical variable, IEEE, which is true + on machines which follow ieee-754 floating-point standard in their + handling of infinities and NaNs, and false otherwise. This variable + is passed to DLASQ3. + + Arguments + ========= + + N (input) INTEGER + The number of rows and columns in the matrix. N >= 0. + + Z (workspace) DOUBLE PRECISION array, dimension ( 4*N ) + On entry Z holds the qd array. On exit, entries 1 to N hold + the eigenvalues in decreasing order, Z( 2*N+1 ) holds the + trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If + N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) + holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of + shifts that failed. + + INFO (output) INTEGER + = 0: successful exit + < 0: if the i-th argument is a scalar and had an illegal + value, then INFO = -i, if the i-th argument is an + array and the j-entry had an illegal value, then + INFO = -(i*100+j) + > 0: the algorithm failed + = 1, a split was marked by a positive value in E + = 2, current block of Z not diagonalized after 30*N + iterations (in inner while loop) + = 3, termination criterion of outer while loop not met + (program created more than N unreduced blocks) + + Further Details + =============== + Local Variables: I0:N0 defines a current unreduced segment of Z. + The shifts are accumulated in SIGMA. Iteration count is in ITER. + Ping-pong is controlled by PP (alternates between 0 and 1). + + ===================================================================== + + + Test the input arguments. + (in case DLASQ2 is not called by DLASQ1) +*/ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + *info = 0; + eps = PRECISION; + safmin = SAFEMINIMUM; + tol = eps * 100.; +/* Computing 2nd power */ + d__1 = tol; + tol2 = d__1 * d__1; + + if (*n < 0) { + *info = -1; + xerbla_("DLASQ2", &c__1); + return 0; + } else if (*n == 0) { + return 0; + } else if (*n == 1) { + +/* 1-by-1 case. */ + + if (z__[1] < 0.) { + *info = -201; + xerbla_("DLASQ2", &c__2); + } + return 0; + } else if (*n == 2) { + +/* 2-by-2 case. */ + + if (z__[2] < 0. || z__[3] < 0.) { + *info = -2; + xerbla_("DLASQ2", &c__2); + return 0; + } else if (z__[3] > z__[1]) { + d__ = z__[3]; + z__[3] = z__[1]; + z__[1] = d__; + } + z__[5] = z__[1] + z__[2] + z__[3]; + if (z__[2] > z__[3] * tol2) { + t = (z__[1] - z__[3] + z__[2]) * .5; + s = z__[3] * (z__[2] / t); + if (s <= t) { + s = z__[3] * (z__[2] / (t * (sqrt(s / t + 1.) + 1.))); + } else { + s = z__[3] * (z__[2] / (t + sqrt(t) * sqrt(t + s))); + } + t = z__[1] + (s + z__[2]); + z__[3] *= z__[1] / t; + z__[1] = t; + } + z__[2] = z__[3]; + z__[6] = z__[2] + z__[1]; + return 0; + } + +/* Check for negative data and compute sums of q's and e's. */ + + z__[*n * 2] = 0.; + emin = z__[2]; + qmax = 0.; + zmax = 0.; + d__ = 0.; + e = 0.; + + i__1 = *n - 1 << 1; + for (k = 1; k <= i__1; k += 2) { + if (z__[k] < 0.) { + *info = -(k + 200); + xerbla_("DLASQ2", &c__2); + return 0; + } else if (z__[k + 1] < 0.) { + *info = -(k + 201); + xerbla_("DLASQ2", &c__2); + return 0; + } + d__ += z__[k]; + e += z__[k + 1]; +/* Computing MAX */ + d__1 = qmax, d__2 = z__[k]; + qmax = max(d__1,d__2); +/* Computing MIN */ + d__1 = emin, d__2 = z__[k + 1]; + emin = min(d__1,d__2); +/* Computing MAX */ + d__1 = max(qmax,zmax), d__2 = z__[k + 1]; + zmax = max(d__1,d__2); +/* L10: */ + } + if (z__[(*n << 1) - 1] < 0.) { + *info = -((*n << 1) + 199); + xerbla_("DLASQ2", &c__2); + return 0; + } + d__ += z__[(*n << 1) - 1]; +/* Computing MAX */ + d__1 = qmax, d__2 = z__[(*n << 1) - 1]; + qmax = max(d__1,d__2); + zmax = max(qmax,zmax); + +/* Check for diagonality. */ + + if (e == 0.) { + i__1 = *n; + for (k = 2; k <= i__1; ++k) { + z__[k] = z__[(k << 1) - 1]; +/* L20: */ + } + dlasrt_("D", n, &z__[1], &iinfo); + z__[(*n << 1) - 1] = d__; + return 0; + } + + trace = d__ + e; + +/* Check for zero data. */ + + if (trace == 0.) { + z__[(*n << 1) - 1] = 0.; + return 0; + } + +/* Check whether the machine is IEEE conformable. */ + + ieee = ilaenv_(&c__10, "DLASQ2", "N", &c__1, &c__2, &c__3, &c__4, (ftnlen) + 6, (ftnlen)1) == 1 && ilaenv_(&c__11, "DLASQ2", "N", &c__1, &c__2, + &c__3, &c__4, (ftnlen)6, (ftnlen)1) == 1; + +/* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). */ + + for (k = *n << 1; k >= 2; k += -2) { + z__[k * 2] = 0.; + z__[(k << 1) - 1] = z__[k]; + z__[(k << 1) - 2] = 0.; + z__[(k << 1) - 3] = z__[k - 1]; +/* L30: */ + } + + i0 = 1; + n0 = *n; + +/* Reverse the qd-array, if warranted. */ + + if (z__[(i0 << 2) - 3] * 1.5 < z__[(n0 << 2) - 3]) { + ipn4 = i0 + n0 << 2; + i__1 = i0 + n0 - 1 << 1; + for (i4 = i0 << 2; i4 <= i__1; i4 += 4) { + temp = z__[i4 - 3]; + z__[i4 - 3] = z__[ipn4 - i4 - 3]; + z__[ipn4 - i4 - 3] = temp; + temp = z__[i4 - 1]; + z__[i4 - 1] = z__[ipn4 - i4 - 5]; + z__[ipn4 - i4 - 5] = temp; +/* L40: */ + } + } + +/* Initial split checking via dqd and Li's test. */ + + pp = 0; + + for (k = 1; k <= 2; ++k) { + + d__ = z__[(n0 << 2) + pp - 3]; + i__1 = (i0 << 2) + pp; + for (i4 = (n0 - 1 << 2) + pp; i4 >= i__1; i4 += -4) { + if (z__[i4 - 1] <= tol2 * d__) { + z__[i4 - 1] = -0.; + d__ = z__[i4 - 3]; + } else { + d__ = z__[i4 - 3] * (d__ / (d__ + z__[i4 - 1])); + } +/* L50: */ + } + +/* dqd maps Z to ZZ plus Li's test. */ + + emin = z__[(i0 << 2) + pp + 1]; + d__ = z__[(i0 << 2) + pp - 3]; + i__1 = (n0 - 1 << 2) + pp; + for (i4 = (i0 << 2) + pp; i4 <= i__1; i4 += 4) { + z__[i4 - (pp << 1) - 2] = d__ + z__[i4 - 1]; + if (z__[i4 - 1] <= tol2 * d__) { + z__[i4 - 1] = -0.; + z__[i4 - (pp << 1) - 2] = d__; + z__[i4 - (pp << 1)] = 0.; + d__ = z__[i4 + 1]; + } else if (safmin * z__[i4 + 1] < z__[i4 - (pp << 1) - 2] && + safmin * z__[i4 - (pp << 1) - 2] < z__[i4 + 1]) { + temp = z__[i4 + 1] / z__[i4 - (pp << 1) - 2]; + z__[i4 - (pp << 1)] = z__[i4 - 1] * temp; + d__ *= temp; + } else { + z__[i4 - (pp << 1)] = z__[i4 + 1] * (z__[i4 - 1] / z__[i4 - ( + pp << 1) - 2]); + d__ = z__[i4 + 1] * (d__ / z__[i4 - (pp << 1) - 2]); + } +/* Computing MIN */ + d__1 = emin, d__2 = z__[i4 - (pp << 1)]; + emin = min(d__1,d__2); +/* L60: */ + } + z__[(n0 << 2) - pp - 2] = d__; + +/* Now find qmax. */ + + qmax = z__[(i0 << 2) - pp - 2]; + i__1 = (n0 << 2) - pp - 2; + for (i4 = (i0 << 2) - pp + 2; i4 <= i__1; i4 += 4) { +/* Computing MAX */ + d__1 = qmax, d__2 = z__[i4]; + qmax = max(d__1,d__2); +/* L70: */ + } + +/* Prepare for the next iteration on K. */ + + pp = 1 - pp; +/* L80: */ + } + + iter = 2; + nfail = 0; + ndiv = n0 - i0 << 1; + + i__1 = *n + 1; + for (iwhila = 1; iwhila <= i__1; ++iwhila) { + if (n0 < 1) { + goto L150; + } + +/* + While array unfinished do + + E(N0) holds the value of SIGMA when submatrix in I0:N0 + splits from the rest of the array, but is negated. +*/ + + desig = 0.; + if (n0 == *n) { + sigma = 0.; + } else { + sigma = -z__[(n0 << 2) - 1]; + } + if (sigma < 0.) { + *info = 1; + return 0; + } + +/* + Find last unreduced submatrix's top index I0, find QMAX and + EMIN. Find Gershgorin-type bound if Q's much greater than E's. +*/ + + emax = 0.; + if (n0 > i0) { + emin = (d__1 = z__[(n0 << 2) - 5], abs(d__1)); + } else { + emin = 0.; + } + qmin = z__[(n0 << 2) - 3]; + qmax = qmin; + for (i4 = n0 << 2; i4 >= 8; i4 += -4) { + if (z__[i4 - 5] <= 0.) { + goto L100; + } + if (qmin >= emax * 4.) { +/* Computing MIN */ + d__1 = qmin, d__2 = z__[i4 - 3]; + qmin = min(d__1,d__2); +/* Computing MAX */ + d__1 = emax, d__2 = z__[i4 - 5]; + emax = max(d__1,d__2); + } +/* Computing MAX */ + d__1 = qmax, d__2 = z__[i4 - 7] + z__[i4 - 5]; + qmax = max(d__1,d__2); +/* Computing MIN */ + d__1 = emin, d__2 = z__[i4 - 5]; + emin = min(d__1,d__2); +/* L90: */ + } + i4 = 4; + +L100: + i0 = i4 / 4; + +/* Store EMIN for passing to DLASQ3. */ + + z__[(n0 << 2) - 1] = emin; + +/* + Put -(initial shift) into DMIN. + + Computing MAX +*/ + d__1 = 0., d__2 = qmin - sqrt(qmin) * 2. * sqrt(emax); + dmin__ = -max(d__1,d__2); + +/* Now I0:N0 is unreduced. PP = 0 for ping, PP = 1 for pong. */ + + pp = 0; + + nbig = (n0 - i0 + 1) * 30; + i__2 = nbig; + for (iwhilb = 1; iwhilb <= i__2; ++iwhilb) { + if (i0 > n0) { + goto L130; + } + +/* While submatrix unfinished take a good dqds step. */ + + dlasq3_(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, & + nfail, &iter, &ndiv, &ieee); + + pp = 1 - pp; + +/* When EMIN is very small check for splits. */ + + if (pp == 0 && n0 - i0 >= 3) { + if (z__[n0 * 4] <= tol2 * qmax || z__[(n0 << 2) - 1] <= tol2 * + sigma) { + splt = i0 - 1; + qmax = z__[(i0 << 2) - 3]; + emin = z__[(i0 << 2) - 1]; + oldemn = z__[i0 * 4]; + i__3 = n0 - 3 << 2; + for (i4 = i0 << 2; i4 <= i__3; i4 += 4) { + if (z__[i4] <= tol2 * z__[i4 - 3] || z__[i4 - 1] <= + tol2 * sigma) { + z__[i4 - 1] = -sigma; + splt = i4 / 4; + qmax = 0.; + emin = z__[i4 + 3]; + oldemn = z__[i4 + 4]; + } else { +/* Computing MAX */ + d__1 = qmax, d__2 = z__[i4 + 1]; + qmax = max(d__1,d__2); +/* Computing MIN */ + d__1 = emin, d__2 = z__[i4 - 1]; + emin = min(d__1,d__2); +/* Computing MIN */ + d__1 = oldemn, d__2 = z__[i4]; + oldemn = min(d__1,d__2); + } +/* L110: */ + } + z__[(n0 << 2) - 1] = emin; + z__[n0 * 4] = oldemn; + i0 = splt + 1; + } + } + +/* L120: */ + } + + *info = 2; + return 0; + +/* end IWHILB */ + +L130: + +/* L140: */ + ; + } + + *info = 3; + return 0; + +/* end IWHILA */ + +L150: + +/* Move q's to the front. */ + + i__1 = *n; + for (k = 2; k <= i__1; ++k) { + z__[k] = z__[(k << 2) - 3]; +/* L160: */ + } + +/* Sort and compute sum of eigenvalues. */ + + dlasrt_("D", n, &z__[1], &iinfo); + + e = 0.; + for (k = *n; k >= 1; --k) { + e += z__[k]; +/* L170: */ + } + +/* Store trace, sum(eigenvalues) and information on performance. */ + + z__[(*n << 1) + 1] = trace; + z__[(*n << 1) + 2] = e; + z__[(*n << 1) + 3] = (doublereal) iter; +/* Computing 2nd power */ + i__1 = *n; + z__[(*n << 1) + 4] = (doublereal) ndiv / (doublereal) (i__1 * i__1); + z__[(*n << 1) + 5] = nfail * 100. / (doublereal) iter; + return 0; + +/* End of DLASQ2 */ + +} /* dlasq2_ */ + +/* Subroutine */ int dlasq3_(integer *i0, integer *n0, doublereal *z__, + integer *pp, doublereal *dmin__, doublereal *sigma, doublereal *desig, + doublereal *qmax, integer *nfail, integer *iter, integer *ndiv, + logical *ieee) +{ + /* Initialized data */ + + static integer ttype = 0; + static doublereal dmin1 = 0.; + static doublereal dmin2 = 0.; + static doublereal dn = 0.; + static doublereal dn1 = 0.; + static doublereal dn2 = 0.; + static doublereal tau = 0.; + + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal s, t; + static integer j4, nn; + static doublereal eps, tol; + static integer n0in, ipn4; + static doublereal tol2, temp; + extern /* Subroutine */ int dlasq4_(integer *, integer *, doublereal *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *) + , dlasq5_(integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, logical *), dlasq6_( + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *); + + static doublereal safmin; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + May 17, 2000 + + + Purpose + ======= + + DLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. + In case of failure it changes shifts, and tries again until output + is positive. + + Arguments + ========= + + I0 (input) INTEGER + First index. + + N0 (input) INTEGER + Last index. + + Z (input) DOUBLE PRECISION array, dimension ( 4*N ) + Z holds the qd array. + + PP (input) INTEGER + PP=0 for ping, PP=1 for pong. + + DMIN (output) DOUBLE PRECISION + Minimum value of d. + + SIGMA (output) DOUBLE PRECISION + Sum of shifts used in current segment. + + DESIG (input/output) DOUBLE PRECISION + Lower order part of SIGMA + + QMAX (input) DOUBLE PRECISION + Maximum value of q. + + NFAIL (output) INTEGER + Number of times shift was too big. + + ITER (output) INTEGER + Number of iterations. + + NDIV (output) INTEGER + Number of divisions. + + TTYPE (output) INTEGER + Shift type. + + IEEE (input) LOGICAL + Flag for IEEE or non IEEE arithmetic (passed to DLASQ5). + + ===================================================================== +*/ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + + n0in = *n0; + eps = PRECISION; + safmin = SAFEMINIMUM; + tol = eps * 100.; +/* Computing 2nd power */ + d__1 = tol; + tol2 = d__1 * d__1; + +/* Check for deflation. */ + +L10: + + if (*n0 < *i0) { + return 0; + } + if (*n0 == *i0) { + goto L20; + } + nn = (*n0 << 2) + *pp; + if (*n0 == *i0 + 1) { + goto L40; + } + +/* Check whether E(N0-1) is negligible, 1 eigenvalue. */ + + if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - (*pp << 1) - + 4] > tol2 * z__[nn - 7]) { + goto L30; + } + +L20: + + z__[(*n0 << 2) - 3] = z__[(*n0 << 2) + *pp - 3] + *sigma; + --(*n0); + goto L10; + +/* Check whether E(N0-2) is negligible, 2 eigenvalues. */ + +L30: + + if (z__[nn - 9] > tol2 * *sigma && z__[nn - (*pp << 1) - 8] > tol2 * z__[ + nn - 11]) { + goto L50; + } + +L40: + + if (z__[nn - 3] > z__[nn - 7]) { + s = z__[nn - 3]; + z__[nn - 3] = z__[nn - 7]; + z__[nn - 7] = s; + } + if (z__[nn - 5] > z__[nn - 3] * tol2) { + t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5; + s = z__[nn - 3] * (z__[nn - 5] / t); + if (s <= t) { + s = z__[nn - 3] * (z__[nn - 5] / (t * (sqrt(s / t + 1.) + 1.))); + } else { + s = z__[nn - 3] * (z__[nn - 5] / (t + sqrt(t) * sqrt(t + s))); + } + t = z__[nn - 7] + (s + z__[nn - 5]); + z__[nn - 3] *= z__[nn - 7] / t; + z__[nn - 7] = t; + } + z__[(*n0 << 2) - 7] = z__[nn - 7] + *sigma; + z__[(*n0 << 2) - 3] = z__[nn - 3] + *sigma; + *n0 += -2; + goto L10; + +L50: + +/* Reverse the qd-array, if warranted. */ + + if (*dmin__ <= 0. || *n0 < n0in) { + if (z__[(*i0 << 2) + *pp - 3] * 1.5 < z__[(*n0 << 2) + *pp - 3]) { + ipn4 = *i0 + *n0 << 2; + i__1 = *i0 + *n0 - 1 << 1; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + temp = z__[j4 - 3]; + z__[j4 - 3] = z__[ipn4 - j4 - 3]; + z__[ipn4 - j4 - 3] = temp; + temp = z__[j4 - 2]; + z__[j4 - 2] = z__[ipn4 - j4 - 2]; + z__[ipn4 - j4 - 2] = temp; + temp = z__[j4 - 1]; + z__[j4 - 1] = z__[ipn4 - j4 - 5]; + z__[ipn4 - j4 - 5] = temp; + temp = z__[j4]; + z__[j4] = z__[ipn4 - j4 - 4]; + z__[ipn4 - j4 - 4] = temp; +/* L60: */ + } + if (*n0 - *i0 <= 4) { + z__[(*n0 << 2) + *pp - 1] = z__[(*i0 << 2) + *pp - 1]; + z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp]; + } +/* Computing MIN */ + d__1 = dmin2, d__2 = z__[(*n0 << 2) + *pp - 1]; + dmin2 = min(d__1,d__2); +/* Computing MIN */ + d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*i0 << 2) + *pp - 1] + , d__1 = min(d__1,d__2), d__2 = z__[(*i0 << 2) + *pp + 3]; + z__[(*n0 << 2) + *pp - 1] = min(d__1,d__2); +/* Computing MIN */ + d__1 = z__[(*n0 << 2) - *pp], d__2 = z__[(*i0 << 2) - *pp], d__1 = + min(d__1,d__2), d__2 = z__[(*i0 << 2) - *pp + 4]; + z__[(*n0 << 2) - *pp] = min(d__1,d__2); +/* Computing MAX */ + d__1 = *qmax, d__2 = z__[(*i0 << 2) + *pp - 3], d__1 = max(d__1, + d__2), d__2 = z__[(*i0 << 2) + *pp + 1]; + *qmax = max(d__1,d__2); + *dmin__ = -0.; + } + } + +/* + L70: + + Computing MIN +*/ + d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*n0 << 2) + *pp - 9], d__1 = + min(d__1,d__2), d__2 = dmin2 + z__[(*n0 << 2) - *pp]; + if (*dmin__ < 0. || safmin * *qmax < min(d__1,d__2)) { + +/* Choose a shift. */ + + dlasq4_(i0, n0, &z__[1], pp, &n0in, dmin__, &dmin1, &dmin2, &dn, &dn1, + &dn2, &tau, &ttype); + +/* Call dqds until DMIN > 0. */ + +L80: + + dlasq5_(i0, n0, &z__[1], pp, &tau, dmin__, &dmin1, &dmin2, &dn, &dn1, + &dn2, ieee); + + *ndiv += *n0 - *i0 + 2; + ++(*iter); + +/* Check status. */ + + if (*dmin__ >= 0. && dmin1 > 0.) { + +/* Success. */ + + goto L100; + + } else if (*dmin__ < 0. && dmin1 > 0. && z__[(*n0 - 1 << 2) - *pp] < + tol * (*sigma + dn1) && abs(dn) < tol * *sigma) { + +/* Convergence hidden by negative DN. */ + + z__[(*n0 - 1 << 2) - *pp + 2] = 0.; + *dmin__ = 0.; + goto L100; + } else if (*dmin__ < 0.) { + +/* TAU too big. Select new TAU and try again. */ + + ++(*nfail); + if (ttype < -22) { + +/* Failed twice. Play it safe. */ + + tau = 0.; + } else if (dmin1 > 0.) { + +/* Late failure. Gives excellent shift. */ + + tau = (tau + *dmin__) * (1. - eps * 2.); + ttype += -11; + } else { + +/* Early failure. Divide by 4. */ + + tau *= .25; + ttype += -12; + } + goto L80; + } else if (*dmin__ != *dmin__) { + +/* NaN. */ + + tau = 0.; + goto L80; + } else { + +/* Possible underflow. Play it safe. */ + + goto L90; + } + } + +/* Risk of underflow. */ + +L90: + dlasq6_(i0, n0, &z__[1], pp, dmin__, &dmin1, &dmin2, &dn, &dn1, &dn2); + *ndiv += *n0 - *i0 + 2; + ++(*iter); + tau = 0.; + +L100: + if (tau < *sigma) { + *desig += tau; + t = *sigma + *desig; + *desig -= t - *sigma; + } else { + t = *sigma + tau; + *desig = *sigma - (t - tau) + *desig; + } + *sigma = t; + + return 0; + +/* End of DLASQ3 */ + +} /* dlasq3_ */ + +/* Subroutine */ int dlasq4_(integer *i0, integer *n0, doublereal *z__, + integer *pp, integer *n0in, doublereal *dmin__, doublereal *dmin1, + doublereal *dmin2, doublereal *dn, doublereal *dn1, doublereal *dn2, + doublereal *tau, integer *ttype) +{ + /* Initialized data */ + + static doublereal g = 0.; + + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal s, a2, b1, b2; + static integer i4, nn, np; + static doublereal gam, gap1, gap2; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + DLASQ4 computes an approximation TAU to the smallest eigenvalue + using values of d from the previous transform. + + I0 (input) INTEGER + First index. + + N0 (input) INTEGER + Last index. + + Z (input) DOUBLE PRECISION array, dimension ( 4*N ) + Z holds the qd array. + + PP (input) INTEGER + PP=0 for ping, PP=1 for pong. + + NOIN (input) INTEGER + The value of N0 at start of EIGTEST. + + DMIN (input) DOUBLE PRECISION + Minimum value of d. + + DMIN1 (input) DOUBLE PRECISION + Minimum value of d, excluding D( N0 ). + + DMIN2 (input) DOUBLE PRECISION + Minimum value of d, excluding D( N0 ) and D( N0-1 ). + + DN (input) DOUBLE PRECISION + d(N) + + DN1 (input) DOUBLE PRECISION + d(N-1) + + DN2 (input) DOUBLE PRECISION + d(N-2) + + TAU (output) DOUBLE PRECISION + This is the shift. + + TTYPE (output) INTEGER + Shift type. + + Further Details + =============== + CNST1 = 9/16 + + ===================================================================== +*/ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + +/* + A negative DMIN forces the shift to take that absolute value + TTYPE records the type of shift. +*/ + + if (*dmin__ <= 0.) { + *tau = -(*dmin__); + *ttype = -1; + return 0; + } + + nn = (*n0 << 2) + *pp; + if (*n0in == *n0) { + +/* No eigenvalues deflated. */ + + if (*dmin__ == *dn || *dmin__ == *dn1) { + + b1 = sqrt(z__[nn - 3]) * sqrt(z__[nn - 5]); + b2 = sqrt(z__[nn - 7]) * sqrt(z__[nn - 9]); + a2 = z__[nn - 7] + z__[nn - 5]; + +/* Cases 2 and 3. */ + + if (*dmin__ == *dn && *dmin1 == *dn1) { + gap2 = *dmin2 - a2 - *dmin2 * .25; + if (gap2 > 0. && gap2 > b2) { + gap1 = a2 - *dn - b2 / gap2 * b2; + } else { + gap1 = a2 - *dn - (b1 + b2); + } + if (gap1 > 0. && gap1 > b1) { +/* Computing MAX */ + d__1 = *dn - b1 / gap1 * b1, d__2 = *dmin__ * .5; + s = max(d__1,d__2); + *ttype = -2; + } else { + s = 0.; + if (*dn > b1) { + s = *dn - b1; + } + if (a2 > b1 + b2) { +/* Computing MIN */ + d__1 = s, d__2 = a2 - (b1 + b2); + s = min(d__1,d__2); + } +/* Computing MAX */ + d__1 = s, d__2 = *dmin__ * .333; + s = max(d__1,d__2); + *ttype = -3; + } + } else { + +/* Case 4. */ + + *ttype = -4; + s = *dmin__ * .25; + if (*dmin__ == *dn) { + gam = *dn; + a2 = 0.; + if (z__[nn - 5] > z__[nn - 7]) { + return 0; + } + b2 = z__[nn - 5] / z__[nn - 7]; + np = nn - 9; + } else { + np = nn - (*pp << 1); + b2 = z__[np - 2]; + gam = *dn1; + if (z__[np - 4] > z__[np - 2]) { + return 0; + } + a2 = z__[np - 4] / z__[np - 2]; + if (z__[nn - 9] > z__[nn - 11]) { + return 0; + } + b2 = z__[nn - 9] / z__[nn - 11]; + np = nn - 13; + } + +/* Approximate contribution to norm squared from I < NN-1. */ + + a2 += b2; + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = np; i4 >= i__1; i4 += -4) { + if (b2 == 0.) { + goto L20; + } + b1 = b2; + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b2 *= z__[i4] / z__[i4 - 2]; + a2 += b2; + if (max(b2,b1) * 100. < a2 || .563 < a2) { + goto L20; + } +/* L10: */ + } +L20: + a2 *= 1.05; + +/* Rayleigh quotient residual bound. */ + + if (a2 < .563) { + s = gam * (1. - sqrt(a2)) / (a2 + 1.); + } + } + } else if (*dmin__ == *dn2) { + +/* Case 5. */ + + *ttype = -5; + s = *dmin__ * .25; + +/* Compute contribution to norm squared from I > NN-2. */ + + np = nn - (*pp << 1); + b1 = z__[np - 2]; + b2 = z__[np - 6]; + gam = *dn2; + if (z__[np - 8] > b2 || z__[np - 4] > b1) { + return 0; + } + a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.); + +/* Approximate contribution to norm squared from I < NN-2. */ + + if (*n0 - *i0 > 2) { + b2 = z__[nn - 13] / z__[nn - 15]; + a2 += b2; + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = nn - 17; i4 >= i__1; i4 += -4) { + if (b2 == 0.) { + goto L40; + } + b1 = b2; + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b2 *= z__[i4] / z__[i4 - 2]; + a2 += b2; + if (max(b2,b1) * 100. < a2 || .563 < a2) { + goto L40; + } +/* L30: */ + } +L40: + a2 *= 1.05; + } + + if (a2 < .563) { + s = gam * (1. - sqrt(a2)) / (a2 + 1.); + } + } else { + +/* Case 6, no information to guide us. */ + + if (*ttype == -6) { + g += (1. - g) * .333; + } else if (*ttype == -18) { + g = .083250000000000005; + } else { + g = .25; + } + s = g * *dmin__; + *ttype = -6; + } + + } else if (*n0in == *n0 + 1) { + +/* One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. */ + + if (*dmin1 == *dn1 && *dmin2 == *dn2) { + +/* Cases 7 and 8. */ + + *ttype = -7; + s = *dmin1 * .333; + if (z__[nn - 5] > z__[nn - 7]) { + return 0; + } + b1 = z__[nn - 5] / z__[nn - 7]; + b2 = b1; + if (b2 == 0.) { + goto L60; + } + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { + a2 = b1; + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b1 *= z__[i4] / z__[i4 - 2]; + b2 += b1; + if (max(b1,a2) * 100. < b2) { + goto L60; + } +/* L50: */ + } +L60: + b2 = sqrt(b2 * 1.05); +/* Computing 2nd power */ + d__1 = b2; + a2 = *dmin1 / (d__1 * d__1 + 1.); + gap2 = *dmin2 * .5 - a2; + if (gap2 > 0. && gap2 > b2 * a2) { +/* Computing MAX */ + d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2); + s = max(d__1,d__2); + } else { +/* Computing MAX */ + d__1 = s, d__2 = a2 * (1. - b2 * 1.01); + s = max(d__1,d__2); + *ttype = -8; + } + } else { + +/* Case 9. */ + + s = *dmin1 * .25; + if (*dmin1 == *dn1) { + s = *dmin1 * .5; + } + *ttype = -9; + } + + } else if (*n0in == *n0 + 2) { + +/* + Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. + + Cases 10 and 11. +*/ + + if (*dmin2 == *dn2 && z__[nn - 5] * 2. < z__[nn - 7]) { + *ttype = -10; + s = *dmin2 * .333; + if (z__[nn - 5] > z__[nn - 7]) { + return 0; + } + b1 = z__[nn - 5] / z__[nn - 7]; + b2 = b1; + if (b2 == 0.) { + goto L80; + } + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b1 *= z__[i4] / z__[i4 - 2]; + b2 += b1; + if (b1 * 100. < b2) { + goto L80; + } +/* L70: */ + } +L80: + b2 = sqrt(b2 * 1.05); +/* Computing 2nd power */ + d__1 = b2; + a2 = *dmin2 / (d__1 * d__1 + 1.); + gap2 = z__[nn - 7] + z__[nn - 9] - sqrt(z__[nn - 11]) * sqrt(z__[ + nn - 9]) - a2; + if (gap2 > 0. && gap2 > b2 * a2) { +/* Computing MAX */ + d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2); + s = max(d__1,d__2); + } else { +/* Computing MAX */ + d__1 = s, d__2 = a2 * (1. - b2 * 1.01); + s = max(d__1,d__2); + } + } else { + s = *dmin2 * .25; + *ttype = -11; + } + } else if (*n0in > *n0 + 2) { + +/* Case 12, more than two eigenvalues deflated. No information. */ + + s = 0.; + *ttype = -12; + } + + *tau = s; + return 0; + +/* End of DLASQ4 */ + +} /* dlasq4_ */ + +/* Subroutine */ int dlasq5_(integer *i0, integer *n0, doublereal *z__, + integer *pp, doublereal *tau, doublereal *dmin__, doublereal *dmin1, + doublereal *dmin2, doublereal *dn, doublereal *dnm1, doublereal *dnm2, + logical *ieee) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Local variables */ + static doublereal d__; + static integer j4, j4p2; + static doublereal emin, temp; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + May 17, 2000 + + + Purpose + ======= + + DLASQ5 computes one dqds transform in ping-pong form, one + version for IEEE machines another for non IEEE machines. + + Arguments + ========= + + I0 (input) INTEGER + First index. + + N0 (input) INTEGER + Last index. + + Z (input) DOUBLE PRECISION array, dimension ( 4*N ) + Z holds the qd array. EMIN is stored in Z(4*N0) to avoid + an extra argument. + + PP (input) INTEGER + PP=0 for ping, PP=1 for pong. + + TAU (input) DOUBLE PRECISION + This is the shift. + + DMIN (output) DOUBLE PRECISION + Minimum value of d. + + DMIN1 (output) DOUBLE PRECISION + Minimum value of d, excluding D( N0 ). + + DMIN2 (output) DOUBLE PRECISION + Minimum value of d, excluding D( N0 ) and D( N0-1 ). + + DN (output) DOUBLE PRECISION + d(N0), the last value of d. + + DNM1 (output) DOUBLE PRECISION + d(N0-1). + + DNM2 (output) DOUBLE PRECISION + d(N0-2). + + IEEE (input) LOGICAL + Flag for IEEE or non IEEE arithmetic. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + if (*n0 - *i0 - 1 <= 0) { + return 0; + } + + j4 = (*i0 << 2) + *pp - 3; + emin = z__[j4 + 4]; + d__ = z__[j4] - *tau; + *dmin__ = d__; + *dmin1 = -z__[j4]; + + if (*ieee) { + +/* Code for IEEE arithmetic. */ + + if (*pp == 0) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 2] = d__ + z__[j4 - 1]; + temp = z__[j4 + 1] / z__[j4 - 2]; + d__ = d__ * temp - *tau; + *dmin__ = min(*dmin__,d__); + z__[j4] = z__[j4 - 1] * temp; +/* Computing MIN */ + d__1 = z__[j4]; + emin = min(d__1,emin); +/* L10: */ + } + } else { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 3] = d__ + z__[j4]; + temp = z__[j4 + 2] / z__[j4 - 3]; + d__ = d__ * temp - *tau; + *dmin__ = min(*dmin__,d__); + z__[j4 - 1] = z__[j4] * temp; +/* Computing MIN */ + d__1 = z__[j4 - 1]; + emin = min(d__1,emin); +/* L20: */ + } + } + +/* Unroll last two steps. */ + + *dnm2 = d__; + *dmin2 = *dmin__; + j4 = (*n0 - 2 << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm2 + z__[j4p2]; + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; + *dmin__ = min(*dmin__,*dnm1); + + *dmin1 = *dmin__; + j4 += 4; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm1 + z__[j4p2]; + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; + *dmin__ = min(*dmin__,*dn); + + } else { + +/* Code for non IEEE arithmetic. */ + + if (*pp == 0) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 2] = d__ + z__[j4 - 1]; + if (d__ < 0.) { + return 0; + } else { + z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); + d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]) - *tau; + } + *dmin__ = min(*dmin__,d__); +/* Computing MIN */ + d__1 = emin, d__2 = z__[j4]; + emin = min(d__1,d__2); +/* L30: */ + } + } else { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 3] = d__ + z__[j4]; + if (d__ < 0.) { + return 0; + } else { + z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); + d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]) - *tau; + } + *dmin__ = min(*dmin__,d__); +/* Computing MIN */ + d__1 = emin, d__2 = z__[j4 - 1]; + emin = min(d__1,d__2); +/* L40: */ + } + } + +/* Unroll last two steps. */ + + *dnm2 = d__; + *dmin2 = *dmin__; + j4 = (*n0 - 2 << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm2 + z__[j4p2]; + if (*dnm2 < 0.) { + return 0; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; + } + *dmin__ = min(*dmin__,*dnm1); + + *dmin1 = *dmin__; + j4 += 4; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm1 + z__[j4p2]; + if (*dnm1 < 0.) { + return 0; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; + } + *dmin__ = min(*dmin__,*dn); + + } + + z__[j4 + 2] = *dn; + z__[(*n0 << 2) - *pp] = emin; + return 0; + +/* End of DLASQ5 */ + +} /* dlasq5_ */ + +/* Subroutine */ int dlasq6_(integer *i0, integer *n0, doublereal *z__, + integer *pp, doublereal *dmin__, doublereal *dmin1, doublereal *dmin2, + doublereal *dn, doublereal *dnm1, doublereal *dnm2) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Local variables */ + static doublereal d__; + static integer j4, j4p2; + static doublereal emin, temp; + + static doublereal safmin; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + DLASQ6 computes one dqd (shift equal to zero) transform in + ping-pong form, with protection against underflow and overflow. + + Arguments + ========= + + I0 (input) INTEGER + First index. + + N0 (input) INTEGER + Last index. + + Z (input) DOUBLE PRECISION array, dimension ( 4*N ) + Z holds the qd array. EMIN is stored in Z(4*N0) to avoid + an extra argument. + + PP (input) INTEGER + PP=0 for ping, PP=1 for pong. + + DMIN (output) DOUBLE PRECISION + Minimum value of d. + + DMIN1 (output) DOUBLE PRECISION + Minimum value of d, excluding D( N0 ). + + DMIN2 (output) DOUBLE PRECISION + Minimum value of d, excluding D( N0 ) and D( N0-1 ). + + DN (output) DOUBLE PRECISION + d(N0), the last value of d. + + DNM1 (output) DOUBLE PRECISION + d(N0-1). + + DNM2 (output) DOUBLE PRECISION + d(N0-2). + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + if (*n0 - *i0 - 1 <= 0) { + return 0; + } + + safmin = SAFEMINIMUM; + j4 = (*i0 << 2) + *pp - 3; + emin = z__[j4 + 4]; + d__ = z__[j4]; + *dmin__ = d__; + + if (*pp == 0) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 2] = d__ + z__[j4 - 1]; + if (z__[j4 - 2] == 0.) { + z__[j4] = 0.; + d__ = z__[j4 + 1]; + *dmin__ = d__; + emin = 0.; + } else if (safmin * z__[j4 + 1] < z__[j4 - 2] && safmin * z__[j4 + - 2] < z__[j4 + 1]) { + temp = z__[j4 + 1] / z__[j4 - 2]; + z__[j4] = z__[j4 - 1] * temp; + d__ *= temp; + } else { + z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); + d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]); + } + *dmin__ = min(*dmin__,d__); +/* Computing MIN */ + d__1 = emin, d__2 = z__[j4]; + emin = min(d__1,d__2); +/* L10: */ + } + } else { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 3] = d__ + z__[j4]; + if (z__[j4 - 3] == 0.) { + z__[j4 - 1] = 0.; + d__ = z__[j4 + 2]; + *dmin__ = d__; + emin = 0.; + } else if (safmin * z__[j4 + 2] < z__[j4 - 3] && safmin * z__[j4 + - 3] < z__[j4 + 2]) { + temp = z__[j4 + 2] / z__[j4 - 3]; + z__[j4 - 1] = z__[j4] * temp; + d__ *= temp; + } else { + z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); + d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]); + } + *dmin__ = min(*dmin__,d__); +/* Computing MIN */ + d__1 = emin, d__2 = z__[j4 - 1]; + emin = min(d__1,d__2); +/* L20: */ + } + } + +/* Unroll last two steps. */ + + *dnm2 = d__; + *dmin2 = *dmin__; + j4 = (*n0 - 2 << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm2 + z__[j4p2]; + if (z__[j4 - 2] == 0.) { + z__[j4] = 0.; + *dnm1 = z__[j4p2 + 2]; + *dmin__ = *dnm1; + emin = 0.; + } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < + z__[j4p2 + 2]) { + temp = z__[j4p2 + 2] / z__[j4 - 2]; + z__[j4] = z__[j4p2] * temp; + *dnm1 = *dnm2 * temp; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]); + } + *dmin__ = min(*dmin__,*dnm1); + + *dmin1 = *dmin__; + j4 += 4; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm1 + z__[j4p2]; + if (z__[j4 - 2] == 0.) { + z__[j4] = 0.; + *dn = z__[j4p2 + 2]; + *dmin__ = *dn; + emin = 0.; + } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < + z__[j4p2 + 2]) { + temp = z__[j4p2 + 2] / z__[j4 - 2]; + z__[j4] = z__[j4p2] * temp; + *dn = *dnm1 * temp; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]); + } + *dmin__ = min(*dmin__,*dn); + + z__[j4 + 2] = *dn; + z__[(*n0 << 2) - *pp] = emin; + return 0; + +/* End of DLASQ6 */ + +} /* dlasq6_ */ + +/* Subroutine */ int dlasr_(char *side, char *pivot, char *direct, integer *m, + integer *n, doublereal *c__, doublereal *s, doublereal *a, integer * + lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, info; + static doublereal temp; + extern logical lsame_(char *, char *); + static doublereal ctemp, stemp; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DLASR performs the transformation + + A := P*A, when SIDE = 'L' or 'l' ( Left-hand side ) + + A := A*P', when SIDE = 'R' or 'r' ( Right-hand side ) + + where A is an m by n real matrix and P is an orthogonal matrix, + consisting of a sequence of plane rotations determined by the + parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l' + and z = n when SIDE = 'R' or 'r' ): + + When DIRECT = 'F' or 'f' ( Forward sequence ) then + + P = P( z - 1 )*...*P( 2 )*P( 1 ), + + and when DIRECT = 'B' or 'b' ( Backward sequence ) then + + P = P( 1 )*P( 2 )*...*P( z - 1 ), + + where P( k ) is a plane rotation matrix for the following planes: + + when PIVOT = 'V' or 'v' ( Variable pivot ), + the plane ( k, k + 1 ) + + when PIVOT = 'T' or 't' ( Top pivot ), + the plane ( 1, k + 1 ) + + when PIVOT = 'B' or 'b' ( Bottom pivot ), + the plane ( k, z ) + + c( k ) and s( k ) must contain the cosine and sine that define the + matrix P( k ). The two by two plane rotation part of the matrix + P( k ), R( k ), is assumed to be of the form + + R( k ) = ( c( k ) s( k ) ). + ( -s( k ) c( k ) ) + + This version vectorises across rows of the array A when SIDE = 'L'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + Specifies whether the plane rotation matrix P is applied to + A on the left or the right. + = 'L': Left, compute A := P*A + = 'R': Right, compute A:= A*P' + + DIRECT (input) CHARACTER*1 + Specifies whether P is a forward or backward sequence of + plane rotations. + = 'F': Forward, P = P( z - 1 )*...*P( 2 )*P( 1 ) + = 'B': Backward, P = P( 1 )*P( 2 )*...*P( z - 1 ) + + PIVOT (input) CHARACTER*1 + Specifies the plane for which P(k) is a plane rotation + matrix. + = 'V': Variable pivot, the plane (k,k+1) + = 'T': Top pivot, the plane (1,k+1) + = 'B': Bottom pivot, the plane (k,z) + + M (input) INTEGER + The number of rows of the matrix A. If m <= 1, an immediate + return is effected. + + N (input) INTEGER + The number of columns of the matrix A. If n <= 1, an + immediate return is effected. + + C, S (input) DOUBLE PRECISION arrays, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + c(k) and s(k) contain the cosine and sine that define the + matrix P(k). The two by two plane rotation part of the + matrix P(k), R(k), is assumed to be of the form + R( k ) = ( c( k ) s( k ) ). + ( -s( k ) c( k ) ) + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + The m by n matrix A. On exit, A is overwritten by P*A if + SIDE = 'R' or by A*P' if SIDE = 'L'. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + --c__; + --s; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (! (lsame_(side, "L") || lsame_(side, "R"))) { + info = 1; + } else if (! (lsame_(pivot, "V") || lsame_(pivot, + "T") || lsame_(pivot, "B"))) { + info = 2; + } else if (! (lsame_(direct, "F") || lsame_(direct, + "B"))) { + info = 3; + } else if (*m < 0) { + info = 4; + } else if (*n < 0) { + info = 5; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_("DLASR ", &info); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + if (lsame_(side, "L")) { + +/* Form P * A */ + + if (lsame_(pivot, "V")) { + if (lsame_(direct, "F")) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[j + 1 + i__ * a_dim1]; + a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp * + a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j + + i__ * a_dim1]; +/* L10: */ + } + } +/* L20: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[j + 1 + i__ * a_dim1]; + a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp * + a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j + + i__ * a_dim1]; +/* L30: */ + } + } +/* L40: */ + } + } + } else if (lsame_(pivot, "T")) { + if (lsame_(direct, "F")) { + i__1 = *m; + for (j = 2; j <= i__1; ++j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = ctemp * temp - stemp * a[ + i__ * a_dim1 + 1]; + a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[ + i__ * a_dim1 + 1]; +/* L50: */ + } + } +/* L60: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m; j >= 2; --j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = ctemp * temp - stemp * a[ + i__ * a_dim1 + 1]; + a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[ + i__ * a_dim1 + 1]; +/* L70: */ + } + } +/* L80: */ + } + } + } else if (lsame_(pivot, "B")) { + if (lsame_(direct, "F")) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1] + + ctemp * temp; + a[*m + i__ * a_dim1] = ctemp * a[*m + i__ * + a_dim1] - stemp * temp; +/* L90: */ + } + } +/* L100: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1] + + ctemp * temp; + a[*m + i__ * a_dim1] = ctemp * a[*m + i__ * + a_dim1] - stemp * temp; +/* L110: */ + } + } +/* L120: */ + } + } + } + } else if (lsame_(side, "R")) { + +/* Form A * P' */ + + if (lsame_(pivot, "V")) { + if (lsame_(direct, "F")) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[i__ + (j + 1) * a_dim1]; + a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp * + a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * temp + ctemp * a[ + i__ + j * a_dim1]; +/* L130: */ + } + } +/* L140: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[i__ + (j + 1) * a_dim1]; + a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp * + a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * temp + ctemp * a[ + i__ + j * a_dim1]; +/* L150: */ + } + } +/* L160: */ + } + } + } else if (lsame_(pivot, "T")) { + if (lsame_(direct, "F")) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = ctemp * temp - stemp * a[ + i__ + a_dim1]; + a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ + + a_dim1]; +/* L170: */ + } + } +/* L180: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n; j >= 2; --j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = ctemp * temp - stemp * a[ + i__ + a_dim1]; + a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ + + a_dim1]; +/* L190: */ + } + } +/* L200: */ + } + } + } else if (lsame_(pivot, "B")) { + if (lsame_(direct, "F")) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1] + + ctemp * temp; + a[i__ + *n * a_dim1] = ctemp * a[i__ + *n * + a_dim1] - stemp * temp; +/* L210: */ + } + } +/* L220: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1] + + ctemp * temp; + a[i__ + *n * a_dim1] = ctemp * a[i__ + *n * + a_dim1] - stemp * temp; +/* L230: */ + } + } +/* L240: */ + } + } + } + } + + return 0; + +/* End of DLASR */ + +} /* dlasr_ */ + +/* Subroutine */ int dlasrt_(char *id, integer *n, doublereal *d__, integer * + info) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + static integer i__, j; + static doublereal d1, d2, d3; + static integer dir; + static doublereal tmp; + static integer endd; + extern logical lsame_(char *, char *); + static integer stack[64] /* was [2][32] */; + static doublereal dmnmx; + static integer start; + extern /* Subroutine */ int xerbla_(char *, integer *); + static integer stkpnt; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + Sort the numbers in D in increasing order (if ID = 'I') or + in decreasing order (if ID = 'D' ). + + Use Quick Sort, reverting to Insertion sort on arrays of + size <= 20. Dimension of STACK limits N to about 2**32. + + Arguments + ========= + + ID (input) CHARACTER*1 + = 'I': sort D in increasing order; + = 'D': sort D in decreasing order. + + N (input) INTEGER + The length of the array D. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the array to be sorted. + On exit, D has been sorted into increasing order + (D(1) <= ... <= D(N) ) or into decreasing order + (D(1) >= ... >= D(N) ), depending on ID. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input paramters. +*/ + + /* Parameter adjustments */ + --d__; + + /* Function Body */ + *info = 0; + dir = -1; + if (lsame_(id, "D")) { + dir = 0; + } else if (lsame_(id, "I")) { + dir = 1; + } + if (dir == -1) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASRT", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 1) { + return 0; + } + + stkpnt = 1; + stack[0] = 1; + stack[1] = *n; +L10: + start = stack[(stkpnt << 1) - 2]; + endd = stack[(stkpnt << 1) - 1]; + --stkpnt; + if (endd - start <= 20 && endd - start > 0) { + +/* Do Insertion sort on D( START:ENDD ) */ + + if (dir == 0) { + +/* Sort into decreasing order */ + + i__1 = endd; + for (i__ = start + 1; i__ <= i__1; ++i__) { + i__2 = start + 1; + for (j = i__; j >= i__2; --j) { + if (d__[j] > d__[j - 1]) { + dmnmx = d__[j]; + d__[j] = d__[j - 1]; + d__[j - 1] = dmnmx; + } else { + goto L30; + } +/* L20: */ + } +L30: + ; + } + + } else { + +/* Sort into increasing order */ + + i__1 = endd; + for (i__ = start + 1; i__ <= i__1; ++i__) { + i__2 = start + 1; + for (j = i__; j >= i__2; --j) { + if (d__[j] < d__[j - 1]) { + dmnmx = d__[j]; + d__[j] = d__[j - 1]; + d__[j - 1] = dmnmx; + } else { + goto L50; + } +/* L40: */ + } +L50: + ; + } + + } + + } else if (endd - start > 20) { + +/* + Partition D( START:ENDD ) and stack parts, largest one first + + Choose partition entry as median of 3 +*/ + + d1 = d__[start]; + d2 = d__[endd]; + i__ = (start + endd) / 2; + d3 = d__[i__]; + if (d1 < d2) { + if (d3 < d1) { + dmnmx = d1; + } else if (d3 < d2) { + dmnmx = d3; + } else { + dmnmx = d2; + } + } else { + if (d3 < d2) { + dmnmx = d2; + } else if (d3 < d1) { + dmnmx = d3; + } else { + dmnmx = d1; + } + } + + if (dir == 0) { + +/* Sort into decreasing order */ + + i__ = start - 1; + j = endd + 1; +L60: +L70: + --j; + if (d__[j] < dmnmx) { + goto L70; + } +L80: + ++i__; + if (d__[i__] > dmnmx) { + goto L80; + } + if (i__ < j) { + tmp = d__[i__]; + d__[i__] = d__[j]; + d__[j] = tmp; + goto L60; + } + if (j - start > endd - j - 1) { + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + } else { + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + } + } else { + +/* Sort into increasing order */ + + i__ = start - 1; + j = endd + 1; +L90: +L100: + --j; + if (d__[j] > dmnmx) { + goto L100; + } +L110: + ++i__; + if (d__[i__] < dmnmx) { + goto L110; + } + if (i__ < j) { + tmp = d__[i__]; + d__[i__] = d__[j]; + d__[j] = tmp; + goto L90; + } + if (j - start > endd - j - 1) { + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + } else { + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + } + } + } + if (stkpnt > 0) { + goto L10; + } + return 0; + +/* End of DLASRT */ + +} /* dlasrt_ */ + +/* Subroutine */ int dlassq_(integer *n, doublereal *x, integer *incx, + doublereal *scale, doublereal *sumsq) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal d__1; + + /* Local variables */ + static integer ix; + static doublereal absxi; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DLASSQ returns the values scl and smsq such that + + ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, + + where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is + assumed to be non-negative and scl returns the value + + scl = max( scale, abs( x( i ) ) ). + + scale and sumsq must be supplied in SCALE and SUMSQ and + scl and smsq are overwritten on SCALE and SUMSQ respectively. + + The routine makes only one pass through the vector x. + + Arguments + ========= + + N (input) INTEGER + The number of elements to be used from the vector X. + + X (input) DOUBLE PRECISION array, dimension (N) + The vector for which a scaled sum of squares is computed. + x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. + + INCX (input) INTEGER + The increment between successive values of the vector X. + INCX > 0. + + SCALE (input/output) DOUBLE PRECISION + On entry, the value scale in the equation above. + On exit, SCALE is overwritten with scl , the scaling factor + for the sum of squares. + + SUMSQ (input/output) DOUBLE PRECISION + On entry, the value sumsq in the equation above. + On exit, SUMSQ is overwritten with smsq , the basic sum of + squares from which scl has been factored out. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n > 0) { + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + if (x[ix] != 0.) { + absxi = (d__1 = x[ix], abs(d__1)); + if (*scale < absxi) { +/* Computing 2nd power */ + d__1 = *scale / absxi; + *sumsq = *sumsq * (d__1 * d__1) + 1; + *scale = absxi; + } else { +/* Computing 2nd power */ + d__1 = absxi / *scale; + *sumsq += d__1 * d__1; + } + } +/* L10: */ + } + } + return 0; + +/* End of DLASSQ */ + +} /* dlassq_ */ + +/* Subroutine */ int dlasv2_(doublereal *f, doublereal *g, doublereal *h__, + doublereal *ssmin, doublereal *ssmax, doublereal *snr, doublereal * + csr, doublereal *snl, doublereal *csl) +{ + /* System generated locals */ + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + static doublereal a, d__, l, m, r__, s, t, fa, ga, ha, ft, gt, ht, mm, tt, + clt, crt, slt, srt; + static integer pmax; + static doublereal temp; + static logical swap; + static doublereal tsign; + + static logical gasmal; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DLASV2 computes the singular value decomposition of a 2-by-2 + triangular matrix + [ F G ] + [ 0 H ]. + On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the + smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and + right singular vectors for abs(SSMAX), giving the decomposition + + [ CSL SNL ] [ F G ] [ CSR -SNR ] = [ SSMAX 0 ] + [-SNL CSL ] [ 0 H ] [ SNR CSR ] [ 0 SSMIN ]. + + Arguments + ========= + + F (input) DOUBLE PRECISION + The (1,1) element of the 2-by-2 matrix. + + G (input) DOUBLE PRECISION + The (1,2) element of the 2-by-2 matrix. + + H (input) DOUBLE PRECISION + The (2,2) element of the 2-by-2 matrix. + + SSMIN (output) DOUBLE PRECISION + abs(SSMIN) is the smaller singular value. + + SSMAX (output) DOUBLE PRECISION + abs(SSMAX) is the larger singular value. + + SNL (output) DOUBLE PRECISION + CSL (output) DOUBLE PRECISION + The vector (CSL, SNL) is a unit left singular vector for the + singular value abs(SSMAX). + + SNR (output) DOUBLE PRECISION + CSR (output) DOUBLE PRECISION + The vector (CSR, SNR) is a unit right singular vector for the + singular value abs(SSMAX). + + Further Details + =============== + + Any input parameter may be aliased with any output parameter. + + Barring over/underflow and assuming a guard digit in subtraction, all + output quantities are correct to within a few units in the last + place (ulps). + + In IEEE arithmetic, the code works correctly if one matrix element is + infinite. + + Overflow will not occur unless the largest singular value itself + overflows or is within a few ulps of overflow. (On machines with + partial overflow, like the Cray, overflow may occur if the largest + singular value is within a factor of 2 of overflow.) + + Underflow is harmless if underflow is gradual. Otherwise, results + may correspond to a matrix modified by perturbations of size near + the underflow threshold. + + ===================================================================== +*/ + + + ft = *f; + fa = abs(ft); + ht = *h__; + ha = abs(*h__); + +/* + PMAX points to the maximum absolute element of matrix + PMAX = 1 if F largest in absolute values + PMAX = 2 if G largest in absolute values + PMAX = 3 if H largest in absolute values +*/ + + pmax = 1; + swap = ha > fa; + if (swap) { + pmax = 3; + temp = ft; + ft = ht; + ht = temp; + temp = fa; + fa = ha; + ha = temp; + +/* Now FA .ge. HA */ + + } + gt = *g; + ga = abs(gt); + if (ga == 0.) { + +/* Diagonal matrix */ + + *ssmin = ha; + *ssmax = fa; + clt = 1.; + crt = 1.; + slt = 0.; + srt = 0.; + } else { + gasmal = TRUE_; + if (ga > fa) { + pmax = 2; + if (fa / ga < EPSILON) { + +/* Case of very large GA */ + + gasmal = FALSE_; + *ssmax = ga; + if (ha > 1.) { + *ssmin = fa / (ga / ha); + } else { + *ssmin = fa / ga * ha; + } + clt = 1.; + slt = ht / gt; + srt = 1.; + crt = ft / gt; + } + } + if (gasmal) { + +/* Normal case */ + + d__ = fa - ha; + if (d__ == fa) { + +/* Copes with infinite F or H */ + + l = 1.; + } else { + l = d__ / fa; + } + +/* Note that 0 .le. L .le. 1 */ + + m = gt / ft; + +/* Note that abs(M) .le. 1/macheps */ + + t = 2. - l; + +/* Note that T .ge. 1 */ + + mm = m * m; + tt = t * t; + s = sqrt(tt + mm); + +/* Note that 1 .le. S .le. 1 + 1/macheps */ + + if (l == 0.) { + r__ = abs(m); + } else { + r__ = sqrt(l * l + mm); + } + +/* Note that 0 .le. R .le. 1 + 1/macheps */ + + a = (s + r__) * .5; + +/* Note that 1 .le. A .le. 1 + abs(M) */ + + *ssmin = ha / a; + *ssmax = fa * a; + if (mm == 0.) { + +/* Note that M is very tiny */ + + if (l == 0.) { + t = d_sign(&c_b2804, &ft) * d_sign(&c_b15, >); + } else { + t = gt / d_sign(&d__, &ft) + m / t; + } + } else { + t = (m / (s + t) + m / (r__ + l)) * (a + 1.); + } + l = sqrt(t * t + 4.); + crt = 2. / l; + srt = t / l; + clt = (crt + srt * m) / a; + slt = ht / ft * srt / a; + } + } + if (swap) { + *csl = srt; + *snl = crt; + *csr = slt; + *snr = clt; + } else { + *csl = clt; + *snl = slt; + *csr = crt; + *snr = srt; + } + +/* Correct signs of SSMAX and SSMIN */ + + if (pmax == 1) { + tsign = d_sign(&c_b15, csr) * d_sign(&c_b15, csl) * d_sign(&c_b15, f); + } + if (pmax == 2) { + tsign = d_sign(&c_b15, snr) * d_sign(&c_b15, csl) * d_sign(&c_b15, g); + } + if (pmax == 3) { + tsign = d_sign(&c_b15, snr) * d_sign(&c_b15, snl) * d_sign(&c_b15, + h__); + } + *ssmax = d_sign(ssmax, &tsign); + d__1 = tsign * d_sign(&c_b15, f) * d_sign(&c_b15, h__); + *ssmin = d_sign(ssmin, &d__1); + return 0; + +/* End of DLASV2 */ + +} /* dlasv2_ */ + +/* Subroutine */ int dlaswp_(integer *n, doublereal *a, integer *lda, integer + *k1, integer *k2, integer *ipiv, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc; + static doublereal temp; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DLASWP performs a series of row interchanges on the matrix A. + One row interchange is initiated for each of rows K1 through K2 of A. + + Arguments + ========= + + N (input) INTEGER + The number of columns of the matrix A. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the matrix of column dimension N to which the row + interchanges will be applied. + On exit, the permuted matrix. + + LDA (input) INTEGER + The leading dimension of the array A. + + K1 (input) INTEGER + The first element of IPIV for which a row interchange will + be done. + + K2 (input) INTEGER + The last element of IPIV for which a row interchange will + be done. + + IPIV (input) INTEGER array, dimension (M*abs(INCX)) + The vector of pivot indices. Only the elements in positions + K1 through K2 of IPIV are accessed. + IPIV(K) = L implies rows K and L are to be interchanged. + + INCX (input) INTEGER + The increment between successive values of IPIV. If IPIV + is negative, the pivots are applied in reverse order. + + Further Details + =============== + + Modified by + R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA + + ===================================================================== + + + Interchange row I with row IPIV(I) for each of rows K1 through K2. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + if (*incx > 0) { + ix0 = *k1; + i1 = *k1; + i2 = *k2; + inc = 1; + } else if (*incx < 0) { + ix0 = (1 - *k2) * *incx + 1; + i1 = *k2; + i2 = *k1; + inc = -1; + } else { + return 0; + } + + n32 = *n / 32 << 5; + if (n32 != 0) { + i__1 = n32; + for (j = 1; j <= i__1; j += 32) { + ix = ix0; + i__2 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) + { + ip = ipiv[ix]; + if (ip != i__) { + i__4 = j + 31; + for (k = j; k <= i__4; ++k) { + temp = a[i__ + k * a_dim1]; + a[i__ + k * a_dim1] = a[ip + k * a_dim1]; + a[ip + k * a_dim1] = temp; +/* L10: */ + } + } + ix += *incx; +/* L20: */ + } +/* L30: */ + } + } + if (n32 != *n) { + ++n32; + ix = ix0; + i__1 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__3) { + ip = ipiv[ix]; + if (ip != i__) { + i__2 = *n; + for (k = n32; k <= i__2; ++k) { + temp = a[i__ + k * a_dim1]; + a[i__ + k * a_dim1] = a[ip + k * a_dim1]; + a[ip + k * a_dim1] = temp; +/* L40: */ + } + } + ix += *incx; +/* L50: */ + } + } + + return 0; + +/* End of DLASWP */ + +} /* dlaswp_ */ + +/* Subroutine */ int dlatrd_(char *uplo, integer *n, integer *nb, doublereal * + a, integer *lda, doublereal *e, doublereal *tau, doublereal *w, + integer *ldw) +{ + /* System generated locals */ + integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, iw; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + static doublereal alpha; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), daxpy_(integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *), + dsymv_(char *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), dlarfg_(integer *, doublereal *, doublereal *, integer *, + doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DLATRD reduces NB rows and columns of a real symmetric matrix A to + symmetric tridiagonal form by an orthogonal similarity + transformation Q' * A * Q, and returns the matrices V and W which are + needed to apply the transformation to the unreduced part of A. + + If UPLO = 'U', DLATRD reduces the last NB rows and columns of a + matrix, of which the upper triangle is supplied; + if UPLO = 'L', DLATRD reduces the first NB rows and columns of a + matrix, of which the lower triangle is supplied. + + This is an auxiliary routine called by DSYTRD. + + Arguments + ========= + + UPLO (input) CHARACTER + Specifies whether the upper or lower triangular part of the + symmetric matrix A is stored: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. + + NB (input) INTEGER + The number of rows and columns to be reduced. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + n-by-n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n-by-n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit: + if UPLO = 'U', the last NB columns have been reduced to + tridiagonal form, with the diagonal elements overwriting + the diagonal elements of A; the elements above the diagonal + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors; + if UPLO = 'L', the first NB columns have been reduced to + tridiagonal form, with the diagonal elements overwriting + the diagonal elements of A; the elements below the diagonal + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= (1,N). + + E (output) DOUBLE PRECISION array, dimension (N-1) + If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal + elements of the last NB columns of the reduced matrix; + if UPLO = 'L', E(1:nb) contains the subdiagonal elements of + the first NB columns of the reduced matrix. + + TAU (output) DOUBLE PRECISION array, dimension (N-1) + The scalar factors of the elementary reflectors, stored in + TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. + See Further Details. + + W (output) DOUBLE PRECISION array, dimension (LDW,NB) + The n-by-nb matrix W required to update the unreduced part + of A. + + LDW (input) INTEGER + The leading dimension of the array W. LDW >= max(1,N). + + Further Details + =============== + + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors + + Q = H(n) H(n-1) . . . H(n-nb+1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), + and tau in TAU(i-1). + + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors + + Q = H(1) H(2) . . . H(nb). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), + and tau in TAU(i). + + The elements of the vectors v together form the n-by-nb matrix V + which is needed, with W, to apply the transformation to the unreduced + part of the matrix, using a symmetric rank-2k update of the form: + A := A - V*W' - W*V'. + + The contents of A on exit are illustrated by the following examples + with n = 5 and nb = 2: + + if UPLO = 'U': if UPLO = 'L': + + ( a a a v4 v5 ) ( d ) + ( a a v4 v5 ) ( 1 d ) + ( a 1 v5 ) ( v1 1 a ) + ( d 1 ) ( v1 v2 a a ) + ( d ) ( v1 v2 a a a ) + + where d denotes a diagonal element of the reduced matrix, a denotes + an element of the original matrix that is unchanged, and vi denotes + an element of the vector defining H(i). + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --e; + --tau; + w_dim1 = *ldw; + w_offset = 1 + w_dim1; + w -= w_offset; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + + if (lsame_(uplo, "U")) { + +/* Reduce last NB columns of upper triangle */ + + i__1 = *n - *nb + 1; + for (i__ = *n; i__ >= i__1; --i__) { + iw = i__ - *n + *nb; + if (i__ < *n) { + +/* Update A(1:i,i) */ + + i__2 = *n - i__; + dgemv_("No transpose", &i__, &i__2, &c_b151, &a[(i__ + 1) * + a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, & + c_b15, &a[i__ * a_dim1 + 1], &c__1); + i__2 = *n - i__; + dgemv_("No transpose", &i__, &i__2, &c_b151, &w[(iw + 1) * + w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b15, &a[i__ * a_dim1 + 1], &c__1); + } + if (i__ > 1) { + +/* + Generate elementary reflector H(i) to annihilate + A(1:i-2,i) +*/ + + i__2 = i__ - 1; + dlarfg_(&i__2, &a[i__ - 1 + i__ * a_dim1], &a[i__ * a_dim1 + + 1], &c__1, &tau[i__ - 1]); + e[i__ - 1] = a[i__ - 1 + i__ * a_dim1]; + a[i__ - 1 + i__ * a_dim1] = 1.; + +/* Compute W(1:i-1,i) */ + + i__2 = i__ - 1; + dsymv_("Upper", &i__2, &c_b15, &a[a_offset], lda, &a[i__ * + a_dim1 + 1], &c__1, &c_b29, &w[iw * w_dim1 + 1], & + c__1); + if (i__ < *n) { + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &w[(iw + 1) * + w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], &c__1, & + c_b29, &w[i__ + 1 + iw * w_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[(i__ + 1) + * a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b15, &w[iw * w_dim1 + 1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], &c__1, & + c_b29, &w[i__ + 1 + iw * w_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &w[(iw + 1) + * w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b15, &w[iw * w_dim1 + 1], &c__1); + } + i__2 = i__ - 1; + dscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1); + i__2 = i__ - 1; + alpha = tau[i__ - 1] * -.5 * ddot_(&i__2, &w[iw * w_dim1 + 1], + &c__1, &a[i__ * a_dim1 + 1], &c__1); + i__2 = i__ - 1; + daxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw * + w_dim1 + 1], &c__1); + } + +/* L10: */ + } + } else { + +/* Reduce first NB columns of lower triangle */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i:n,i) */ + + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + a_dim1], + lda, &w[i__ + w_dim1], ldw, &c_b15, &a[i__ + i__ * a_dim1] + , &c__1); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &w[i__ + w_dim1], + ldw, &a[i__ + a_dim1], lda, &c_b15, &a[i__ + i__ * a_dim1] + , &c__1); + if (i__ < *n) { + +/* + Generate elementary reflector H(i) to annihilate + A(i+2:n,i) +*/ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + + i__ * a_dim1], &c__1, &tau[i__]); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.; + +/* Compute W(i+1:n,i) */ + + i__2 = *n - i__; + dsymv_("Lower", &i__2, &c_b15, &a[i__ + 1 + (i__ + 1) * + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b29, &w[i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &w[i__ + 1 + w_dim1] + , ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &w[ + i__ * w_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + 1 + + a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b15, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + a_dim1] + , lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &w[ + i__ * w_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &w[i__ + 1 + + w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b15, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + dscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + alpha = tau[i__] * -.5 * ddot_(&i__2, &w[i__ + 1 + i__ * + w_dim1], &c__1, &a[i__ + 1 + i__ * a_dim1], &c__1); + i__2 = *n - i__; + daxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + } + +/* L20: */ + } + } + + return 0; + +/* End of DLATRD */ + +} /* dlatrd_ */ + +/* Subroutine */ int dlauu2_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__; + static doublereal aii; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DLAUU2 computes the product U * U' or L' * L, where the triangular + factor U or L is stored in the upper or lower triangular part of + the array A. + + If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + overwriting the factor U in A. + If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + overwriting the factor L in A. + + This is the unblocked form of the algorithm, calling Level 2 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the triangular factor stored in the array A + is upper or lower triangular: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the triangular factor U or L. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the triangular factor U or L. + On exit, if UPLO = 'U', the upper triangle of A is + overwritten with the upper triangle of the product U * U'; + if UPLO = 'L', the lower triangle of A is overwritten with + the lower triangle of the product L' * L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAUU2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (upper) { + +/* Compute the product U * U'. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + aii = a[i__ + i__ * a_dim1]; + if (i__ < *n) { + i__2 = *n - i__ + 1; + a[i__ + i__ * a_dim1] = ddot_(&i__2, &a[i__ + i__ * a_dim1], + lda, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_("No transpose", &i__2, &i__3, &c_b15, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & + aii, &a[i__ * a_dim1 + 1], &c__1); + } else { + dscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1); + } +/* L10: */ + } + + } else { + +/* Compute the product L' * L. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + aii = a[i__ + i__ * a_dim1]; + if (i__ < *n) { + i__2 = *n - i__ + 1; + a[i__ + i__ * a_dim1] = ddot_(&i__2, &a[i__ + i__ * a_dim1], & + c__1, &a[i__ + i__ * a_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + a_dim1] + , lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &aii, &a[ + i__ + a_dim1], lda); + } else { + dscal_(&i__, &aii, &a[i__ + a_dim1], lda); + } +/* L20: */ + } + } + + return 0; + +/* End of DLAUU2 */ + +} /* dlauu2_ */ + +/* Subroutine */ int dlauum_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, ib, nb; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + static logical upper; + extern /* Subroutine */ int dsyrk_(char *, char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *), dlauu2_(char *, integer *, + doublereal *, integer *, integer *), xerbla_(char *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DLAUUM computes the product U * U' or L' * L, where the triangular + factor U or L is stored in the upper or lower triangular part of + the array A. + + If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + overwriting the factor U in A. + If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + overwriting the factor L in A. + + This is the blocked form of the algorithm, calling Level 3 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the triangular factor stored in the array A + is upper or lower triangular: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the triangular factor U or L. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the triangular factor U or L. + On exit, if UPLO = 'U', the upper triangle of A is + overwritten with the upper triangle of the product U * U'; + if UPLO = 'L', the lower triangle of A is overwritten with + the lower triangle of the product L' * L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAUUM", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "DLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code */ + + dlauu2_(uplo, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code */ + + if (upper) { + +/* Compute the product U * U'. */ + + i__1 = *n; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + i__3 = i__ - 1; + dtrmm_("Right", "Upper", "Transpose", "Non-unit", &i__3, &ib, + &c_b15, &a[i__ + i__ * a_dim1], lda, &a[i__ * a_dim1 + + 1], lda) + ; + dlauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + dgemm_("No transpose", "Transpose", &i__3, &ib, &i__4, & + c_b15, &a[(i__ + ib) * a_dim1 + 1], lda, &a[i__ + + (i__ + ib) * a_dim1], lda, &c_b15, &a[i__ * + a_dim1 + 1], lda); + i__3 = *n - i__ - ib + 1; + dsyrk_("Upper", "No transpose", &ib, &i__3, &c_b15, &a[ + i__ + (i__ + ib) * a_dim1], lda, &c_b15, &a[i__ + + i__ * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute the product L' * L. */ + + i__2 = *n; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + i__3 = i__ - 1; + dtrmm_("Left", "Lower", "Transpose", "Non-unit", &ib, &i__3, & + c_b15, &a[i__ + i__ * a_dim1], lda, &a[i__ + a_dim1], + lda); + dlauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + dgemm_("Transpose", "No transpose", &ib, &i__3, &i__4, & + c_b15, &a[i__ + ib + i__ * a_dim1], lda, &a[i__ + + ib + a_dim1], lda, &c_b15, &a[i__ + a_dim1], lda); + i__3 = *n - i__ - ib + 1; + dsyrk_("Lower", "Transpose", &ib, &i__3, &c_b15, &a[i__ + + ib + i__ * a_dim1], lda, &c_b15, &a[i__ + i__ * + a_dim1], lda); + } +/* L20: */ + } + } + } + + return 0; + +/* End of DLAUUM */ + +} /* dlauum_ */ + +/* Subroutine */ int dorg2r_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dlarf_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DORG2R generates an m by n real matrix Q with orthonormal columns, + which is defined as the first n columns of a product of k elementary + reflectors of order m + + Q = H(1) H(2) . . . H(k) + + as returned by DGEQRF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. M >= N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. N >= K >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the i-th column must contain the vector which + defines the elementary reflector H(i), for i = 1,2,...,k, as + returned by DGEQRF in the first k columns of its array + argument A. + On exit, the m-by-n matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGEQRF. + + WORK (workspace) DOUBLE PRECISION array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORG2R", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + +/* Initialise columns k+1:n to columns of the unit matrix */ + + i__1 = *n; + for (j = *k + 1; j <= i__1; ++j) { + i__2 = *m; + for (l = 1; l <= i__2; ++l) { + a[l + j * a_dim1] = 0.; +/* L10: */ + } + a[j + j * a_dim1] = 1.; +/* L20: */ + } + + for (i__ = *k; i__ >= 1; --i__) { + +/* Apply H(i) to A(i:m,i:n) from the left */ + + if (i__ < *n) { + a[i__ + i__ * a_dim1] = 1.; + i__1 = *m - i__ + 1; + i__2 = *n - i__; + dlarf_("Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + } + if (i__ < *m) { + i__1 = *m - i__; + d__1 = -tau[i__]; + dscal_(&i__1, &d__1, &a[i__ + 1 + i__ * a_dim1], &c__1); + } + a[i__ + i__ * a_dim1] = 1. - tau[i__]; + +/* Set A(1:i-1,i) to zero */ + + i__1 = i__ - 1; + for (l = 1; l <= i__1; ++l) { + a[l + i__ * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of DORG2R */ + +} /* dorg2r_ */ + +/* Subroutine */ int dorgbr_(char *vect, integer *m, integer *n, integer *k, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, nb, mn; + extern logical lsame_(char *, char *); + static integer iinfo; + static logical wantq; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dorglq_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dorgqr_(integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DORGBR generates one of the real orthogonal matrices Q or P**T + determined by DGEBRD when reducing a real matrix A to bidiagonal + form: A = Q * B * P**T. Q and P**T are defined as products of + elementary reflectors H(i) or G(i) respectively. + + If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q + is of order M: + if m >= k, Q = H(1) H(2) . . . H(k) and DORGBR returns the first n + columns of Q, where m >= n >= k; + if m < k, Q = H(1) H(2) . . . H(m-1) and DORGBR returns Q as an + M-by-M matrix. + + If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T + is of order N: + if k < n, P**T = G(k) . . . G(2) G(1) and DORGBR returns the first m + rows of P**T, where n >= m >= k; + if k >= n, P**T = G(n-1) . . . G(2) G(1) and DORGBR returns P**T as + an N-by-N matrix. + + Arguments + ========= + + VECT (input) CHARACTER*1 + Specifies whether the matrix Q or the matrix P**T is + required, as defined in the transformation applied by DGEBRD: + = 'Q': generate Q; + = 'P': generate P**T. + + M (input) INTEGER + The number of rows of the matrix Q or P**T to be returned. + M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q or P**T to be returned. + N >= 0. + If VECT = 'Q', M >= N >= min(M,K); + if VECT = 'P', N >= M >= min(N,K). + + K (input) INTEGER + If VECT = 'Q', the number of columns in the original M-by-K + matrix reduced by DGEBRD. + If VECT = 'P', the number of rows in the original K-by-N + matrix reduced by DGEBRD. + K >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the vectors which define the elementary reflectors, + as returned by DGEBRD. + On exit, the M-by-N matrix Q or P**T. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (input) DOUBLE PRECISION array, dimension + (min(M,K)) if VECT = 'Q' + (min(N,K)) if VECT = 'P' + TAU(i) must contain the scalar factor of the elementary + reflector H(i) or G(i), which determines Q or P**T, as + returned by DGEBRD in its array argument TAUQ or TAUP. + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,min(M,N)). + For optimum performance LWORK >= min(M,N)*NB, where NB + is the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + wantq = lsame_(vect, "Q"); + mn = min(*m,*n); + lquery = *lwork == -1; + if (! wantq && ! lsame_(vect, "P")) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0 || wantq && (*n > *m || *n < min(*m,*k)) || ! wantq && ( + *m > *n || *m < min(*n,*k))) { + *info = -3; + } else if (*k < 0) { + *info = -4; + } else if (*lda < max(1,*m)) { + *info = -6; + } else if (*lwork < max(1,mn) && ! lquery) { + *info = -9; + } + + if (*info == 0) { + if (wantq) { + nb = ilaenv_(&c__1, "DORGQR", " ", m, n, k, &c_n1, (ftnlen)6, ( + ftnlen)1); + } else { + nb = ilaenv_(&c__1, "DORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, ( + ftnlen)1); + } + lwkopt = max(1,mn) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORGBR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + work[1] = 1.; + return 0; + } + + if (wantq) { + +/* + Form Q, determined by a call to DGEBRD to reduce an m-by-k + matrix +*/ + + if (*m >= *k) { + +/* If m >= k, assume m >= n >= k */ + + dorgqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + iinfo); + + } else { + +/* + If m < k, assume m = n + + Shift the vectors which define the elementary reflectors one + column to the right, and set the first row and column of Q + to those of the unit matrix +*/ + + for (j = *m; j >= 2; --j) { + a[j * a_dim1 + 1] = 0.; + i__1 = *m; + for (i__ = j + 1; i__ <= i__1; ++i__) { + a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; +/* L10: */ + } +/* L20: */ + } + a[a_dim1 + 1] = 1.; + i__1 = *m; + for (i__ = 2; i__ <= i__1; ++i__) { + a[i__ + a_dim1] = 0.; +/* L30: */ + } + if (*m > 1) { + +/* Form Q(2:m,2:m) */ + + i__1 = *m - 1; + i__2 = *m - 1; + i__3 = *m - 1; + dorgqr_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); + } + } + } else { + +/* + Form P', determined by a call to DGEBRD to reduce a k-by-n + matrix +*/ + + if (*k < *n) { + +/* If k < n, assume k <= m <= n */ + + dorglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + iinfo); + + } else { + +/* + If k >= n, assume m = n + + Shift the vectors which define the elementary reflectors one + row downward, and set the first row and column of P' to + those of the unit matrix +*/ + + a[a_dim1 + 1] = 1.; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + a[i__ + a_dim1] = 0.; +/* L40: */ + } + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + for (i__ = j - 1; i__ >= 2; --i__) { + a[i__ + j * a_dim1] = a[i__ - 1 + j * a_dim1]; +/* L50: */ + } + a[j * a_dim1 + 1] = 0.; +/* L60: */ + } + if (*n > 1) { + +/* Form P'(2:n,2:n) */ + + i__1 = *n - 1; + i__2 = *n - 1; + i__3 = *n - 1; + dorglq_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); + } + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORGBR */ + +} /* dorgbr_ */ + +/* Subroutine */ int dorghr_(integer *n, integer *ilo, integer *ihi, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, nb, nh, iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dorgqr_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DORGHR generates a real orthogonal matrix Q which is defined as the + product of IHI-ILO elementary reflectors of order N, as returned by + DGEHRD: + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Arguments + ========= + + N (input) INTEGER + The order of the matrix Q. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + ILO and IHI must have the same values as in the previous call + of DGEHRD. Q is equal to the unit matrix except in the + submatrix Q(ilo+1:ihi,ilo+1:ihi). + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the vectors which define the elementary reflectors, + as returned by DGEHRD. + On exit, the N-by-N orthogonal matrix Q. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (input) DOUBLE PRECISION array, dimension (N-1) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGEHRD. + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= IHI-ILO. + For optimum performance LWORK >= (IHI-ILO)*NB, where NB is + the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nh = *ihi - *ilo; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < max(1,nh) && ! lquery) { + *info = -8; + } + + if (*info == 0) { + nb = ilaenv_(&c__1, "DORGQR", " ", &nh, &nh, &nh, &c_n1, (ftnlen)6, ( + ftnlen)1); + lwkopt = max(1,nh) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORGHR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1] = 1.; + return 0; + } + +/* + Shift the vectors which define the elementary reflectors one + column to the right, and set the first ilo and the last n-ihi + rows and columns to those of the unit matrix +*/ + + i__1 = *ilo + 1; + for (j = *ihi; j >= i__1; --j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L10: */ + } + i__2 = *ihi; + for (i__ = j + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; +/* L20: */ + } + i__2 = *n; + for (i__ = *ihi + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + i__1 = *ilo; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L50: */ + } + a[j + j * a_dim1] = 1.; +/* L60: */ + } + i__1 = *n; + for (j = *ihi + 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L70: */ + } + a[j + j * a_dim1] = 1.; +/* L80: */ + } + + if (nh > 0) { + +/* Generate Q(ilo+1:ihi,ilo+1:ihi) */ + + dorgqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[* + ilo], &work[1], lwork, &iinfo); + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORGHR */ + +} /* dorghr_ */ + +/* Subroutine */ int dorgl2_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dlarf_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DORGL2 generates an m by n real matrix Q with orthonormal rows, + which is defined as the first m rows of a product of k elementary + reflectors of order n + + Q = H(k) . . . H(2) H(1) + + as returned by DGELQF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. N >= M. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. M >= K >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the i-th row must contain the vector which defines + the elementary reflector H(i), for i = 1,2,...,k, as returned + by DGELQF in the first k rows of its array argument A. + On exit, the m-by-n matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGELQF. + + WORK (workspace) DOUBLE PRECISION array, dimension (M) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*k < 0 || *k > *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORGL2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m <= 0) { + return 0; + } + + if (*k < *m) { + +/* Initialise rows k+1:m to rows of the unit matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (l = *k + 1; l <= i__2; ++l) { + a[l + j * a_dim1] = 0.; +/* L10: */ + } + if (j > *k && j <= *m) { + a[j + j * a_dim1] = 1.; + } +/* L20: */ + } + } + + for (i__ = *k; i__ >= 1; --i__) { + +/* Apply H(i) to A(i:m,i:n) from the right */ + + if (i__ < *n) { + if (i__ < *m) { + a[i__ + i__ * a_dim1] = 1.; + i__1 = *m - i__; + i__2 = *n - i__ + 1; + dlarf_("Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, & + tau[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + } + i__1 = *n - i__; + d__1 = -tau[i__]; + dscal_(&i__1, &d__1, &a[i__ + (i__ + 1) * a_dim1], lda); + } + a[i__ + i__ * a_dim1] = 1. - tau[i__]; + +/* Set A(i,1:i-1) to zero */ + + i__1 = i__ - 1; + for (l = 1; l <= i__1; ++l) { + a[i__ + l * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of DORGL2 */ + +} /* dorgl2_ */ + +/* Subroutine */ int dorglq_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int dorgl2_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), + dlarfb_(char *, char *, char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DORGLQ generates an M-by-N real matrix Q with orthonormal rows, + which is defined as the first M rows of a product of K elementary + reflectors of order N + + Q = H(k) . . . H(2) H(1) + + as returned by DGELQF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. N >= M. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. M >= K >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the i-th row must contain the vector which defines + the elementary reflector H(i), for i = 1,2,...,k, as returned + by DGELQF in the first k rows of its array argument A. + On exit, the M-by-N matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGELQF. + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,M). + For optimum performance LWORK >= M*NB, where NB is + the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "DORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = max(1,*m) * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*k < 0 || *k > *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*lwork < max(1,*m) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORGLQ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m <= 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < *k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "DORGLQ", " ", m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "DORGLQ", " ", m, n, k, &c_n1, + (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* + Use blocked code after the last block. + The first kk rows are handled by the block method. +*/ + + ki = (*k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = *k, i__2 = ki + nb; + kk = min(i__1,i__2); + +/* Set A(kk+1:m,1:kk) to zero. */ + + i__1 = kk; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = kk + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + kk = 0; + } + +/* Use unblocked code for the last or only block. */ + + if (kk < *m) { + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + dorgl2_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + tau[kk + 1], &work[1], &iinfo); + } + + if (kk > 0) { + +/* Use blocked code */ + + i__1 = -nb; + for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *k - i__ + 1; + ib = min(i__2,i__3); + if (i__ + ib <= *m) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__2 = *n - i__ + 1; + dlarft_("Forward", "Rowwise", &i__2, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H' to A(i+ib:m,i:n) from the right */ + + i__2 = *m - i__ - ib + 1; + i__3 = *n - i__ + 1; + dlarfb_("Right", "Transpose", "Forward", "Rowwise", &i__2, & + i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + + 1], &ldwork); + } + +/* Apply H' to columns i:n of current block */ + + i__2 = *n - i__ + 1; + dorgl2_(&ib, &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); + +/* Set columns 1:i-1 of current block to zero */ + + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + i__3 = i__ + ib - 1; + for (l = i__; l <= i__3; ++l) { + a[l + j * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } +/* L50: */ + } + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DORGLQ */ + +} /* dorglq_ */ + +/* Subroutine */ int dorgqr_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int dorg2r_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), + dlarfb_(char *, char *, char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DORGQR generates an M-by-N real matrix Q with orthonormal columns, + which is defined as the first N columns of a product of K elementary + reflectors of order M + + Q = H(1) H(2) . . . H(k) + + as returned by DGEQRF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. M >= N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. N >= K >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the i-th column must contain the vector which + defines the elementary reflector H(i), for i = 1,2,...,k, as + returned by DGEQRF in the first k columns of its array + argument A. + On exit, the M-by-N matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGEQRF. + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "DORGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = max(1,*n) * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORGQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < *k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "DORGQR", " ", m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "DORGQR", " ", m, n, k, &c_n1, + (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* + Use blocked code after the last block. + The first kk columns are handled by the block method. +*/ + + ki = (*k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = *k, i__2 = ki + nb; + kk = min(i__1,i__2); + +/* Set A(1:kk,kk+1:n) to zero. */ + + i__1 = *n; + for (j = kk + 1; j <= i__1; ++j) { + i__2 = kk; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + kk = 0; + } + +/* Use unblocked code for the last or only block. */ + + if (kk < *n) { + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + dorg2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + tau[kk + 1], &work[1], &iinfo); + } + + if (kk > 0) { + +/* Use blocked code */ + + i__1 = -nb; + for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *k - i__ + 1; + ib = min(i__2,i__3); + if (i__ + ib <= *n) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__2 = *m - i__ + 1; + dlarft_("Forward", "Columnwise", &i__2, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(i:m,i+ib:n) from the left */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__ - ib + 1; + dlarfb_("Left", "No transpose", "Forward", "Columnwise", & + i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ + 1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, & + work[ib + 1], &ldwork); + } + +/* Apply H to rows i:m of current block */ + + i__2 = *m - i__ + 1; + dorg2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); + +/* Set rows 1:i-1 of current block to zero */ + + i__2 = i__ + ib - 1; + for (j = i__; j <= i__2; ++j) { + i__3 = i__ - 1; + for (l = 1; l <= i__3; ++l) { + a[l + j * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } +/* L50: */ + } + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DORGQR */ + +} /* dorgqr_ */ + +/* Subroutine */ int dorm2l_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; + + /* Local variables */ + static integer i__, i1, i2, i3, mi, ni, nq; + static doublereal aii; + static logical left; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DORM2L overwrites the general real m by n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'T', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'T', + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by DGEQLF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'T': apply Q' (Transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + DGEQLF in the last k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGEQLF. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) DOUBLE PRECISION array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORM2L", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) is applied to C(1:m-k+i,1:n) */ + + mi = *m - *k + i__; + } else { + +/* H(i) is applied to C(1:m,1:n-k+i) */ + + ni = *n - *k + i__; + } + +/* Apply H(i) */ + + aii = a[nq - *k + i__ + i__ * a_dim1]; + a[nq - *k + i__ + i__ * a_dim1] = 1.; + dlarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &tau[i__], &c__[ + c_offset], ldc, &work[1]); + a[nq - *k + i__ + i__ * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of DORM2L */ + +} /* dorm2l_ */ + +/* Subroutine */ int dorm2r_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; + + /* Local variables */ + static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; + static doublereal aii; + static logical left; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DORM2R overwrites the general real m by n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'T', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'T', + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(1) H(2) . . . H(k) + + as returned by DGEQRF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'T': apply Q' (Transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + DGEQRF in the first k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGEQRF. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) DOUBLE PRECISION array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORM2R", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i) */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &tau[i__], &c__[ + ic + jc * c_dim1], ldc, &work[1]); + a[i__ + i__ * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of DORM2R */ + +} /* dorm2r_ */ + +/* Subroutine */ int dormbr_(char *vect, char *side, char *trans, integer *m, + integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, + doublereal *c__, integer *ldc, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2]; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i1, i2, nb, mi, ni, nq, nw; + static logical left; + extern logical lsame_(char *, char *); + static integer iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + static logical notran; + extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + static logical applyq; + static char transt[1]; + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + If VECT = 'Q', DORMBR overwrites the general real M-by-N matrix C + with + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + If VECT = 'P', DORMBR overwrites the general real M-by-N matrix C + with + SIDE = 'L' SIDE = 'R' + TRANS = 'N': P * C C * P + TRANS = 'T': P**T * C C * P**T + + Here Q and P**T are the orthogonal matrices determined by DGEBRD when + reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and + P**T are defined as products of elementary reflectors H(i) and G(i) + respectively. + + Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the + order of the orthogonal matrix Q or P**T that is applied. + + If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: + if nq >= k, Q = H(1) H(2) . . . H(k); + if nq < k, Q = H(1) H(2) . . . H(nq-1). + + If VECT = 'P', A is assumed to have been a K-by-NQ matrix: + if k < nq, P = G(1) G(2) . . . G(k); + if k >= nq, P = G(1) G(2) . . . G(nq-1). + + Arguments + ========= + + VECT (input) CHARACTER*1 + = 'Q': apply Q or Q**T; + = 'P': apply P or P**T. + + SIDE (input) CHARACTER*1 + = 'L': apply Q, Q**T, P or P**T from the Left; + = 'R': apply Q, Q**T, P or P**T from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q or P; + = 'T': Transpose, apply Q**T or P**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + If VECT = 'Q', the number of columns in the original + matrix reduced by DGEBRD. + If VECT = 'P', the number of rows in the original + matrix reduced by DGEBRD. + K >= 0. + + A (input) DOUBLE PRECISION array, dimension + (LDA,min(nq,K)) if VECT = 'Q' + (LDA,nq) if VECT = 'P' + The vectors which define the elementary reflectors H(i) and + G(i), whose products determine the matrices Q and P, as + returned by DGEBRD. + + LDA (input) INTEGER + The leading dimension of the array A. + If VECT = 'Q', LDA >= max(1,nq); + if VECT = 'P', LDA >= max(1,min(nq,K)). + + TAU (input) DOUBLE PRECISION array, dimension (min(nq,K)) + TAU(i) must contain the scalar factor of the elementary + reflector H(i) or G(i) which determines Q or P, as returned + by DGEBRD in the array argument TAUQ or TAUP. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q + or P*C or P**T*C or C*P or C*P**T. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + applyq = lsame_(vect, "Q"); + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q or P and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! applyq && ! lsame_(vect, "P")) { + *info = -1; + } else if (! left && ! lsame_(side, "R")) { + *info = -2; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*k < 0) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = min(nq,*k); + if (applyq && *lda < max(1,nq) || ! applyq && *lda < max(i__1,i__2)) { + *info = -8; + } else if (*ldc < max(1,*m)) { + *info = -11; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -13; + } + } + + if (*info == 0) { + if (applyq) { + if (left) { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = ilaenv_(&c__1, "DORMQR", ch__1, &i__1, n, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = ilaenv_(&c__1, "DORMQR", ch__1, m, &i__1, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } else { + if (left) { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = ilaenv_(&c__1, "DORMLQ", ch__1, &i__1, n, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = ilaenv_(&c__1, "DORMLQ", ch__1, m, &i__1, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } + lwkopt = max(1,nw) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORMBR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + work[1] = 1.; + if (*m == 0 || *n == 0) { + return 0; + } + + if (applyq) { + +/* Apply Q */ + + if (nq >= *k) { + +/* Q was determined by a call to DGEBRD with nq >= k */ + + dormqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], lwork, &iinfo); + } else if (nq > 1) { + +/* Q was determined by a call to DGEBRD with nq < k */ + + if (left) { + mi = *m - 1; + ni = *n; + i1 = 2; + i2 = 1; + } else { + mi = *m; + ni = *n - 1; + i1 = 1; + i2 = 2; + } + i__1 = nq - 1; + dormqr_(side, trans, &mi, &ni, &i__1, &a[a_dim1 + 2], lda, &tau[1] + , &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + } + } else { + +/* Apply P */ + + if (notran) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + if (nq > *k) { + +/* P was determined by a call to DGEBRD with nq > k */ + + dormlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], lwork, &iinfo); + } else if (nq > 1) { + +/* P was determined by a call to DGEBRD with nq <= k */ + + if (left) { + mi = *m - 1; + ni = *n; + i1 = 2; + i2 = 1; + } else { + mi = *m; + ni = *n - 1; + i1 = 1; + i2 = 2; + } + i__1 = nq - 1; + dormlq_(side, transt, &mi, &ni, &i__1, &a[(a_dim1 << 1) + 1], lda, + &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, & + iinfo); + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMBR */ + +} /* dormbr_ */ + +/* Subroutine */ int dorml2_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; + + /* Local variables */ + static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; + static doublereal aii; + static logical left; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DORML2 overwrites the general real m by n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'T', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'T', + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by DGELQF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'T': apply Q' (Transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) DOUBLE PRECISION array, dimension + (LDA,M) if SIDE = 'L', + (LDA,N) if SIDE = 'R' + The i-th row must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + DGELQF in the first k rows of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,K). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGELQF. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) DOUBLE PRECISION array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORML2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i) */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &tau[i__], &c__[ + ic + jc * c_dim1], ldc, &work[1]); + a[i__ + i__ * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of DORML2 */ + +} /* dorml2_ */ + +/* Subroutine */ int dormlq_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i__; + static doublereal t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int dorml2_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), dlarfb_(char + *, char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical notran; + static integer ldwork; + static char transt[1]; + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DORMLQ overwrites the general real M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by DGELQF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**T from the Left; + = 'R': apply Q or Q**T from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'T': Transpose, apply Q**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) DOUBLE PRECISION array, dimension + (LDA,M) if SIDE = 'L', + (LDA,N) if SIDE = 'R' + The i-th row must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + DGELQF in the first k rows of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,K). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGELQF. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* + Determine the block size. NB may be at most NBMAX, where NBMAX + is used to define the local array T. + + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "DORMLQ", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORMLQ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "DORMLQ", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + dorml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + if (notran) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__4 = nq - i__ + 1; + dlarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1], + lda, &tau[i__], t, &c__65); + if (left) { + +/* H or H' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H' */ + + dlarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__ + + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1], + ldc, &work[1], &ldwork); +/* L10: */ + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMLQ */ + +} /* dormlq_ */ + +/* Subroutine */ int dormql_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i__; + static doublereal t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int dorm2l_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), dlarfb_(char + *, char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical notran; + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DORMQL overwrites the general real M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by DGEQLF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**T from the Left; + = 'R': apply Q or Q**T from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'T': Transpose, apply Q**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + DGEQLF in the last k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGEQLF. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* + Determine the block size. NB may be at most NBMAX, where NBMAX + is used to define the local array T. + + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "DORMQL", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORMQL", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "DORMQL", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + dorm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i+ib-1) . . . H(i+1) H(i) +*/ + + i__4 = nq - *k + i__ + ib - 1; + dlarft_("Backward", "Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1] + , lda, &tau[i__], t, &c__65); + if (left) { + +/* H or H' is applied to C(1:m-k+i+ib-1,1:n) */ + + mi = *m - *k + i__ + ib - 1; + } else { + +/* H or H' is applied to C(1:m,1:n-k+i+ib-1) */ + + ni = *n - *k + i__ + ib - 1; + } + +/* Apply H or H' */ + + dlarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, &a[ + i__ * a_dim1 + 1], lda, t, &c__65, &c__[c_offset], ldc, & + work[1], &ldwork); +/* L10: */ + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMQL */ + +} /* dormql_ */ + +/* Subroutine */ int dormqr_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i__; + static doublereal t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int dorm2r_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), dlarfb_(char + *, char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical notran; + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DORMQR overwrites the general real M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(1) H(2) . . . H(k) + + as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**T from the Left; + = 'R': apply Q or Q**T from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'T': Transpose, apply Q**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + DGEQRF in the first k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGEQRF. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* + Determine the block size. NB may be at most NBMAX, where NBMAX + is used to define the local array T. + + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "DORMQR", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORMQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "DORMQR", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + dorm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__4 = nq - i__ + 1; + dlarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], t, &c__65) + ; + if (left) { + +/* H or H' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H' */ + + dlarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[ + i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * + c_dim1], ldc, &work[1], &ldwork); +/* L10: */ + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMQR */ + +} /* dormqr_ */ + +/* Subroutine */ int dormtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2, i__3; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i1, i2, nb, mi, ni, nq, nw; + static logical left; + extern logical lsame_(char *, char *); + static integer iinfo; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dormql_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *), + dormqr_(char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DORMTR overwrites the general real M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + where Q is a real orthogonal matrix of order nq, with nq = m if + SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + nq-1 elementary reflectors, as returned by DSYTRD: + + if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); + + if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**T from the Left; + = 'R': apply Q or Q**T from the Right. + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A contains elementary reflectors + from DSYTRD; + = 'L': Lower triangle of A contains elementary reflectors + from DSYTRD. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'T': Transpose, apply Q**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + A (input) DOUBLE PRECISION array, dimension + (LDA,M) if SIDE = 'L' + (LDA,N) if SIDE = 'R' + The vectors which define the elementary reflectors, as + returned by DSYTRD. + + LDA (input) INTEGER + The leading dimension of the array A. + LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. + + TAU (input) DOUBLE PRECISION array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DSYTRD. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! upper && ! lsame_(uplo, "L")) { + *info = -2; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T")) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + if (upper) { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *m - 1; + i__3 = *m - 1; + nb = ilaenv_(&c__1, "DORMQL", ch__1, &i__2, n, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, "DORMQL", ch__1, m, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } else { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *m - 1; + i__3 = *m - 1; + nb = ilaenv_(&c__1, "DORMQR", ch__1, &i__2, n, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, "DORMQR", ch__1, m, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } + lwkopt = max(1,nw) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__2 = -(*info); + xerbla_("DORMTR", &i__2); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || nq == 1) { + work[1] = 1.; + return 0; + } + + if (left) { + mi = *m - 1; + ni = *n; + } else { + mi = *m; + ni = *n - 1; + } + + if (upper) { + +/* Q was determined by a call to DSYTRD with UPLO = 'U' */ + + i__2 = nq - 1; + dormql_(side, trans, &mi, &ni, &i__2, &a[(a_dim1 << 1) + 1], lda, & + tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo); + } else { + +/* Q was determined by a call to DSYTRD with UPLO = 'L' */ + + if (left) { + i1 = 2; + i2 = 1; + } else { + i1 = 1; + i2 = 2; + } + i__2 = nq - 1; + dormqr_(side, trans, &mi, &ni, &i__2, &a[a_dim1 + 2], lda, &tau[1], & + c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMTR */ + +} /* dormtr_ */ + +/* Subroutine */ int dpotf2_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer j; + static doublereal ajj; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DPOTF2 computes the Cholesky factorization of a real symmetric + positive definite matrix A. + + The factorization has the form + A = U' * U , if UPLO = 'U', or + A = L * L', if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. + + This is the unblocked version of the algorithm, calling Level 2 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + symmetric matrix A is stored. + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + n by n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n by n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U'*U or A = L*L'. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + > 0: if INFO = k, the leading minor of order k is not + positive definite, and the factorization could not be + completed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DPOTF2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute U(J,J) and test for non-positive-definiteness. */ + + i__2 = j - 1; + ajj = a[j + j * a_dim1] - ddot_(&i__2, &a[j * a_dim1 + 1], &c__1, + &a[j * a_dim1 + 1], &c__1); + if (ajj <= 0.) { + a[j + j * a_dim1] = ajj; + goto L30; + } + ajj = sqrt(ajj); + a[j + j * a_dim1] = ajj; + +/* Compute elements J+1:N of row J. */ + + if (j < *n) { + i__2 = j - 1; + i__3 = *n - j; + dgemv_("Transpose", &i__2, &i__3, &c_b151, &a[(j + 1) * + a_dim1 + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b15, & + a[j + (j + 1) * a_dim1], lda); + i__2 = *n - j; + d__1 = 1. / ajj; + dscal_(&i__2, &d__1, &a[j + (j + 1) * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute L(J,J) and test for non-positive-definiteness. */ + + i__2 = j - 1; + ajj = a[j + j * a_dim1] - ddot_(&i__2, &a[j + a_dim1], lda, &a[j + + a_dim1], lda); + if (ajj <= 0.) { + a[j + j * a_dim1] = ajj; + goto L30; + } + ajj = sqrt(ajj); + a[j + j * a_dim1] = ajj; + +/* Compute elements J+1:N of column J. */ + + if (j < *n) { + i__2 = *n - j; + i__3 = j - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[j + 1 + + a_dim1], lda, &a[j + a_dim1], lda, &c_b15, &a[j + 1 + + j * a_dim1], &c__1); + i__2 = *n - j; + d__1 = 1. / ajj; + dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); + } +/* L20: */ + } + } + goto L40; + +L30: + *info = j; + +L40: + return 0; + +/* End of DPOTF2 */ + +} /* dpotf2_ */ + +/* Subroutine */ int dpotrf_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer j, jb, nb; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + static logical upper; + extern /* Subroutine */ int dsyrk_(char *, char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *), dpotf2_(char *, integer *, + doublereal *, integer *, integer *), xerbla_(char *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + DPOTRF computes the Cholesky factorization of a real symmetric + positive definite matrix A. + + The factorization has the form + A = U**T * U, if UPLO = 'U', or + A = L * L**T, if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. + + This is the block version of the algorithm, calling Level 3 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + N-by-N upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U**T*U or A = L*L**T. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the leading minor of order i is not + positive definite, and the factorization could not be + completed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DPOTRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "DPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code. */ + + dpotf2_(uplo, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code. */ + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. */ + + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* + Update and factorize the current diagonal block and test + for non-positive-definiteness. + + Computing MIN +*/ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + dsyrk_("Upper", "Transpose", &jb, &i__3, &c_b151, &a[j * + a_dim1 + 1], lda, &c_b15, &a[j + j * a_dim1], lda); + dpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Compute the current block row. */ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + dgemm_("Transpose", "No transpose", &jb, &i__3, &i__4, & + c_b151, &a[j * a_dim1 + 1], lda, &a[(j + jb) * + a_dim1 + 1], lda, &c_b15, &a[j + (j + jb) * + a_dim1], lda); + i__3 = *n - j - jb + 1; + dtrsm_("Left", "Upper", "Transpose", "Non-unit", &jb, & + i__3, &c_b15, &a[j + j * a_dim1], lda, &a[j + (j + + jb) * a_dim1], lda); + } +/* L10: */ + } + + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + i__2 = *n; + i__1 = nb; + for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* + Update and factorize the current diagonal block and test + for non-positive-definiteness. + + Computing MIN +*/ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + dsyrk_("Lower", "No transpose", &jb, &i__3, &c_b151, &a[j + + a_dim1], lda, &c_b15, &a[j + j * a_dim1], lda); + dpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Compute the current block column. */ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + dgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, & + c_b151, &a[j + jb + a_dim1], lda, &a[j + a_dim1], + lda, &c_b15, &a[j + jb + j * a_dim1], lda); + i__3 = *n - j - jb + 1; + dtrsm_("Right", "Lower", "Transpose", "Non-unit", &i__3, & + jb, &c_b15, &a[j + j * a_dim1], lda, &a[j + jb + + j * a_dim1], lda); + } +/* L20: */ + } + } + } + goto L40; + +L30: + *info = *info + j - 1; + +L40: + return 0; + +/* End of DPOTRF */ + +} /* dpotrf_ */ + +/* Subroutine */ int dpotri_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *), dlauum_( + char *, integer *, doublereal *, integer *, integer *), + dtrtri_(char *, char *, integer *, doublereal *, integer *, + integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + DPOTRI computes the inverse of a real symmetric positive definite + matrix A using the Cholesky factorization A = U**T*U or A = L*L**T + computed by DPOTRF. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the triangular factor U or L from the Cholesky + factorization A = U**T*U or A = L*L**T, as computed by + DPOTRF. + On exit, the upper or lower triangle of the (symmetric) + inverse of A, overwriting the input factor U or L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the (i,i) element of the factor U or L is + zero, and the inverse could not be computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DPOTRI", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Invert the triangular Cholesky factor U or L. */ + + dtrtri_(uplo, "Non-unit", n, &a[a_offset], lda, info); + if (*info > 0) { + return 0; + } + +/* Form inv(U)*inv(U)' or inv(L)'*inv(L). */ + + dlauum_(uplo, n, &a[a_offset], lda, info); + + return 0; + +/* End of DPOTRI */ + +} /* dpotri_ */ + +/* Subroutine */ int dpotrs_(char *uplo, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + DPOTRS solves a system of linear equations A*X = B with a symmetric + positive definite matrix A using the Cholesky factorization + A = U**T*U or A = L*L**T computed by DPOTRF. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA,N) + The triangular factor U or L from the Cholesky factorization + A = U**T*U or A = L*L**T, as computed by DPOTRF. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) + On entry, the right hand side matrix B. + On exit, the solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DPOTRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (upper) { + +/* + Solve A*X = B where A = U'*U. + + Solve U'*X = B, overwriting B with X. +*/ + + dtrsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b15, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. */ + + dtrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b15, & + a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* + Solve A*X = B where A = L*L'. + + Solve L*X = B, overwriting B with X. +*/ + + dtrsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b15, & + a[a_offset], lda, &b[b_offset], ldb); + +/* Solve L'*X = B, overwriting B with X. */ + + dtrsm_("Left", "Lower", "Transpose", "Non-unit", n, nrhs, &c_b15, &a[ + a_offset], lda, &b[b_offset], ldb); + } + + return 0; + +/* End of DPOTRS */ + +} /* dpotrs_ */ + +/* Subroutine */ int dstedc_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublereal *z__, integer *ldz, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double log(doublereal); + integer pow_ii(integer *, integer *); + double sqrt(doublereal); + + /* Local variables */ + static integer i__, j, k, m; + static doublereal p; + static integer ii, end, lgn; + static doublereal eps, tiny; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer lwmin; + extern /* Subroutine */ int dlaed0_(integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *); + static integer start; + + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlacpy_(char *, integer *, integer + *, doublereal *, integer *, doublereal *, integer *), + dlaset_(char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *), dlasrt_(char *, integer *, doublereal *, integer *); + static integer liwmin, icompz; + extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *); + static doublereal orgnrm; + static logical lquery; + static integer smlsiz, dtrtrw, storez; + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DSTEDC computes all eigenvalues and, optionally, eigenvectors of a + symmetric tridiagonal matrix using the divide and conquer method. + The eigenvectors of a full or band real symmetric matrix can also be + found if DSYTRD or DSPTRD or DSBTRD has been used to reduce this + matrix to tridiagonal form. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. See DLAED3 for details. + + Arguments + ========= + + COMPZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only. + = 'I': Compute eigenvectors of tridiagonal matrix also. + = 'V': Compute eigenvectors of original dense symmetric + matrix also. On entry, Z contains the orthogonal + matrix used to reduce the original matrix to + tridiagonal form. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. + + E (input/output) DOUBLE PRECISION array, dimension (N-1) + On entry, the subdiagonal elements of the tridiagonal matrix. + On exit, E has been destroyed. + + Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) + On entry, if COMPZ = 'V', then Z contains the orthogonal + matrix used in the reduction to tridiagonal form. + On exit, if INFO = 0, then if COMPZ = 'V', Z contains the + orthonormal eigenvectors of the original symmetric matrix, + and if COMPZ = 'I', Z contains the orthonormal eigenvectors + of the symmetric tridiagonal matrix. + If COMPZ = 'N', then Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= 1. + If eigenvectors are desired, then LDZ >= max(1,N). + + WORK (workspace/output) DOUBLE PRECISION array, + dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If COMPZ = 'N' or N <= 1 then LWORK must be at least 1. + If COMPZ = 'V' and N > 1 then LWORK must be at least + ( 1 + 3*N + 2*N*lg N + 3*N**2 ), + where lg( N ) = smallest integer k such + that 2**k >= N. + If COMPZ = 'I' and N > 1 then LWORK must be at least + ( 1 + 4*N + N**2 ). + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + IWORK (workspace/output) INTEGER array, dimension (LIWORK) + On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. + + LIWORK (input) INTEGER + The dimension of the array IWORK. + If COMPZ = 'N' or N <= 1 then LIWORK must be at least 1. + If COMPZ = 'V' and N > 1 then LIWORK must be at least + ( 6 + 6*N + 5*N*lg N ). + If COMPZ = 'I' and N > 1 then LIWORK must be at least + ( 3 + 5*N ). + + If LIWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal size of the IWORK array, + returns this value as the first entry of the IWORK array, and + no error message related to LIWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an eigenvalue while + working on the submatrix lying in rows and columns + INFO/(N+1) through mod(INFO,N+1). + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1 || *liwork == -1; + + if (lsame_(compz, "N")) { + icompz = 0; + } else if (lsame_(compz, "V")) { + icompz = 1; + } else if (lsame_(compz, "I")) { + icompz = 2; + } else { + icompz = -1; + } + if (*n <= 1 || icompz <= 0) { + liwmin = 1; + lwmin = 1; + } else { + lgn = (integer) (log((doublereal) (*n)) / log(2.)); + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (icompz == 1) { +/* Computing 2nd power */ + i__1 = *n; + lwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3; + liwmin = *n * 6 + 6 + *n * 5 * lgn; + } else if (icompz == 2) { +/* Computing 2nd power */ + i__1 = *n; + lwmin = (*n << 2) + 1 + i__1 * i__1; + liwmin = *n * 5 + 3; + } + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } else if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*liwork < liwmin && ! lquery) { + *info = -10; + } + + if (*info == 0) { + work[1] = (doublereal) lwmin; + iwork[1] = liwmin; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DSTEDC", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*n == 1) { + if (icompz != 0) { + z__[z_dim1 + 1] = 1.; + } + return 0; + } + + smlsiz = ilaenv_(&c__9, "DSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + +/* + If the following conditional clause is removed, then the routine + will use the Divide and Conquer routine to compute only the + eigenvalues, which requires (3N + 3N**2) real workspace and + (2 + 5N + 2N lg(N)) integer workspace. + Since on many architectures DSTERF is much faster than any other + algorithm for finding eigenvalues only, it is used here + as the default. + + If COMPZ = 'N', use DSTERF to compute the eigenvalues. +*/ + + if (icompz == 0) { + dsterf_(n, &d__[1], &e[1], info); + return 0; + } + +/* + If N is smaller than the minimum divide size (SMLSIZ+1), then + solve the problem with another solver. +*/ + + if (*n <= smlsiz) { + if (icompz == 0) { + dsterf_(n, &d__[1], &e[1], info); + return 0; + } else if (icompz == 2) { + dsteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], + info); + return 0; + } else { + dsteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], + info); + return 0; + } + } + +/* + If COMPZ = 'V', the Z matrix must be stored elsewhere for later + use. +*/ + + if (icompz == 1) { + storez = *n * *n + 1; + } else { + storez = 1; + } + + if (icompz == 2) { + dlaset_("Full", n, n, &c_b29, &c_b15, &z__[z_offset], ldz); + } + +/* Scale. */ + + orgnrm = dlanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.) { + return 0; + } + + eps = EPSILON; + + start = 1; + +/* while ( START <= N ) */ + +L10: + if (start <= *n) { + +/* + Let END be the position of the next subdiagonal entry such that + E( END ) <= TINY or END = N if no such subdiagonal exists. The + matrix identified by the elements between START and END + constitutes an independent sub-problem. +*/ + + end = start; +L20: + if (end < *n) { + tiny = eps * sqrt((d__1 = d__[end], abs(d__1))) * sqrt((d__2 = + d__[end + 1], abs(d__2))); + if ((d__1 = e[end], abs(d__1)) > tiny) { + ++end; + goto L20; + } + } + +/* (Sub) Problem determined. Compute its size and solve it. */ + + m = end - start + 1; + if (m == 1) { + start = end + 1; + goto L10; + } + if (m > smlsiz) { + *info = smlsiz; + +/* Scale. */ + + orgnrm = dlanst_("M", &m, &d__[start], &e[start]); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &m, &c__1, &d__[start] + , &m, info); + i__1 = m - 1; + i__2 = m - 1; + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &i__1, &c__1, &e[ + start], &i__2, info); + + if (icompz == 1) { + dtrtrw = 1; + } else { + dtrtrw = start; + } + dlaed0_(&icompz, n, &m, &d__[start], &e[start], &z__[dtrtrw + + start * z_dim1], ldz, &work[1], n, &work[storez], &iwork[ + 1], info); + if (*info != 0) { + *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m + + 1) + start - 1; + return 0; + } + +/* Scale back. */ + + dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, &m, &c__1, &d__[start] + , &m, info); + + } else { + if (icompz == 1) { + +/* + Since QR won't update a Z matrix which is larger than the + length of D, we must solve the sub-problem in a workspace and + then multiply back into Z. +*/ + + dsteqr_("I", &m, &d__[start], &e[start], &work[1], &m, &work[ + m * m + 1], info); + dlacpy_("A", n, &m, &z__[start * z_dim1 + 1], ldz, &work[ + storez], n); + dgemm_("N", "N", n, &m, &m, &c_b15, &work[storez], ldz, &work[ + 1], &m, &c_b29, &z__[start * z_dim1 + 1], ldz); + } else if (icompz == 2) { + dsteqr_("I", &m, &d__[start], &e[start], &z__[start + start * + z_dim1], ldz, &work[1], info); + } else { + dsterf_(&m, &d__[start], &e[start], info); + } + if (*info != 0) { + *info = start * (*n + 1) + end; + return 0; + } + } + + start = end + 1; + goto L10; + } + +/* + endwhile + + If the problem split any number of times, then the eigenvalues + will not be properly ordered. Here we permute the eigenvalues + (and the associated eigenvectors) into ascending order. +*/ + + if (m != *n) { + if (icompz == 0) { + +/* Use Quick Sort */ + + dlasrt_("I", n, &d__[1], info); + + } else { + +/* Use Selection Sort to minimize swaps of eigenvectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L30: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + + 1], &c__1); + } +/* L40: */ + } + } + } + + work[1] = (doublereal) lwmin; + iwork[1] = liwmin; + + return 0; + +/* End of DSTEDC */ + +} /* dstedc_ */ + +/* Subroutine */ int dsteqr_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublereal *z__, integer *ldz, doublereal *work, + integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + static doublereal b, c__, f, g; + static integer i__, j, k, l, m; + static doublereal p, r__, s; + static integer l1, ii, mm, lm1, mm1, nm1; + static doublereal rt1, rt2, eps; + static integer lsv; + static doublereal tst, eps2; + static integer lend, jtot; + extern /* Subroutine */ int dlae2_(doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *); + static doublereal anorm; + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *), dlaev2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *); + static integer lendm1, lendp1; + + static integer iscale; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlaset_(char *, integer *, integer + *, doublereal *, doublereal *, doublereal *, integer *); + static doublereal safmin; + extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + static doublereal safmax; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, + integer *); + static integer lendsv; + static doublereal ssfmin; + static integer nmaxit, icompz; + static doublereal ssfmax; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + DSTEQR computes all eigenvalues and, optionally, eigenvectors of a + symmetric tridiagonal matrix using the implicit QL or QR method. + The eigenvectors of a full or band symmetric matrix can also be found + if DSYTRD or DSPTRD or DSBTRD has been used to reduce this matrix to + tridiagonal form. + + Arguments + ========= + + COMPZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only. + = 'V': Compute eigenvalues and eigenvectors of the original + symmetric matrix. On entry, Z must contain the + orthogonal matrix used to reduce the original matrix + to tridiagonal form. + = 'I': Compute eigenvalues and eigenvectors of the + tridiagonal matrix. Z is initialized to the identity + matrix. + + N (input) INTEGER + The order of the matrix. N >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. + + E (input/output) DOUBLE PRECISION array, dimension (N-1) + On entry, the (n-1) subdiagonal elements of the tridiagonal + matrix. + On exit, E has been destroyed. + + Z (input/output) DOUBLE PRECISION array, dimension (LDZ, N) + On entry, if COMPZ = 'V', then Z contains the orthogonal + matrix used in the reduction to tridiagonal form. + On exit, if INFO = 0, then if COMPZ = 'V', Z contains the + orthonormal eigenvectors of the original symmetric matrix, + and if COMPZ = 'I', Z contains the orthonormal eigenvectors + of the symmetric tridiagonal matrix. + If COMPZ = 'N', then Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= 1, and if + eigenvectors are desired, then LDZ >= max(1,N). + + WORK (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2)) + If COMPZ = 'N', then WORK is not referenced. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: the algorithm has failed to find all the eigenvalues in + a total of 30*N iterations; if INFO = i, then i + elements of E have not converged to zero; on exit, D + and E contain the elements of a symmetric tridiagonal + matrix which is orthogonally similar to the original + matrix. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + + if (lsame_(compz, "N")) { + icompz = 0; + } else if (lsame_(compz, "V")) { + icompz = 1; + } else if (lsame_(compz, "I")) { + icompz = 2; + } else { + icompz = -1; + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DSTEQR", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (icompz == 2) { + z__[z_dim1 + 1] = 1.; + } + return 0; + } + +/* Determine the unit roundoff and over/underflow thresholds. */ + + eps = EPSILON; +/* Computing 2nd power */ + d__1 = eps; + eps2 = d__1 * d__1; + safmin = SAFEMINIMUM; + safmax = 1. / safmin; + ssfmax = sqrt(safmax) / 3.; + ssfmin = sqrt(safmin) / eps2; + +/* + Compute the eigenvalues and eigenvectors of the tridiagonal + matrix. +*/ + + if (icompz == 2) { + dlaset_("Full", n, n, &c_b29, &c_b15, &z__[z_offset], ldz); + } + + nmaxit = *n * 30; + jtot = 0; + +/* + Determine where the matrix splits and choose QL or QR iteration + for each block, according to whether top or bottom diagonal + element is smaller. +*/ + + l1 = 1; + nm1 = *n - 1; + +L10: + if (l1 > *n) { + goto L160; + } + if (l1 > 1) { + e[l1 - 1] = 0.; + } + if (l1 <= nm1) { + i__1 = nm1; + for (m = l1; m <= i__1; ++m) { + tst = (d__1 = e[m], abs(d__1)); + if (tst == 0.) { + goto L30; + } + if (tst <= sqrt((d__1 = d__[m], abs(d__1))) * sqrt((d__2 = d__[m + + 1], abs(d__2))) * eps) { + e[m] = 0.; + goto L30; + } +/* L20: */ + } + } + m = *n; + +L30: + l = l1; + lsv = l; + lend = m; + lendsv = lend; + l1 = m + 1; + if (lend == l) { + goto L10; + } + +/* Scale submatrix in rows and columns L to LEND */ + + i__1 = lend - l + 1; + anorm = dlanst_("I", &i__1, &d__[l], &e[l]); + iscale = 0; + if (anorm == 0.) { + goto L10; + } + if (anorm > ssfmax) { + iscale = 1; + i__1 = lend - l + 1; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, + info); + } else if (anorm < ssfmin) { + iscale = 2; + i__1 = lend - l + 1; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, + info); + } + +/* Choose between QL and QR iteration */ + + if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { + lend = lsv; + l = lendsv; + } + + if (lend > l) { + +/* + QL Iteration + + Look for small subdiagonal element. +*/ + +L40: + if (l != lend) { + lendm1 = lend - 1; + i__1 = lendm1; + for (m = l; m <= i__1; ++m) { +/* Computing 2nd power */ + d__2 = (d__1 = e[m], abs(d__1)); + tst = d__2 * d__2; + if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m + + 1], abs(d__2)) + safmin) { + goto L60; + } +/* L50: */ + } + } + + m = lend; + +L60: + if (m < lend) { + e[m] = 0.; + } + p = d__[l]; + if (m == l) { + goto L80; + } + +/* + If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 + to compute its eigensystem. +*/ + + if (m == l + 1) { + if (icompz > 0) { + dlaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); + work[l] = c__; + work[*n - 1 + l] = s; + dlasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & + z__[l * z_dim1 + 1], ldz); + } else { + dlae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); + } + d__[l] = rt1; + d__[l + 1] = rt2; + e[l] = 0.; + l += 2; + if (l <= lend) { + goto L40; + } + goto L140; + } + + if (jtot == nmaxit) { + goto L140; + } + ++jtot; + +/* Form shift. */ + + g = (d__[l + 1] - p) / (e[l] * 2.); + r__ = dlapy2_(&g, &c_b15); + g = d__[m] - p + e[l] / (g + d_sign(&r__, &g)); + + s = 1.; + c__ = 1.; + p = 0.; + +/* Inner loop */ + + mm1 = m - 1; + i__1 = l; + for (i__ = mm1; i__ >= i__1; --i__) { + f = s * e[i__]; + b = c__ * e[i__]; + dlartg_(&g, &f, &c__, &s, &r__); + if (i__ != m - 1) { + e[i__ + 1] = r__; + } + g = d__[i__ + 1] - p; + r__ = (d__[i__] - g) * s + c__ * 2. * b; + p = s * r__; + d__[i__ + 1] = g + p; + g = c__ * r__ - b; + +/* If eigenvectors are desired, then save rotations. */ + + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = -s; + } + +/* L70: */ + } + +/* If eigenvectors are desired, then apply saved rotations. */ + + if (icompz > 0) { + mm = m - l + 1; + dlasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l + * z_dim1 + 1], ldz); + } + + d__[l] -= p; + e[l] = g; + goto L40; + +/* Eigenvalue found. */ + +L80: + d__[l] = p; + + ++l; + if (l <= lend) { + goto L40; + } + goto L140; + + } else { + +/* + QR Iteration + + Look for small superdiagonal element. +*/ + +L90: + if (l != lend) { + lendp1 = lend + 1; + i__1 = lendp1; + for (m = l; m >= i__1; --m) { +/* Computing 2nd power */ + d__2 = (d__1 = e[m - 1], abs(d__1)); + tst = d__2 * d__2; + if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m + - 1], abs(d__2)) + safmin) { + goto L110; + } +/* L100: */ + } + } + + m = lend; + +L110: + if (m > lend) { + e[m - 1] = 0.; + } + p = d__[l]; + if (m == l) { + goto L130; + } + +/* + If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 + to compute its eigensystem. +*/ + + if (m == l - 1) { + if (icompz > 0) { + dlaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) + ; + work[m] = c__; + work[*n - 1 + m] = s; + dlasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & + z__[(l - 1) * z_dim1 + 1], ldz); + } else { + dlae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); + } + d__[l - 1] = rt1; + d__[l] = rt2; + e[l - 1] = 0.; + l += -2; + if (l >= lend) { + goto L90; + } + goto L140; + } + + if (jtot == nmaxit) { + goto L140; + } + ++jtot; + +/* Form shift. */ + + g = (d__[l - 1] - p) / (e[l - 1] * 2.); + r__ = dlapy2_(&g, &c_b15); + g = d__[m] - p + e[l - 1] / (g + d_sign(&r__, &g)); + + s = 1.; + c__ = 1.; + p = 0.; + +/* Inner loop */ + + lm1 = l - 1; + i__1 = lm1; + for (i__ = m; i__ <= i__1; ++i__) { + f = s * e[i__]; + b = c__ * e[i__]; + dlartg_(&g, &f, &c__, &s, &r__); + if (i__ != m) { + e[i__ - 1] = r__; + } + g = d__[i__] - p; + r__ = (d__[i__ + 1] - g) * s + c__ * 2. * b; + p = s * r__; + d__[i__] = g + p; + g = c__ * r__ - b; + +/* If eigenvectors are desired, then save rotations. */ + + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = s; + } + +/* L120: */ + } + +/* If eigenvectors are desired, then apply saved rotations. */ + + if (icompz > 0) { + mm = l - m + 1; + dlasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m + * z_dim1 + 1], ldz); + } + + d__[l] -= p; + e[lm1] = g; + goto L90; + +/* Eigenvalue found. */ + +L130: + d__[l] = p; + + --l; + if (l >= lend) { + goto L90; + } + goto L140; + + } + +/* Undo scaling if necessary */ + +L140: + if (iscale == 1) { + i__1 = lendsv - lsv + 1; + dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + i__1 = lendsv - lsv; + dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, + info); + } else if (iscale == 2) { + i__1 = lendsv - lsv + 1; + dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + i__1 = lendsv - lsv; + dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, + info); + } + +/* + Check for no convergence to an eigenvalue after a total + of N*MAXIT iterations. +*/ + + if (jtot < nmaxit) { + goto L10; + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.) { + ++(*info); + } +/* L150: */ + } + goto L190; + +/* Order eigenvalues and eigenvectors. */ + +L160: + if (icompz == 0) { + +/* Use Quick Sort */ + + dlasrt_("I", n, &d__[1], info); + + } else { + +/* Use Selection Sort to minimize swaps of eigenvectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L170: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], + &c__1); + } +/* L180: */ + } + } + +L190: + return 0; + +/* End of DSTEQR */ + +} /* dsteqr_ */ + +/* Subroutine */ int dsterf_(integer *n, doublereal *d__, doublereal *e, + integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + static doublereal c__; + static integer i__, l, m; + static doublereal p, r__, s; + static integer l1; + static doublereal bb, rt1, rt2, eps, rte; + static integer lsv; + static doublereal eps2, oldc; + static integer lend, jtot; + extern /* Subroutine */ int dlae2_(doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *); + static doublereal gamma, alpha, sigma, anorm; + + static integer iscale; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + static doublereal oldgam, safmin; + extern /* Subroutine */ int xerbla_(char *, integer *); + static doublereal safmax; + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, + integer *); + static integer lendsv; + static doublereal ssfmin; + static integer nmaxit; + static doublereal ssfmax; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DSTERF computes all eigenvalues of a symmetric tridiagonal matrix + using the Pal-Walker-Kahan variant of the QL or QR algorithm. + + Arguments + ========= + + N (input) INTEGER + The order of the matrix. N >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the n diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. + + E (input/output) DOUBLE PRECISION array, dimension (N-1) + On entry, the (n-1) subdiagonal elements of the tridiagonal + matrix. + On exit, E has been destroyed. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: the algorithm failed to find all of the eigenvalues in + a total of 30*N iterations; if INFO = i, then i + elements of E have not converged to zero. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --e; + --d__; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n < 0) { + *info = -1; + i__1 = -(*info); + xerbla_("DSTERF", &i__1); + return 0; + } + if (*n <= 1) { + return 0; + } + +/* Determine the unit roundoff for this environment. */ + + eps = EPSILON; +/* Computing 2nd power */ + d__1 = eps; + eps2 = d__1 * d__1; + safmin = SAFEMINIMUM; + safmax = 1. / safmin; + ssfmax = sqrt(safmax) / 3.; + ssfmin = sqrt(safmin) / eps2; + +/* Compute the eigenvalues of the tridiagonal matrix. */ + + nmaxit = *n * 30; + sigma = 0.; + jtot = 0; + +/* + Determine where the matrix splits and choose QL or QR iteration + for each block, according to whether top or bottom diagonal + element is smaller. +*/ + + l1 = 1; + +L10: + if (l1 > *n) { + goto L170; + } + if (l1 > 1) { + e[l1 - 1] = 0.; + } + i__1 = *n - 1; + for (m = l1; m <= i__1; ++m) { + if ((d__3 = e[m], abs(d__3)) <= sqrt((d__1 = d__[m], abs(d__1))) * + sqrt((d__2 = d__[m + 1], abs(d__2))) * eps) { + e[m] = 0.; + goto L30; + } +/* L20: */ + } + m = *n; + +L30: + l = l1; + lsv = l; + lend = m; + lendsv = lend; + l1 = m + 1; + if (lend == l) { + goto L10; + } + +/* Scale submatrix in rows and columns L to LEND */ + + i__1 = lend - l + 1; + anorm = dlanst_("I", &i__1, &d__[l], &e[l]); + iscale = 0; + if (anorm > ssfmax) { + iscale = 1; + i__1 = lend - l + 1; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, + info); + } else if (anorm < ssfmin) { + iscale = 2; + i__1 = lend - l + 1; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, + info); + } + + i__1 = lend - 1; + for (i__ = l; i__ <= i__1; ++i__) { +/* Computing 2nd power */ + d__1 = e[i__]; + e[i__] = d__1 * d__1; +/* L40: */ + } + +/* Choose between QL and QR iteration */ + + if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { + lend = lsv; + l = lendsv; + } + + if (lend >= l) { + +/* + QL Iteration + + Look for small subdiagonal element. +*/ + +L50: + if (l != lend) { + i__1 = lend - 1; + for (m = l; m <= i__1; ++m) { + if ((d__2 = e[m], abs(d__2)) <= eps2 * (d__1 = d__[m] * d__[m + + 1], abs(d__1))) { + goto L70; + } +/* L60: */ + } + } + m = lend; + +L70: + if (m < lend) { + e[m] = 0.; + } + p = d__[l]; + if (m == l) { + goto L90; + } + +/* + If remaining matrix is 2 by 2, use DLAE2 to compute its + eigenvalues. +*/ + + if (m == l + 1) { + rte = sqrt(e[l]); + dlae2_(&d__[l], &rte, &d__[l + 1], &rt1, &rt2); + d__[l] = rt1; + d__[l + 1] = rt2; + e[l] = 0.; + l += 2; + if (l <= lend) { + goto L50; + } + goto L150; + } + + if (jtot == nmaxit) { + goto L150; + } + ++jtot; + +/* Form shift. */ + + rte = sqrt(e[l]); + sigma = (d__[l + 1] - p) / (rte * 2.); + r__ = dlapy2_(&sigma, &c_b15); + sigma = p - rte / (sigma + d_sign(&r__, &sigma)); + + c__ = 1.; + s = 0.; + gamma = d__[m] - sigma; + p = gamma * gamma; + +/* Inner loop */ + + i__1 = l; + for (i__ = m - 1; i__ >= i__1; --i__) { + bb = e[i__]; + r__ = p + bb; + if (i__ != m - 1) { + e[i__ + 1] = s * r__; + } + oldc = c__; + c__ = p / r__; + s = bb / r__; + oldgam = gamma; + alpha = d__[i__]; + gamma = c__ * (alpha - sigma) - s * oldgam; + d__[i__ + 1] = oldgam + (alpha - gamma); + if (c__ != 0.) { + p = gamma * gamma / c__; + } else { + p = oldc * bb; + } +/* L80: */ + } + + e[l] = s * p; + d__[l] = sigma + gamma; + goto L50; + +/* Eigenvalue found. */ + +L90: + d__[l] = p; + + ++l; + if (l <= lend) { + goto L50; + } + goto L150; + + } else { + +/* + QR Iteration + + Look for small superdiagonal element. +*/ + +L100: + i__1 = lend + 1; + for (m = l; m >= i__1; --m) { + if ((d__2 = e[m - 1], abs(d__2)) <= eps2 * (d__1 = d__[m] * d__[m + - 1], abs(d__1))) { + goto L120; + } +/* L110: */ + } + m = lend; + +L120: + if (m > lend) { + e[m - 1] = 0.; + } + p = d__[l]; + if (m == l) { + goto L140; + } + +/* + If remaining matrix is 2 by 2, use DLAE2 to compute its + eigenvalues. +*/ + + if (m == l - 1) { + rte = sqrt(e[l - 1]); + dlae2_(&d__[l], &rte, &d__[l - 1], &rt1, &rt2); + d__[l] = rt1; + d__[l - 1] = rt2; + e[l - 1] = 0.; + l += -2; + if (l >= lend) { + goto L100; + } + goto L150; + } + + if (jtot == nmaxit) { + goto L150; + } + ++jtot; + +/* Form shift. */ + + rte = sqrt(e[l - 1]); + sigma = (d__[l - 1] - p) / (rte * 2.); + r__ = dlapy2_(&sigma, &c_b15); + sigma = p - rte / (sigma + d_sign(&r__, &sigma)); + + c__ = 1.; + s = 0.; + gamma = d__[m] - sigma; + p = gamma * gamma; + +/* Inner loop */ + + i__1 = l - 1; + for (i__ = m; i__ <= i__1; ++i__) { + bb = e[i__]; + r__ = p + bb; + if (i__ != m) { + e[i__ - 1] = s * r__; + } + oldc = c__; + c__ = p / r__; + s = bb / r__; + oldgam = gamma; + alpha = d__[i__ + 1]; + gamma = c__ * (alpha - sigma) - s * oldgam; + d__[i__] = oldgam + (alpha - gamma); + if (c__ != 0.) { + p = gamma * gamma / c__; + } else { + p = oldc * bb; + } +/* L130: */ + } + + e[l - 1] = s * p; + d__[l] = sigma + gamma; + goto L100; + +/* Eigenvalue found. */ + +L140: + d__[l] = p; + + --l; + if (l >= lend) { + goto L100; + } + goto L150; + + } + +/* Undo scaling if necessary */ + +L150: + if (iscale == 1) { + i__1 = lendsv - lsv + 1; + dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + } + if (iscale == 2) { + i__1 = lendsv - lsv + 1; + dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + } + +/* + Check for no convergence to an eigenvalue after a total + of N*MAXIT iterations. +*/ + + if (jtot < nmaxit) { + goto L10; + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.) { + ++(*info); + } +/* L160: */ + } + goto L180; + +/* Sort eigenvalues in increasing order. */ + +L170: + dlasrt_("I", n, &d__[1], info); + +L180: + return 0; + +/* End of DSTERF */ + +} /* dsterf_ */ + +/* Subroutine */ int dsyevd_(char *jobz, char *uplo, integer *n, doublereal * + a, integer *lda, doublereal *w, doublereal *work, integer *lwork, + integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal eps; + static integer inde; + static doublereal anrm, rmin, rmax; + static integer lopt; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + static doublereal sigma; + extern logical lsame_(char *, char *); + static integer iinfo, lwmin, liopt; + static logical lower, wantz; + static integer indwk2, llwrk2; + + static integer iscale; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dstedc_(char *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, integer *), dlacpy_( + char *, integer *, integer *, doublereal *, integer *, doublereal + *, integer *); + static doublereal safmin; + extern /* Subroutine */ int xerbla_(char *, integer *); + static doublereal bignum; + static integer indtau; + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *); + extern doublereal dlansy_(char *, char *, integer *, doublereal *, + integer *, doublereal *); + static integer indwrk, liwmin; + extern /* Subroutine */ int dormtr_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *), dsytrd_(char *, integer *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *); + static integer llwork; + static doublereal smlnum; + static logical lquery; + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DSYEVD computes all eigenvalues and, optionally, eigenvectors of a + real symmetric matrix A. If eigenvectors are desired, it uses a + divide and conquer algorithm. + + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Because of large use of BLAS of level 3, DSYEVD needs N**2 more + workspace than DSYEVX. + + Arguments + ========= + + JOBZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only; + = 'V': Compute eigenvalues and eigenvectors. + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA, N) + On entry, the symmetric matrix A. If UPLO = 'U', the + leading N-by-N upper triangular part of A contains the + upper triangular part of the matrix A. If UPLO = 'L', + the leading N-by-N lower triangular part of A contains + the lower triangular part of the matrix A. + On exit, if JOBZ = 'V', then if INFO = 0, A contains the + orthonormal eigenvectors of the matrix A. + If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') + or the upper triangle (if UPLO='U') of A, including the + diagonal, is destroyed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + W (output) DOUBLE PRECISION array, dimension (N) + If INFO = 0, the eigenvalues in ascending order. + + WORK (workspace/output) DOUBLE PRECISION array, + dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If N <= 1, LWORK must be at least 1. + If JOBZ = 'N' and N > 1, LWORK must be at least 2*N+1. + If JOBZ = 'V' and N > 1, LWORK must be at least + 1 + 6*N + 2*N**2. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + IWORK (workspace/output) INTEGER array, dimension (LIWORK) + On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. + + LIWORK (input) INTEGER + The dimension of the array IWORK. + If N <= 1, LIWORK must be at least 1. + If JOBZ = 'N' and N > 1, LIWORK must be at least 1. + If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. + + If LIWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal size of the IWORK array, + returns this value as the first entry of the IWORK array, and + no error message related to LIWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the algorithm failed to converge; i + off-diagonal elements of an intermediate tridiagonal + form did not converge to zero. + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --w; + --work; + --iwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + lower = lsame_(uplo, "L"); + lquery = *lwork == -1 || *liwork == -1; + + *info = 0; + if (*n <= 1) { + liwmin = 1; + lwmin = 1; + lopt = lwmin; + liopt = liwmin; + } else { + if (wantz) { + liwmin = *n * 5 + 3; +/* Computing 2nd power */ + i__1 = *n; + lwmin = *n * 6 + 1 + (i__1 * i__1 << 1); + } else { + liwmin = 1; + lwmin = (*n << 1) + 1; + } + lopt = lwmin; + liopt = liwmin; + } + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*liwork < liwmin && ! lquery) { + *info = -10; + } + + if (*info == 0) { + work[1] = (doublereal) lopt; + iwork[1] = liopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DSYEVD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + w[1] = a[a_dim1 + 1]; + if (wantz) { + a[a_dim1 + 1] = 1.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = SAFEMINIMUM; + eps = PRECISION; + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = dlansy_("M", uplo, n, &a[a_offset], lda, &work[1]); + iscale = 0; + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + dlascl_(uplo, &c__0, &c__0, &c_b15, &sigma, n, n, &a[a_offset], lda, + info); + } + +/* Call DSYTRD to reduce symmetric matrix to tridiagonal form. */ + + inde = 1; + indtau = inde + *n; + indwrk = indtau + *n; + llwork = *lwork - indwrk + 1; + indwk2 = indwrk + *n * *n; + llwrk2 = *lwork - indwk2 + 1; + + dsytrd_(uplo, n, &a[a_offset], lda, &w[1], &work[inde], &work[indtau], & + work[indwrk], &llwork, &iinfo); + lopt = (integer) ((*n << 1) + work[indwrk]); + +/* + For eigenvalues only, call DSTERF. For eigenvectors, first call + DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the + tridiagonal matrix, then call DORMTR to multiply it by the + Householder transformations stored in A. +*/ + + if (! wantz) { + dsterf_(n, &w[1], &work[inde], info); + } else { + dstedc_("I", n, &w[1], &work[inde], &work[indwrk], n, &work[indwk2], & + llwrk2, &iwork[1], liwork, info); + dormtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ + indwrk], n, &work[indwk2], &llwrk2, &iinfo); + dlacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); +/* + Computing MAX + Computing 2nd power +*/ + i__3 = *n; + i__1 = lopt, i__2 = *n * 6 + 1 + (i__3 * i__3 << 1); + lopt = max(i__1,i__2); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + d__1 = 1. / sigma; + dscal_(n, &d__1, &w[1], &c__1); + } + + work[1] = (doublereal) lopt; + iwork[1] = liopt; + + return 0; + +/* End of DSYEVD */ + +} /* dsyevd_ */ + +/* Subroutine */ int dsytd2_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tau, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + static doublereal taui; + extern /* Subroutine */ int dsyr2_(char *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + static doublereal alpha; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + static logical upper; + extern /* Subroutine */ int dsymv_(char *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer * + ); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal + form T by an orthogonal similarity transformation: Q' * A * Q = T. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + symmetric matrix A is stored: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + n-by-n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n-by-n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit, if UPLO = 'U', the diagonal and first superdiagonal + of A are overwritten by the corresponding elements of the + tridiagonal matrix T, and the elements above the first + superdiagonal, with the array TAU, represent the orthogonal + matrix Q as a product of elementary reflectors; if UPLO + = 'L', the diagonal and first subdiagonal of A are over- + written by the corresponding elements of the tridiagonal + matrix T, and the elements below the first subdiagonal, with + the array TAU, represent the orthogonal matrix Q as a product + of elementary reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + D (output) DOUBLE PRECISION array, dimension (N) + The diagonal elements of the tridiagonal matrix T: + D(i) = A(i,i). + + E (output) DOUBLE PRECISION array, dimension (N-1) + The off-diagonal elements of the tridiagonal matrix T: + E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. + + TAU (output) DOUBLE PRECISION array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors + + Q = H(n-1) . . . H(2) H(1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in + A(1:i-1,i+1), and tau in TAU(i). + + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors + + Q = H(1) H(2) . . . H(n-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), + and tau in TAU(i). + + The contents of A on exit are illustrated by the following examples + with n = 5: + + if UPLO = 'U': if UPLO = 'L': + + ( d e v2 v3 v4 ) ( d ) + ( d e v3 v4 ) ( e d ) + ( d e v4 ) ( v1 e d ) + ( d e ) ( v1 v2 e d ) + ( d ) ( v1 v2 v3 e d ) + + where d and e denote diagonal and off-diagonal elements of T, and vi + denotes an element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tau; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DSYTD2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + + if (upper) { + +/* Reduce the upper triangle of A */ + + for (i__ = *n - 1; i__ >= 1; --i__) { + +/* + Generate elementary reflector H(i) = I - tau * v * v' + to annihilate A(1:i-1,i+1) +*/ + + dlarfg_(&i__, &a[i__ + (i__ + 1) * a_dim1], &a[(i__ + 1) * a_dim1 + + 1], &c__1, &taui); + e[i__] = a[i__ + (i__ + 1) * a_dim1]; + + if (taui != 0.) { + +/* Apply H(i) from both sides to A(1:i,1:i) */ + + a[i__ + (i__ + 1) * a_dim1] = 1.; + +/* Compute x := tau * A * v storing x in TAU(1:i) */ + + dsymv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * + a_dim1 + 1], &c__1, &c_b29, &tau[1], &c__1) + ; + +/* Compute w := x - 1/2 * tau * (x'*v) * v */ + + alpha = taui * -.5 * ddot_(&i__, &tau[1], &c__1, &a[(i__ + 1) + * a_dim1 + 1], &c__1); + daxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[ + 1], &c__1); + +/* + Apply the transformation as a rank-2 update: + A := A - v * w' - w * v' +*/ + + dsyr2_(uplo, &i__, &c_b151, &a[(i__ + 1) * a_dim1 + 1], &c__1, + &tau[1], &c__1, &a[a_offset], lda); + + a[i__ + (i__ + 1) * a_dim1] = e[i__]; + } + d__[i__ + 1] = a[i__ + 1 + (i__ + 1) * a_dim1]; + tau[i__] = taui; +/* L10: */ + } + d__[1] = a[a_dim1 + 1]; + } else { + +/* Reduce the lower triangle of A */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* + Generate elementary reflector H(i) = I - tau * v * v' + to annihilate A(i+2:n,i) +*/ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ * + a_dim1], &c__1, &taui); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + + if (taui != 0.) { + +/* Apply H(i) from both sides to A(i+1:n,i+1:n) */ + + a[i__ + 1 + i__ * a_dim1] = 1.; + +/* Compute x := tau * A * v storing y in TAU(i:n-1) */ + + i__2 = *n - i__; + dsymv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &tau[ + i__], &c__1); + +/* Compute w := x - 1/2 * tau * (x'*v) * v */ + + i__2 = *n - i__; + alpha = taui * -.5 * ddot_(&i__2, &tau[i__], &c__1, &a[i__ + + 1 + i__ * a_dim1], &c__1); + i__2 = *n - i__; + daxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &c__1); + +/* + Apply the transformation as a rank-2 update: + A := A - v * w' - w * v' +*/ + + i__2 = *n - i__; + dsyr2_(uplo, &i__2, &c_b151, &a[i__ + 1 + i__ * a_dim1], & + c__1, &tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * + a_dim1], lda); + + a[i__ + 1 + i__ * a_dim1] = e[i__]; + } + d__[i__] = a[i__ + i__ * a_dim1]; + tau[i__] = taui; +/* L20: */ + } + d__[*n] = a[*n + *n * a_dim1]; + } + + return 0; + +/* End of DSYTD2 */ + +} /* dsytd2_ */ + +/* Subroutine */ int dsytrd_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tau, doublereal * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, nb, kk, nx, iws; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + static logical upper; + extern /* Subroutine */ int dsytd2_(char *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, integer *), dsyr2k_(char *, char *, integer *, integer *, doublereal + *, doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), dlatrd_(char *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, integer *), xerbla_(char *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DSYTRD reduces a real symmetric matrix A to real symmetric + tridiagonal form T by an orthogonal similarity transformation: + Q**T * A * Q = T. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + N-by-N upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit, if UPLO = 'U', the diagonal and first superdiagonal + of A are overwritten by the corresponding elements of the + tridiagonal matrix T, and the elements above the first + superdiagonal, with the array TAU, represent the orthogonal + matrix Q as a product of elementary reflectors; if UPLO + = 'L', the diagonal and first subdiagonal of A are over- + written by the corresponding elements of the tridiagonal + matrix T, and the elements below the first subdiagonal, with + the array TAU, represent the orthogonal matrix Q as a product + of elementary reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + D (output) DOUBLE PRECISION array, dimension (N) + The diagonal elements of the tridiagonal matrix T: + D(i) = A(i,i). + + E (output) DOUBLE PRECISION array, dimension (N-1) + The off-diagonal elements of the tridiagonal matrix T: + E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. + + TAU (output) DOUBLE PRECISION array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= 1. + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors + + Q = H(n-1) . . . H(2) H(1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in + A(1:i-1,i+1), and tau in TAU(i). + + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors + + Q = H(1) H(2) . . . H(n-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), + and tau in TAU(i). + + The contents of A on exit are illustrated by the following examples + with n = 5: + + if UPLO = 'U': if UPLO = 'L': + + ( d e v2 v3 v4 ) ( d ) + ( d e v3 v4 ) ( e d ) + ( d e v4 ) ( v1 e d ) + ( d e ) ( v1 v2 e d ) + ( d ) ( v1 v2 v3 e d ) + + where d and e denote diagonal and off-diagonal elements of T, and vi + denotes an element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tau; + --work; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*lwork < 1 && ! lquery) { + *info = -9; + } + + if (*info == 0) { + +/* Determine the block size. */ + + nb = ilaenv_(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + lwkopt = *n * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DSYTRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1] = 1.; + return 0; + } + + nx = *n; + iws = 1; + if (nb > 1 && nb < *n) { + +/* + Determine when to cross over from blocked to unblocked code + (last block is always handled by unblocked code). + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "DSYTRD", uplo, n, &c_n1, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *n) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: determine the + minimum value of NB, and reduce NB or force use of + unblocked code by setting NX = N. + + Computing MAX +*/ + i__1 = *lwork / ldwork; + nb = max(i__1,1); + nbmin = ilaenv_(&c__2, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, + (ftnlen)6, (ftnlen)1); + if (nb < nbmin) { + nx = *n; + } + } + } else { + nx = *n; + } + } else { + nb = 1; + } + + if (upper) { + +/* + Reduce the upper triangle of A. + Columns 1:kk are handled by the unblocked method. +*/ + + kk = *n - (*n - nx + nb - 1) / nb * nb; + i__1 = kk + 1; + i__2 = -nb; + for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { + +/* + Reduce columns i:i+nb-1 to tridiagonal form and form the + matrix W which is needed to update the unreduced part of + the matrix +*/ + + i__3 = i__ + nb - 1; + dlatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & + work[1], &ldwork); + +/* + Update the unreduced submatrix A(1:i-1,1:i-1), using an + update of the form: A := A - V*W' - W*V' +*/ + + i__3 = i__ - 1; + dsyr2k_(uplo, "No transpose", &i__3, &nb, &c_b151, &a[i__ * + a_dim1 + 1], lda, &work[1], &ldwork, &c_b15, &a[a_offset], + lda); + +/* + Copy superdiagonal elements back into A, and diagonal + elements into D +*/ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j - 1 + j * a_dim1] = e[j - 1]; + d__[j] = a[j + j * a_dim1]; +/* L10: */ + } +/* L20: */ + } + +/* Use unblocked code to reduce the last or only block */ + + dsytd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo); + } else { + +/* Reduce the lower triangle of A */ + + i__2 = *n - nx; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { + +/* + Reduce columns i:i+nb-1 to tridiagonal form and form the + matrix W which is needed to update the unreduced part of + the matrix +*/ + + i__3 = *n - i__ + 1; + dlatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], & + tau[i__], &work[1], &ldwork); + +/* + Update the unreduced submatrix A(i+ib:n,i+ib:n), using + an update of the form: A := A - V*W' - W*V' +*/ + + i__3 = *n - i__ - nb + 1; + dsyr2k_(uplo, "No transpose", &i__3, &nb, &c_b151, &a[i__ + nb + + i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b15, &a[ + i__ + nb + (i__ + nb) * a_dim1], lda); + +/* + Copy subdiagonal elements back into A, and diagonal + elements into D +*/ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j + 1 + j * a_dim1] = e[j]; + d__[j] = a[j + j * a_dim1]; +/* L30: */ + } +/* L40: */ + } + +/* Use unblocked code to reduce the last or only block */ + + i__1 = *n - i__ + 1; + dsytd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], + &tau[i__], &iinfo); + } + + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DSYTRD */ + +} /* dsytrd_ */ + +/* Subroutine */ int dtrevc_(char *side, char *howmny, logical *select, + integer *n, doublereal *t, integer *ldt, doublereal *vl, integer * + ldvl, doublereal *vr, integer *ldvr, integer *mm, integer *m, + doublereal *work, integer *info) +{ + /* System generated locals */ + integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3; + doublereal d__1, d__2, d__3, d__4; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, j, k; + static doublereal x[4] /* was [2][2] */; + static integer j1, j2, n2, ii, ki, ip, is; + static doublereal wi, wr, rec, ulp, beta, emax; + static logical pair; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + static logical allv; + static integer ierr; + static doublereal unfl, ovfl, smin; + static logical over; + static doublereal vmax; + static integer jnxt; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + static doublereal scale; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + static doublereal remax; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static logical leftv, bothv; + extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + static doublereal vcrit; + static logical somev; + static doublereal xnorm; + extern /* Subroutine */ int dlaln2_(logical *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, doublereal * + , doublereal *, integer *, doublereal *, doublereal *, integer *), + dlabad_(doublereal *, doublereal *); + + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static doublereal bignum; + static logical rightv; + static doublereal smlnum; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DTREVC computes some or all of the right and/or left eigenvectors of + a real upper quasi-triangular matrix T. + + The right eigenvector x and the left eigenvector y of T corresponding + to an eigenvalue w are defined by: + + T*x = w*x, y'*T = w*y' + + where y' denotes the conjugate transpose of the vector y. + + If all eigenvectors are requested, the routine may either return the + matrices X and/or Y of right or left eigenvectors of T, or the + products Q*X and/or Q*Y, where Q is an input orthogonal + matrix. If T was obtained from the real-Schur factorization of an + original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of + right or left eigenvectors of A. + + T must be in Schur canonical form (as returned by DHSEQR), that is, + block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each + 2-by-2 diagonal block has its diagonal elements equal and its + off-diagonal elements of opposite sign. Corresponding to each 2-by-2 + diagonal block is a complex conjugate pair of eigenvalues and + eigenvectors; only one eigenvector of the pair is computed, namely + the one corresponding to the eigenvalue with positive imaginary part. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'R': compute right eigenvectors only; + = 'L': compute left eigenvectors only; + = 'B': compute both right and left eigenvectors. + + HOWMNY (input) CHARACTER*1 + = 'A': compute all right and/or left eigenvectors; + = 'B': compute all right and/or left eigenvectors, + and backtransform them using the input matrices + supplied in VR and/or VL; + = 'S': compute selected right and/or left eigenvectors, + specified by the logical array SELECT. + + SELECT (input/output) LOGICAL array, dimension (N) + If HOWMNY = 'S', SELECT specifies the eigenvectors to be + computed. + If HOWMNY = 'A' or 'B', SELECT is not referenced. + To select the real eigenvector corresponding to a real + eigenvalue w(j), SELECT(j) must be set to .TRUE.. To select + the complex eigenvector corresponding to a complex conjugate + pair w(j) and w(j+1), either SELECT(j) or SELECT(j+1) must be + set to .TRUE.; then on exit SELECT(j) is .TRUE. and + SELECT(j+1) is .FALSE.. + + N (input) INTEGER + The order of the matrix T. N >= 0. + + T (input) DOUBLE PRECISION array, dimension (LDT,N) + The upper quasi-triangular matrix T in Schur canonical form. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= max(1,N). + + VL (input/output) DOUBLE PRECISION array, dimension (LDVL,MM) + On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must + contain an N-by-N matrix Q (usually the orthogonal matrix Q + of Schur vectors returned by DHSEQR). + On exit, if SIDE = 'L' or 'B', VL contains: + if HOWMNY = 'A', the matrix Y of left eigenvectors of T; + VL has the same quasi-lower triangular form + as T'. If T(i,i) is a real eigenvalue, then + the i-th column VL(i) of VL is its + corresponding eigenvector. If T(i:i+1,i:i+1) + is a 2-by-2 block whose eigenvalues are + complex-conjugate eigenvalues of T, then + VL(i)+sqrt(-1)*VL(i+1) is the complex + eigenvector corresponding to the eigenvalue + with positive real part. + if HOWMNY = 'B', the matrix Q*Y; + if HOWMNY = 'S', the left eigenvectors of T specified by + SELECT, stored consecutively in the columns + of VL, in the same order as their + eigenvalues. + A complex eigenvector corresponding to a complex eigenvalue + is stored in two consecutive columns, the first holding the + real part, and the second the imaginary part. + If SIDE = 'R', VL is not referenced. + + LDVL (input) INTEGER + The leading dimension of the array VL. LDVL >= max(1,N) if + SIDE = 'L' or 'B'; LDVL >= 1 otherwise. + + VR (input/output) DOUBLE PRECISION array, dimension (LDVR,MM) + On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must + contain an N-by-N matrix Q (usually the orthogonal matrix Q + of Schur vectors returned by DHSEQR). + On exit, if SIDE = 'R' or 'B', VR contains: + if HOWMNY = 'A', the matrix X of right eigenvectors of T; + VR has the same quasi-upper triangular form + as T. If T(i,i) is a real eigenvalue, then + the i-th column VR(i) of VR is its + corresponding eigenvector. If T(i:i+1,i:i+1) + is a 2-by-2 block whose eigenvalues are + complex-conjugate eigenvalues of T, then + VR(i)+sqrt(-1)*VR(i+1) is the complex + eigenvector corresponding to the eigenvalue + with positive real part. + if HOWMNY = 'B', the matrix Q*X; + if HOWMNY = 'S', the right eigenvectors of T specified by + SELECT, stored consecutively in the columns + of VR, in the same order as their + eigenvalues. + A complex eigenvector corresponding to a complex eigenvalue + is stored in two consecutive columns, the first holding the + real part and the second the imaginary part. + If SIDE = 'L', VR is not referenced. + + LDVR (input) INTEGER + The leading dimension of the array VR. LDVR >= max(1,N) if + SIDE = 'R' or 'B'; LDVR >= 1 otherwise. + + MM (input) INTEGER + The number of columns in the arrays VL and/or VR. MM >= M. + + M (output) INTEGER + The number of columns in the arrays VL and/or VR actually + used to store the eigenvectors. + If HOWMNY = 'A' or 'B', M is set to N. + Each selected real eigenvector occupies one column and each + selected complex eigenvector occupies two columns. + + WORK (workspace) DOUBLE PRECISION array, dimension (3*N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The algorithm used in this program is basically backward (forward) + substitution, with scaling to make the the code robust against + possible overflow. + + Each eigenvector is normalized so that the element of largest + magnitude has magnitude 1; here the magnitude of a complex number + (x,y) is taken to be |x| + |y|. + + ===================================================================== + + + Decode and test the input parameters +*/ + + /* Parameter adjustments */ + --select; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + + /* Function Body */ + bothv = lsame_(side, "B"); + rightv = lsame_(side, "R") || bothv; + leftv = lsame_(side, "L") || bothv; + + allv = lsame_(howmny, "A"); + over = lsame_(howmny, "B"); + somev = lsame_(howmny, "S"); + + *info = 0; + if (! rightv && ! leftv) { + *info = -1; + } else if (! allv && ! over && ! somev) { + *info = -2; + } else if (*n < 0) { + *info = -4; + } else if (*ldt < max(1,*n)) { + *info = -6; + } else if (*ldvl < 1 || leftv && *ldvl < *n) { + *info = -8; + } else if (*ldvr < 1 || rightv && *ldvr < *n) { + *info = -10; + } else { + +/* + Set M to the number of columns required to store the selected + eigenvectors, standardize the array SELECT if necessary, and + test MM. +*/ + + if (somev) { + *m = 0; + pair = FALSE_; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (pair) { + pair = FALSE_; + select[j] = FALSE_; + } else { + if (j < *n) { + if (t[j + 1 + j * t_dim1] == 0.) { + if (select[j]) { + ++(*m); + } + } else { + pair = TRUE_; + if (select[j] || select[j + 1]) { + select[j] = TRUE_; + *m += 2; + } + } + } else { + if (select[*n]) { + ++(*m); + } + } + } +/* L10: */ + } + } else { + *m = *n; + } + + if (*mm < *m) { + *info = -11; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DTREVC", &i__1); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + +/* Set the constants to control overflow. */ + + unfl = SAFEMINIMUM; + ovfl = 1. / unfl; + dlabad_(&unfl, &ovfl); + ulp = PRECISION; + smlnum = unfl * (*n / ulp); + bignum = (1. - ulp) / smlnum; + +/* + Compute 1-norm of each column of strictly upper triangular + part of T to control overflow in triangular solver. +*/ + + work[1] = 0.; + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + work[j] = 0.; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[j] += (d__1 = t[i__ + j * t_dim1], abs(d__1)); +/* L20: */ + } +/* L30: */ + } + +/* + Index IP is used to specify the real or complex eigenvalue: + IP = 0, real eigenvalue, + 1, first of conjugate complex pair: (wr,wi) + -1, second of conjugate complex pair: (wr,wi) +*/ + + n2 = *n << 1; + + if (rightv) { + +/* Compute right eigenvectors. */ + + ip = 0; + is = *m; + for (ki = *n; ki >= 1; --ki) { + + if (ip == 1) { + goto L130; + } + if (ki == 1) { + goto L40; + } + if (t[ki + (ki - 1) * t_dim1] == 0.) { + goto L40; + } + ip = -1; + +L40: + if (somev) { + if (ip == 0) { + if (! select[ki]) { + goto L130; + } + } else { + if (! select[ki - 1]) { + goto L130; + } + } + } + +/* Compute the KI-th eigenvalue (WR,WI). */ + + wr = t[ki + ki * t_dim1]; + wi = 0.; + if (ip != 0) { + wi = sqrt((d__1 = t[ki + (ki - 1) * t_dim1], abs(d__1))) * + sqrt((d__2 = t[ki - 1 + ki * t_dim1], abs(d__2))); + } +/* Computing MAX */ + d__1 = ulp * (abs(wr) + abs(wi)); + smin = max(d__1,smlnum); + + if (ip == 0) { + +/* Real right eigenvector */ + + work[ki + *n] = 1.; + +/* Form right-hand side */ + + i__1 = ki - 1; + for (k = 1; k <= i__1; ++k) { + work[k + *n] = -t[k + ki * t_dim1]; +/* L50: */ + } + +/* + Solve the upper quasi-triangular system: + (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK. +*/ + + jnxt = ki - 1; + for (j = ki - 1; j >= 1; --j) { + if (j > jnxt) { + goto L60; + } + j1 = j; + j2 = j; + jnxt = j - 1; + if (j > 1) { + if (t[j + (j - 1) * t_dim1] != 0.) { + j1 = j - 1; + jnxt = j - 2; + } + } + + if (j1 == j2) { + +/* 1-by-1 diagonal block */ + + dlaln2_(&c_false, &c__1, &c__1, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &c_b29, x, &c__2, &scale, &xnorm, + &ierr); + +/* + Scale X(1,1) to avoid overflow when updating + the right-hand side. +*/ + + if (xnorm > 1.) { + if (work[j] > bignum / xnorm) { + x[0] /= xnorm; + scale /= xnorm; + } + } + +/* Scale if necessary */ + + if (scale != 1.) { + dscal_(&ki, &scale, &work[*n + 1], &c__1); + } + work[j + *n] = x[0]; + +/* Update right-hand side */ + + i__1 = j - 1; + d__1 = -x[0]; + daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); + + } else { + +/* 2-by-2 diagonal block */ + + dlaln2_(&c_false, &c__2, &c__1, &smin, &c_b15, &t[j - + 1 + (j - 1) * t_dim1], ldt, &c_b15, &c_b15, & + work[j - 1 + *n], n, &wr, &c_b29, x, &c__2, & + scale, &xnorm, &ierr); + +/* + Scale X(1,1) and X(2,1) to avoid overflow when + updating the right-hand side. +*/ + + if (xnorm > 1.) { +/* Computing MAX */ + d__1 = work[j - 1], d__2 = work[j]; + beta = max(d__1,d__2); + if (beta > bignum / xnorm) { + x[0] /= xnorm; + x[1] /= xnorm; + scale /= xnorm; + } + } + +/* Scale if necessary */ + + if (scale != 1.) { + dscal_(&ki, &scale, &work[*n + 1], &c__1); + } + work[j - 1 + *n] = x[0]; + work[j + *n] = x[1]; + +/* Update right-hand side */ + + i__1 = j - 2; + d__1 = -x[0]; + daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, + &work[*n + 1], &c__1); + i__1 = j - 2; + d__1 = -x[1]; + daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); + } +L60: + ; + } + +/* Copy the vector x or Q*x to VR and normalize. */ + + if (! over) { + dcopy_(&ki, &work[*n + 1], &c__1, &vr[is * vr_dim1 + 1], & + c__1); + + ii = idamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); + remax = 1. / (d__1 = vr[ii + is * vr_dim1], abs(d__1)); + dscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); + + i__1 = *n; + for (k = ki + 1; k <= i__1; ++k) { + vr[k + is * vr_dim1] = 0.; +/* L70: */ + } + } else { + if (ki > 1) { + i__1 = ki - 1; + dgemv_("N", n, &i__1, &c_b15, &vr[vr_offset], ldvr, & + work[*n + 1], &c__1, &work[ki + *n], &vr[ki * + vr_dim1 + 1], &c__1); + } + + ii = idamax_(n, &vr[ki * vr_dim1 + 1], &c__1); + remax = 1. / (d__1 = vr[ii + ki * vr_dim1], abs(d__1)); + dscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); + } + + } else { + +/* + Complex right eigenvector. + + Initial solve + [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0. + [ (T(KI,KI-1) T(KI,KI) ) ] +*/ + + if ((d__1 = t[ki - 1 + ki * t_dim1], abs(d__1)) >= (d__2 = t[ + ki + (ki - 1) * t_dim1], abs(d__2))) { + work[ki - 1 + *n] = 1.; + work[ki + n2] = wi / t[ki - 1 + ki * t_dim1]; + } else { + work[ki - 1 + *n] = -wi / t[ki + (ki - 1) * t_dim1]; + work[ki + n2] = 1.; + } + work[ki + *n] = 0.; + work[ki - 1 + n2] = 0.; + +/* Form right-hand side */ + + i__1 = ki - 2; + for (k = 1; k <= i__1; ++k) { + work[k + *n] = -work[ki - 1 + *n] * t[k + (ki - 1) * + t_dim1]; + work[k + n2] = -work[ki + n2] * t[k + ki * t_dim1]; +/* L80: */ + } + +/* + Solve upper quasi-triangular system: + (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2) +*/ + + jnxt = ki - 2; + for (j = ki - 2; j >= 1; --j) { + if (j > jnxt) { + goto L90; + } + j1 = j; + j2 = j; + jnxt = j - 1; + if (j > 1) { + if (t[j + (j - 1) * t_dim1] != 0.) { + j1 = j - 1; + jnxt = j - 2; + } + } + + if (j1 == j2) { + +/* 1-by-1 diagonal block */ + + dlaln2_(&c_false, &c__1, &c__2, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &wi, x, &c__2, &scale, &xnorm, & + ierr); + +/* + Scale X(1,1) and X(1,2) to avoid overflow when + updating the right-hand side. +*/ + + if (xnorm > 1.) { + if (work[j] > bignum / xnorm) { + x[0] /= xnorm; + x[2] /= xnorm; + scale /= xnorm; + } + } + +/* Scale if necessary */ + + if (scale != 1.) { + dscal_(&ki, &scale, &work[*n + 1], &c__1); + dscal_(&ki, &scale, &work[n2 + 1], &c__1); + } + work[j + *n] = x[0]; + work[j + n2] = x[2]; + +/* Update the right-hand side */ + + i__1 = j - 1; + d__1 = -x[0]; + daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); + i__1 = j - 1; + d__1 = -x[2]; + daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ + n2 + 1], &c__1); + + } else { + +/* 2-by-2 diagonal block */ + + dlaln2_(&c_false, &c__2, &c__2, &smin, &c_b15, &t[j - + 1 + (j - 1) * t_dim1], ldt, &c_b15, &c_b15, & + work[j - 1 + *n], n, &wr, &wi, x, &c__2, & + scale, &xnorm, &ierr); + +/* + Scale X to avoid overflow when updating + the right-hand side. +*/ + + if (xnorm > 1.) { +/* Computing MAX */ + d__1 = work[j - 1], d__2 = work[j]; + beta = max(d__1,d__2); + if (beta > bignum / xnorm) { + rec = 1. / xnorm; + x[0] *= rec; + x[2] *= rec; + x[1] *= rec; + x[3] *= rec; + scale *= rec; + } + } + +/* Scale if necessary */ + + if (scale != 1.) { + dscal_(&ki, &scale, &work[*n + 1], &c__1); + dscal_(&ki, &scale, &work[n2 + 1], &c__1); + } + work[j - 1 + *n] = x[0]; + work[j + *n] = x[1]; + work[j - 1 + n2] = x[2]; + work[j + n2] = x[3]; + +/* Update the right-hand side */ + + i__1 = j - 2; + d__1 = -x[0]; + daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, + &work[*n + 1], &c__1); + i__1 = j - 2; + d__1 = -x[1]; + daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); + i__1 = j - 2; + d__1 = -x[2]; + daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, + &work[n2 + 1], &c__1); + i__1 = j - 2; + d__1 = -x[3]; + daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ + n2 + 1], &c__1); + } +L90: + ; + } + +/* Copy the vector x or Q*x to VR and normalize. */ + + if (! over) { + dcopy_(&ki, &work[*n + 1], &c__1, &vr[(is - 1) * vr_dim1 + + 1], &c__1); + dcopy_(&ki, &work[n2 + 1], &c__1, &vr[is * vr_dim1 + 1], & + c__1); + + emax = 0.; + i__1 = ki; + for (k = 1; k <= i__1; ++k) { +/* Computing MAX */ + d__3 = emax, d__4 = (d__1 = vr[k + (is - 1) * vr_dim1] + , abs(d__1)) + (d__2 = vr[k + is * vr_dim1], + abs(d__2)); + emax = max(d__3,d__4); +/* L100: */ + } + + remax = 1. / emax; + dscal_(&ki, &remax, &vr[(is - 1) * vr_dim1 + 1], &c__1); + dscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); + + i__1 = *n; + for (k = ki + 1; k <= i__1; ++k) { + vr[k + (is - 1) * vr_dim1] = 0.; + vr[k + is * vr_dim1] = 0.; +/* L110: */ + } + + } else { + + if (ki > 2) { + i__1 = ki - 2; + dgemv_("N", n, &i__1, &c_b15, &vr[vr_offset], ldvr, & + work[*n + 1], &c__1, &work[ki - 1 + *n], &vr[( + ki - 1) * vr_dim1 + 1], &c__1); + i__1 = ki - 2; + dgemv_("N", n, &i__1, &c_b15, &vr[vr_offset], ldvr, & + work[n2 + 1], &c__1, &work[ki + n2], &vr[ki * + vr_dim1 + 1], &c__1); + } else { + dscal_(n, &work[ki - 1 + *n], &vr[(ki - 1) * vr_dim1 + + 1], &c__1); + dscal_(n, &work[ki + n2], &vr[ki * vr_dim1 + 1], & + c__1); + } + + emax = 0.; + i__1 = *n; + for (k = 1; k <= i__1; ++k) { +/* Computing MAX */ + d__3 = emax, d__4 = (d__1 = vr[k + (ki - 1) * vr_dim1] + , abs(d__1)) + (d__2 = vr[k + ki * vr_dim1], + abs(d__2)); + emax = max(d__3,d__4); +/* L120: */ + } + remax = 1. / emax; + dscal_(n, &remax, &vr[(ki - 1) * vr_dim1 + 1], &c__1); + dscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); + } + } + + --is; + if (ip != 0) { + --is; + } +L130: + if (ip == 1) { + ip = 0; + } + if (ip == -1) { + ip = 1; + } +/* L140: */ + } + } + + if (leftv) { + +/* Compute left eigenvectors. */ + + ip = 0; + is = 1; + i__1 = *n; + for (ki = 1; ki <= i__1; ++ki) { + + if (ip == -1) { + goto L250; + } + if (ki == *n) { + goto L150; + } + if (t[ki + 1 + ki * t_dim1] == 0.) { + goto L150; + } + ip = 1; + +L150: + if (somev) { + if (! select[ki]) { + goto L250; + } + } + +/* Compute the KI-th eigenvalue (WR,WI). */ + + wr = t[ki + ki * t_dim1]; + wi = 0.; + if (ip != 0) { + wi = sqrt((d__1 = t[ki + (ki + 1) * t_dim1], abs(d__1))) * + sqrt((d__2 = t[ki + 1 + ki * t_dim1], abs(d__2))); + } +/* Computing MAX */ + d__1 = ulp * (abs(wr) + abs(wi)); + smin = max(d__1,smlnum); + + if (ip == 0) { + +/* Real left eigenvector. */ + + work[ki + *n] = 1.; + +/* Form right-hand side */ + + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + work[k + *n] = -t[ki + k * t_dim1]; +/* L160: */ + } + +/* + Solve the quasi-triangular system: + (T(KI+1:N,KI+1:N) - WR)'*X = SCALE*WORK +*/ + + vmax = 1.; + vcrit = bignum; + + jnxt = ki + 1; + i__2 = *n; + for (j = ki + 1; j <= i__2; ++j) { + if (j < jnxt) { + goto L170; + } + j1 = j; + j2 = j; + jnxt = j + 1; + if (j < *n) { + if (t[j + 1 + j * t_dim1] != 0.) { + j2 = j + 1; + jnxt = j + 2; + } + } + + if (j1 == j2) { + +/* + 1-by-1 diagonal block + + Scale if necessary to avoid overflow when forming + the right-hand side. +*/ + + if (work[j] > vcrit) { + rec = 1. / vmax; + i__3 = *n - ki + 1; + dscal_(&i__3, &rec, &work[ki + *n], &c__1); + vmax = 1.; + vcrit = bignum; + } + + i__3 = j - ki - 1; + work[j + *n] -= ddot_(&i__3, &t[ki + 1 + j * t_dim1], + &c__1, &work[ki + 1 + *n], &c__1); + +/* Solve (T(J,J)-WR)'*X = WORK */ + + dlaln2_(&c_false, &c__1, &c__1, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &c_b29, x, &c__2, &scale, &xnorm, + &ierr); + +/* Scale if necessary */ + + if (scale != 1.) { + i__3 = *n - ki + 1; + dscal_(&i__3, &scale, &work[ki + *n], &c__1); + } + work[j + *n] = x[0]; +/* Computing MAX */ + d__2 = (d__1 = work[j + *n], abs(d__1)); + vmax = max(d__2,vmax); + vcrit = bignum / vmax; + + } else { + +/* + 2-by-2 diagonal block + + Scale if necessary to avoid overflow when forming + the right-hand side. + + Computing MAX +*/ + d__1 = work[j], d__2 = work[j + 1]; + beta = max(d__1,d__2); + if (beta > vcrit) { + rec = 1. / vmax; + i__3 = *n - ki + 1; + dscal_(&i__3, &rec, &work[ki + *n], &c__1); + vmax = 1.; + vcrit = bignum; + } + + i__3 = j - ki - 1; + work[j + *n] -= ddot_(&i__3, &t[ki + 1 + j * t_dim1], + &c__1, &work[ki + 1 + *n], &c__1); + + i__3 = j - ki - 1; + work[j + 1 + *n] -= ddot_(&i__3, &t[ki + 1 + (j + 1) * + t_dim1], &c__1, &work[ki + 1 + *n], &c__1); + +/* + Solve + [T(J,J)-WR T(J,J+1) ]'* X = SCALE*( WORK1 ) + [T(J+1,J) T(J+1,J+1)-WR] ( WORK2 ) +*/ + + dlaln2_(&c_true, &c__2, &c__1, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &c_b29, x, &c__2, &scale, &xnorm, + &ierr); + +/* Scale if necessary */ + + if (scale != 1.) { + i__3 = *n - ki + 1; + dscal_(&i__3, &scale, &work[ki + *n], &c__1); + } + work[j + *n] = x[0]; + work[j + 1 + *n] = x[1]; + +/* Computing MAX */ + d__3 = (d__1 = work[j + *n], abs(d__1)), d__4 = (d__2 + = work[j + 1 + *n], abs(d__2)), d__3 = max( + d__3,d__4); + vmax = max(d__3,vmax); + vcrit = bignum / vmax; + + } +L170: + ; + } + +/* Copy the vector x or Q*x to VL and normalize. */ + + if (! over) { + i__2 = *n - ki + 1; + dcopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * + vl_dim1], &c__1); + + i__2 = *n - ki + 1; + ii = idamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - + 1; + remax = 1. / (d__1 = vl[ii + is * vl_dim1], abs(d__1)); + i__2 = *n - ki + 1; + dscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); + + i__2 = ki - 1; + for (k = 1; k <= i__2; ++k) { + vl[k + is * vl_dim1] = 0.; +/* L180: */ + } + + } else { + + if (ki < *n) { + i__2 = *n - ki; + dgemv_("N", n, &i__2, &c_b15, &vl[(ki + 1) * vl_dim1 + + 1], ldvl, &work[ki + 1 + *n], &c__1, &work[ + ki + *n], &vl[ki * vl_dim1 + 1], &c__1); + } + + ii = idamax_(n, &vl[ki * vl_dim1 + 1], &c__1); + remax = 1. / (d__1 = vl[ii + ki * vl_dim1], abs(d__1)); + dscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + + } + + } else { + +/* + Complex left eigenvector. + + Initial solve: + ((T(KI,KI) T(KI,KI+1) )' - (WR - I* WI))*X = 0. + ((T(KI+1,KI) T(KI+1,KI+1)) ) +*/ + + if ((d__1 = t[ki + (ki + 1) * t_dim1], abs(d__1)) >= (d__2 = + t[ki + 1 + ki * t_dim1], abs(d__2))) { + work[ki + *n] = wi / t[ki + (ki + 1) * t_dim1]; + work[ki + 1 + n2] = 1.; + } else { + work[ki + *n] = 1.; + work[ki + 1 + n2] = -wi / t[ki + 1 + ki * t_dim1]; + } + work[ki + 1 + *n] = 0.; + work[ki + n2] = 0.; + +/* Form right-hand side */ + + i__2 = *n; + for (k = ki + 2; k <= i__2; ++k) { + work[k + *n] = -work[ki + *n] * t[ki + k * t_dim1]; + work[k + n2] = -work[ki + 1 + n2] * t[ki + 1 + k * t_dim1] + ; +/* L190: */ + } + +/* + Solve complex quasi-triangular system: + ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2 +*/ + + vmax = 1.; + vcrit = bignum; + + jnxt = ki + 2; + i__2 = *n; + for (j = ki + 2; j <= i__2; ++j) { + if (j < jnxt) { + goto L200; + } + j1 = j; + j2 = j; + jnxt = j + 1; + if (j < *n) { + if (t[j + 1 + j * t_dim1] != 0.) { + j2 = j + 1; + jnxt = j + 2; + } + } + + if (j1 == j2) { + +/* + 1-by-1 diagonal block + + Scale if necessary to avoid overflow when + forming the right-hand side elements. +*/ + + if (work[j] > vcrit) { + rec = 1. / vmax; + i__3 = *n - ki + 1; + dscal_(&i__3, &rec, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + dscal_(&i__3, &rec, &work[ki + n2], &c__1); + vmax = 1.; + vcrit = bignum; + } + + i__3 = j - ki - 2; + work[j + *n] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + *n], &c__1); + i__3 = j - ki - 2; + work[j + n2] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + n2], &c__1); + +/* Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2 */ + + d__1 = -wi; + dlaln2_(&c_false, &c__1, &c__2, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &d__1, x, &c__2, &scale, &xnorm, & + ierr); + +/* Scale if necessary */ + + if (scale != 1.) { + i__3 = *n - ki + 1; + dscal_(&i__3, &scale, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + dscal_(&i__3, &scale, &work[ki + n2], &c__1); + } + work[j + *n] = x[0]; + work[j + n2] = x[2]; +/* Computing MAX */ + d__3 = (d__1 = work[j + *n], abs(d__1)), d__4 = (d__2 + = work[j + n2], abs(d__2)), d__3 = max(d__3, + d__4); + vmax = max(d__3,vmax); + vcrit = bignum / vmax; + + } else { + +/* + 2-by-2 diagonal block + + Scale if necessary to avoid overflow when forming + the right-hand side elements. + + Computing MAX +*/ + d__1 = work[j], d__2 = work[j + 1]; + beta = max(d__1,d__2); + if (beta > vcrit) { + rec = 1. / vmax; + i__3 = *n - ki + 1; + dscal_(&i__3, &rec, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + dscal_(&i__3, &rec, &work[ki + n2], &c__1); + vmax = 1.; + vcrit = bignum; + } + + i__3 = j - ki - 2; + work[j + *n] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + *n], &c__1); + + i__3 = j - ki - 2; + work[j + n2] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + n2], &c__1); + + i__3 = j - ki - 2; + work[j + 1 + *n] -= ddot_(&i__3, &t[ki + 2 + (j + 1) * + t_dim1], &c__1, &work[ki + 2 + *n], &c__1); + + i__3 = j - ki - 2; + work[j + 1 + n2] -= ddot_(&i__3, &t[ki + 2 + (j + 1) * + t_dim1], &c__1, &work[ki + 2 + n2], &c__1); + +/* + Solve 2-by-2 complex linear equation + ([T(j,j) T(j,j+1) ]'-(wr-i*wi)*I)*X = SCALE*B + ([T(j+1,j) T(j+1,j+1)] ) +*/ + + d__1 = -wi; + dlaln2_(&c_true, &c__2, &c__2, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &d__1, x, &c__2, &scale, &xnorm, & + ierr); + +/* Scale if necessary */ + + if (scale != 1.) { + i__3 = *n - ki + 1; + dscal_(&i__3, &scale, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + dscal_(&i__3, &scale, &work[ki + n2], &c__1); + } + work[j + *n] = x[0]; + work[j + n2] = x[2]; + work[j + 1 + *n] = x[1]; + work[j + 1 + n2] = x[3]; +/* Computing MAX */ + d__1 = abs(x[0]), d__2 = abs(x[2]), d__1 = max(d__1, + d__2), d__2 = abs(x[1]), d__1 = max(d__1,d__2) + , d__2 = abs(x[3]), d__1 = max(d__1,d__2); + vmax = max(d__1,vmax); + vcrit = bignum / vmax; + + } +L200: + ; + } + +/* + Copy the vector x or Q*x to VL and normalize. + + L210: +*/ + if (! over) { + i__2 = *n - ki + 1; + dcopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * + vl_dim1], &c__1); + i__2 = *n - ki + 1; + dcopy_(&i__2, &work[ki + n2], &c__1, &vl[ki + (is + 1) * + vl_dim1], &c__1); + + emax = 0.; + i__2 = *n; + for (k = ki; k <= i__2; ++k) { +/* Computing MAX */ + d__3 = emax, d__4 = (d__1 = vl[k + is * vl_dim1], abs( + d__1)) + (d__2 = vl[k + (is + 1) * vl_dim1], + abs(d__2)); + emax = max(d__3,d__4); +/* L220: */ + } + remax = 1. / emax; + i__2 = *n - ki + 1; + dscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); + i__2 = *n - ki + 1; + dscal_(&i__2, &remax, &vl[ki + (is + 1) * vl_dim1], &c__1) + ; + + i__2 = ki - 1; + for (k = 1; k <= i__2; ++k) { + vl[k + is * vl_dim1] = 0.; + vl[k + (is + 1) * vl_dim1] = 0.; +/* L230: */ + } + } else { + if (ki < *n - 1) { + i__2 = *n - ki - 1; + dgemv_("N", n, &i__2, &c_b15, &vl[(ki + 2) * vl_dim1 + + 1], ldvl, &work[ki + 2 + *n], &c__1, &work[ + ki + *n], &vl[ki * vl_dim1 + 1], &c__1); + i__2 = *n - ki - 1; + dgemv_("N", n, &i__2, &c_b15, &vl[(ki + 2) * vl_dim1 + + 1], ldvl, &work[ki + 2 + n2], &c__1, &work[ + ki + 1 + n2], &vl[(ki + 1) * vl_dim1 + 1], & + c__1); + } else { + dscal_(n, &work[ki + *n], &vl[ki * vl_dim1 + 1], & + c__1); + dscal_(n, &work[ki + 1 + n2], &vl[(ki + 1) * vl_dim1 + + 1], &c__1); + } + + emax = 0.; + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing MAX */ + d__3 = emax, d__4 = (d__1 = vl[k + ki * vl_dim1], abs( + d__1)) + (d__2 = vl[k + (ki + 1) * vl_dim1], + abs(d__2)); + emax = max(d__3,d__4); +/* L240: */ + } + remax = 1. / emax; + dscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + dscal_(n, &remax, &vl[(ki + 1) * vl_dim1 + 1], &c__1); + + } + + } + + ++is; + if (ip != 0) { + ++is; + } +L250: + if (ip == -1) { + ip = 0; + } + if (ip == 1) { + ip = -1; + } + +/* L260: */ + } + + } + + return 0; + +/* End of DTREVC */ + +} /* dtrevc_ */ + +/* Subroutine */ int dtrti2_(char *uplo, char *diag, integer *n, doublereal * + a, integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer j; + static doublereal ajj; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + static logical upper; + extern /* Subroutine */ int dtrmv_(char *, char *, char *, integer *, + doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *); + static logical nounit; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DTRTI2 computes the inverse of a real upper or lower triangular + matrix. + + This is the Level 2 BLAS version of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the matrix A is upper or lower triangular. + = 'U': Upper triangular + = 'L': Lower triangular + + DIAG (input) CHARACTER*1 + Specifies whether or not the matrix A is unit triangular. + = 'N': Non-unit triangular + = 'U': Unit triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the triangular matrix A. If UPLO = 'U', the + leading n by n upper triangular part of the array A contains + the upper triangular matrix, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n by n lower triangular part of the array A contains + the lower triangular matrix, and the strictly upper + triangular part of A is not referenced. If DIAG = 'U', the + diagonal elements of A are also not referenced and are + assumed to be 1. + + On exit, the (triangular) inverse of the original matrix, in + the same storage format. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + nounit = lsame_(diag, "N"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DTRTI2", &i__1); + return 0; + } + + if (upper) { + +/* Compute inverse of upper triangular matrix. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (nounit) { + a[j + j * a_dim1] = 1. / a[j + j * a_dim1]; + ajj = -a[j + j * a_dim1]; + } else { + ajj = -1.; + } + +/* Compute elements 1:j-1 of j-th column. */ + + i__2 = j - 1; + dtrmv_("Upper", "No transpose", diag, &i__2, &a[a_offset], lda, & + a[j * a_dim1 + 1], &c__1); + i__2 = j - 1; + dscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1); +/* L10: */ + } + } else { + +/* Compute inverse of lower triangular matrix. */ + + for (j = *n; j >= 1; --j) { + if (nounit) { + a[j + j * a_dim1] = 1. / a[j + j * a_dim1]; + ajj = -a[j + j * a_dim1]; + } else { + ajj = -1.; + } + if (j < *n) { + +/* Compute elements j+1:n of j-th column. */ + + i__1 = *n - j; + dtrmv_("Lower", "No transpose", diag, &i__1, &a[j + 1 + (j + + 1) * a_dim1], lda, &a[j + 1 + j * a_dim1], &c__1); + i__1 = *n - j; + dscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1); + } +/* L20: */ + } + } + + return 0; + +/* End of DTRTI2 */ + +} /* dtrti2_ */ + +/* Subroutine */ int dtrtri_(char *uplo, char *diag, integer *n, doublereal * + a, integer *lda, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, i__1, i__2[2], i__3, i__4, i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer j, jb, nb, nn; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), dtrsm_( + char *, char *, char *, char *, integer *, integer *, doublereal * + , doublereal *, integer *, doublereal *, integer *); + static logical upper; + extern /* Subroutine */ int dtrti2_(char *, char *, integer *, doublereal + *, integer *, integer *), xerbla_(char *, integer + *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical nounit; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + DTRTRI computes the inverse of a real upper or lower triangular + matrix A. + + This is the Level 3 BLAS version of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': A is upper triangular; + = 'L': A is lower triangular. + + DIAG (input) CHARACTER*1 + = 'N': A is non-unit triangular; + = 'U': A is unit triangular. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the triangular matrix A. If UPLO = 'U', the + leading N-by-N upper triangular part of the array A contains + the upper triangular matrix, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of the array A contains + the lower triangular matrix, and the strictly upper + triangular part of A is not referenced. If DIAG = 'U', the + diagonal elements of A are also not referenced and are + assumed to be 1. + On exit, the (triangular) inverse of the original matrix, in + the same storage format. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, A(i,i) is exactly zero. The triangular + matrix is singular and its inverse can not be computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + nounit = lsame_(diag, "N"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DTRTRI", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Check for singularity if non-unit. */ + + if (nounit) { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + if (a[*info + *info * a_dim1] == 0.) { + return 0; + } +/* L10: */ + } + *info = 0; + } + +/* + Determine the block size for this environment. + + Writing concatenation +*/ + i__2[0] = 1, a__1[0] = uplo; + i__2[1] = 1, a__1[1] = diag; + s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "DTRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)2); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code */ + + dtrti2_(uplo, diag, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code */ + + if (upper) { + +/* Compute inverse of upper triangular matrix */ + + i__1 = *n; + i__3 = nb; + for (j = 1; i__3 < 0 ? j >= i__1 : j <= i__1; j += i__3) { +/* Computing MIN */ + i__4 = nb, i__5 = *n - j + 1; + jb = min(i__4,i__5); + +/* Compute rows 1:j-1 of current block column */ + + i__4 = j - 1; + dtrmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, & + c_b15, &a[a_offset], lda, &a[j * a_dim1 + 1], lda); + i__4 = j - 1; + dtrsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, & + c_b151, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], + lda); + +/* Compute inverse of current diagonal block */ + + dtrti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info); +/* L20: */ + } + } else { + +/* Compute inverse of lower triangular matrix */ + + nn = (*n - 1) / nb * nb + 1; + i__3 = -nb; + for (j = nn; i__3 < 0 ? j >= 1 : j <= 1; j += i__3) { +/* Computing MIN */ + i__1 = nb, i__4 = *n - j + 1; + jb = min(i__1,i__4); + if (j + jb <= *n) { + +/* Compute rows j+jb:n of current block column */ + + i__1 = *n - j - jb + 1; + dtrmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, + &c_b15, &a[j + jb + (j + jb) * a_dim1], lda, &a[j + + jb + j * a_dim1], lda); + i__1 = *n - j - jb + 1; + dtrsm_("Right", "Lower", "No transpose", diag, &i__1, &jb, + &c_b151, &a[j + j * a_dim1], lda, &a[j + jb + j * + a_dim1], lda); + } + +/* Compute inverse of current diagonal block */ + + dtrti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info); +/* L30: */ + } + } + } + + return 0; + +/* End of DTRTRI */ + +} /* dtrtri_ */ + diff --git a/numpy/linalg/lapack_lite/f2c_lapack.c b/numpy/linalg/lapack_lite/f2c_lapack.c new file mode 100644 index 000000000..7a0dd491d --- /dev/null +++ b/numpy/linalg/lapack_lite/f2c_lapack.c @@ -0,0 +1,797 @@ +/* +NOTE: This is generated code. Look in Misc/lapack_lite for information on + remaking this file. +*/ +#include "f2c.h" + +#ifdef HAVE_CONFIG +#include "config.h" +#else +extern doublereal dlamch_(char *); +#define EPSILON dlamch_("Epsilon") +#define SAFEMINIMUM dlamch_("Safe minimum") +#define PRECISION dlamch_("Precision") +#define BASE dlamch_("Base") +#endif + +extern doublereal dlapy2_(doublereal *x, doublereal *y); + +/* +f2c knows the exact rules for precedence, and so omits parentheses where not +strictly necessary. Since this is generated code, we don't really care if +it's readable, and we know what is written is correct. So don't warn about +them. +*/ +#if defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wparentheses" +#endif + + +/* Table of constant values */ + +static integer c__0 = 0; +static real c_b163 = 0.f; +static real c_b164 = 1.f; +static integer c__1 = 1; + +integer ieeeck_(integer *ispec, real *zero, real *one) +{ + /* System generated locals */ + integer ret_val; + + /* Local variables */ + static real nan1, nan2, nan3, nan4, nan5, nan6, neginf, posinf, negzro, + newzro; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1998 + + + Purpose + ======= + + IEEECK is called from the ILAENV to verify that Infinity and + possibly NaN arithmetic is safe (i.e. will not trap). + + Arguments + ========= + + ISPEC (input) INTEGER + Specifies whether to test just for inifinity arithmetic + or whether to test for infinity and NaN arithmetic. + = 0: Verify infinity arithmetic only. + = 1: Verify infinity and NaN arithmetic. + + ZERO (input) REAL + Must contain the value 0.0 + This is passed to prevent the compiler from optimizing + away this code. + + ONE (input) REAL + Must contain the value 1.0 + This is passed to prevent the compiler from optimizing + away this code. + + RETURN VALUE: INTEGER + = 0: Arithmetic failed to produce the correct answers + = 1: Arithmetic produced the correct answers +*/ + + ret_val = 1; + + posinf = *one / *zero; + if (posinf <= *one) { + ret_val = 0; + return ret_val; + } + + neginf = -(*one) / *zero; + if (neginf >= *zero) { + ret_val = 0; + return ret_val; + } + + negzro = *one / (neginf + *one); + if (negzro != *zero) { + ret_val = 0; + return ret_val; + } + + neginf = *one / negzro; + if (neginf >= *zero) { + ret_val = 0; + return ret_val; + } + + newzro = negzro + *zero; + if (newzro != *zero) { + ret_val = 0; + return ret_val; + } + + posinf = *one / newzro; + if (posinf <= *one) { + ret_val = 0; + return ret_val; + } + + neginf *= posinf; + if (neginf >= *zero) { + ret_val = 0; + return ret_val; + } + + posinf *= posinf; + if (posinf <= *one) { + ret_val = 0; + return ret_val; + } + + +/* Return if we were only asked to check infinity arithmetic */ + + if (*ispec == 0) { + return ret_val; + } + + nan1 = posinf + neginf; + + nan2 = posinf / neginf; + + nan3 = posinf / posinf; + + nan4 = posinf * *zero; + + nan5 = neginf * negzro; + + nan6 = nan5 * 0.f; + + if (nan1 == nan1) { + ret_val = 0; + return ret_val; + } + + if (nan2 == nan2) { + ret_val = 0; + return ret_val; + } + + if (nan3 == nan3) { + ret_val = 0; + return ret_val; + } + + if (nan4 == nan4) { + ret_val = 0; + return ret_val; + } + + if (nan5 == nan5) { + ret_val = 0; + return ret_val; + } + + if (nan6 == nan6) { + ret_val = 0; + return ret_val; + } + + return ret_val; +} /* ieeeck_ */ + +integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, + integer *n2, integer *n3, integer *n4, ftnlen name_len, ftnlen + opts_len) +{ + /* System generated locals */ + integer ret_val; + + /* Builtin functions */ + /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); + integer s_cmp(char *, char *, ftnlen, ftnlen); + + /* Local variables */ + static integer i__; + static char c1[1], c2[2], c3[3], c4[2]; + static integer ic, nb, iz, nx; + static logical cname, sname; + static integer nbmin; + extern integer ieeeck_(integer *, real *, real *); + static char subnam[6]; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ILAENV is called from the LAPACK routines to choose problem-dependent + parameters for the local environment. See ISPEC for a description of + the parameters. + + This version provides a set of parameters which should give good, + but not optimal, performance on many of the currently available + computers. Users are encouraged to modify this subroutine to set + the tuning parameters for their particular machine using the option + and problem size information in the arguments. + + This routine will not function correctly if it is converted to all + lower case. Converting it to all upper case is allowed. + + Arguments + ========= + + ISPEC (input) INTEGER + Specifies the parameter to be returned as the value of + ILAENV. + = 1: the optimal blocksize; if this value is 1, an unblocked + algorithm will give the best performance. + = 2: the minimum block size for which the block routine + should be used; if the usable block size is less than + this value, an unblocked routine should be used. + = 3: the crossover point (in a block routine, for N less + than this value, an unblocked routine should be used) + = 4: the number of shifts, used in the nonsymmetric + eigenvalue routines + = 5: the minimum column dimension for blocking to be used; + rectangular blocks must have dimension at least k by m, + where k is given by ILAENV(2,...) and m by ILAENV(5,...) + = 6: the crossover point for the SVD (when reducing an m by n + matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds + this value, a QR factorization is used first to reduce + the matrix to a triangular form.) + = 7: the number of processors + = 8: the crossover point for the multishift QR and QZ methods + for nonsymmetric eigenvalue problems. + = 9: maximum size of the subproblems at the bottom of the + computation tree in the divide-and-conquer algorithm + (used by xGELSD and xGESDD) + =10: ieee NaN arithmetic can be trusted not to trap + =11: infinity arithmetic can be trusted not to trap + + NAME (input) CHARACTER*(*) + The name of the calling subroutine, in either upper case or + lower case. + + OPTS (input) CHARACTER*(*) + The character options to the subroutine NAME, concatenated + into a single character string. For example, UPLO = 'U', + TRANS = 'T', and DIAG = 'N' for a triangular routine would + be specified as OPTS = 'UTN'. + + N1 (input) INTEGER + N2 (input) INTEGER + N3 (input) INTEGER + N4 (input) INTEGER + Problem dimensions for the subroutine NAME; these may not all + be required. + + (ILAENV) (output) INTEGER + >= 0: the value of the parameter specified by ISPEC + < 0: if ILAENV = -k, the k-th argument had an illegal value. + + Further Details + =============== + + The following conventions have been used when calling ILAENV from the + LAPACK routines: + 1) OPTS is a concatenation of all of the character options to + subroutine NAME, in the same order that they appear in the + argument list for NAME, even if they are not used in determining + the value of the parameter specified by ISPEC. + 2) The problem dimensions N1, N2, N3, N4 are specified in the order + that they appear in the argument list for NAME. N1 is used + first, N2 second, and so on, and unused problem dimensions are + passed a value of -1. + 3) The parameter value returned by ILAENV is checked for validity in + the calling subroutine. For example, ILAENV is used to retrieve + the optimal blocksize for STRTRI as follows: + + NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) + IF( NB.LE.1 ) NB = MAX( 1, N ) + + ===================================================================== +*/ + + + switch (*ispec) { + case 1: goto L100; + case 2: goto L100; + case 3: goto L100; + case 4: goto L400; + case 5: goto L500; + case 6: goto L600; + case 7: goto L700; + case 8: goto L800; + case 9: goto L900; + case 10: goto L1000; + case 11: goto L1100; + } + +/* Invalid value for ISPEC */ + + ret_val = -1; + return ret_val; + +L100: + +/* Convert NAME to upper case if the first character is lower case. */ + + ret_val = 1; + s_copy(subnam, name__, (ftnlen)6, name_len); + ic = *(unsigned char *)subnam; + iz = 'Z'; + if (iz == 90 || iz == 122) { + +/* ASCII character set */ + + if (ic >= 97 && ic <= 122) { + *(unsigned char *)subnam = (char) (ic - 32); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 97 && ic <= 122) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); + } +/* L10: */ + } + } + + } else if (iz == 233 || iz == 169) { + +/* EBCDIC character set */ + + if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= 162 && + ic <= 169) { + *(unsigned char *)subnam = (char) (ic + 64); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= + 162 && ic <= 169) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic + 64); + } +/* L20: */ + } + } + + } else if (iz == 218 || iz == 250) { + +/* Prime machines: ASCII+128 */ + + if (ic >= 225 && ic <= 250) { + *(unsigned char *)subnam = (char) (ic - 32); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 225 && ic <= 250) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); + } +/* L30: */ + } + } + } + + *(unsigned char *)c1 = *(unsigned char *)subnam; + sname = *(unsigned char *)c1 == 'S' || *(unsigned char *)c1 == 'D'; + cname = *(unsigned char *)c1 == 'C' || *(unsigned char *)c1 == 'Z'; + if (! (cname || sname)) { + return ret_val; + } + s_copy(c2, subnam + 1, (ftnlen)2, (ftnlen)2); + s_copy(c3, subnam + 3, (ftnlen)3, (ftnlen)3); + s_copy(c4, c3 + 1, (ftnlen)2, (ftnlen)2); + + switch (*ispec) { + case 1: goto L110; + case 2: goto L200; + case 3: goto L300; + } + +L110: + +/* + ISPEC = 1: block size + + In these examples, separate code is provided for setting NB for + real and complex. We assume that NB will take the same value in + single or double precision. +*/ + + nb = 1; + + if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } else if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, + "RQF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen) + 3, (ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) + == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (s_cmp(c2, "PO", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } else if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nb = 32; + } else if (sname && s_cmp(c3, "GST", (ftnlen)3, (ftnlen)3) == 0) { + nb = 64; + } + } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + nb = 64; + } else if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nb = 32; + } else if (s_cmp(c3, "GST", (ftnlen)3, (ftnlen)3) == 0) { + nb = 64; + } + } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } + } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } + } else if (s_cmp(c2, "GB", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + if (*n4 <= 64) { + nb = 1; + } else { + nb = 32; + } + } else { + if (*n4 <= 64) { + nb = 1; + } else { + nb = 32; + } + } + } + } else if (s_cmp(c2, "PB", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + if (*n2 <= 64) { + nb = 1; + } else { + nb = 32; + } + } else { + if (*n2 <= 64) { + nb = 1; + } else { + nb = 32; + } + } + } + } else if (s_cmp(c2, "TR", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (s_cmp(c2, "LA", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "UUM", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (sname && s_cmp(c2, "ST", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "EBZ", (ftnlen)3, (ftnlen)3) == 0) { + nb = 1; + } + } + ret_val = nb; + return ret_val; + +L200: + +/* ISPEC = 2: minimum block size */ + + nbmin = 2; + if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( + ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)3, ( + ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) == 0) + { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } else if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } + } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 8; + } else { + nbmin = 8; + } + } else if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nbmin = 2; + } + } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nbmin = 2; + } + } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + } + } + } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + } + } + } + ret_val = nbmin; + return ret_val; + +L300: + +/* ISPEC = 3: crossover point */ + + nx = 0; + if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( + ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)3, ( + ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) == 0) + { + if (sname) { + nx = 128; + } else { + nx = 128; + } + } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nx = 128; + } else { + nx = 128; + } + } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nx = 128; + } else { + nx = 128; + } + } + } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { + if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nx = 32; + } + } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nx = 32; + } + } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nx = 128; + } + } + } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nx = 128; + } + } + } + ret_val = nx; + return ret_val; + +L400: + +/* ISPEC = 4: number of shifts (used by xHSEQR) */ + + ret_val = 6; + return ret_val; + +L500: + +/* ISPEC = 5: minimum column dimension (not used) */ + + ret_val = 2; + return ret_val; + +L600: + +/* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) */ + + ret_val = (integer) ((real) min(*n1,*n2) * 1.6f); + return ret_val; + +L700: + +/* ISPEC = 7: number of processors (not used) */ + + ret_val = 1; + return ret_val; + +L800: + +/* ISPEC = 8: crossover point for multishift (used by xHSEQR) */ + + ret_val = 50; + return ret_val; + +L900: + +/* + ISPEC = 9: maximum size of the subproblems at the bottom of the + computation tree in the divide-and-conquer algorithm + (used by xGELSD and xGESDD) +*/ + + ret_val = 25; + return ret_val; + +L1000: + +/* + ISPEC = 10: ieee NaN arithmetic can be trusted not to trap + + ILAENV = 0 +*/ + ret_val = 1; + if (ret_val == 1) { + ret_val = ieeeck_(&c__0, &c_b163, &c_b164); + } + return ret_val; + +L1100: + +/* + ISPEC = 11: infinity arithmetic can be trusted not to trap + + ILAENV = 0 +*/ + ret_val = 1; + if (ret_val == 1) { + ret_val = ieeeck_(&c__1, &c_b163, &c_b164); + } + return ret_val; + +/* End of ILAENV */ + +} /* ilaenv_ */ + diff --git a/numpy/linalg/lapack_lite/f2c_lite.c b/numpy/linalg/lapack_lite/f2c_lite.c deleted file mode 100644 index c0814b3bf..000000000 --- a/numpy/linalg/lapack_lite/f2c_lite.c +++ /dev/null @@ -1,671 +0,0 @@ -#include -#include -#include -#include -#include "f2c.h" - - -extern void s_wsfe(cilist *f) {;} -extern void e_wsfe(void) {;} -extern void do_fio(integer *c, char *s, ftnlen l) {;} - -/* You'll want this if you redo the *_lite.c files with the -C option - * to f2c for checking array subscripts. (It's not suggested you do that - * for production use, of course.) */ -extern int -s_rnge(char *var, int index, char *routine, int lineno) -{ - fprintf(stderr, "array index out-of-bounds for %s[%d] in routine %s:%d\n", - var, index, routine, lineno); - fflush(stderr); - abort(); -} - -#ifdef KR_headers -extern float sqrtf(); -double f__cabsf(real, imag) float real, imag; -#else -#undef abs - -double f__cabsf(float real, float imag) -#endif -{ -float temp; - -if(real < 0.0f) - real = -real; -if(imag < 0.0f) - imag = -imag; -if(imag > real){ - temp = real; - real = imag; - imag = temp; -} -if((imag+real) == real) - return((float)real); - -temp = imag/real; -temp = real*sqrtf(1.0 + temp*temp); /*overflow!!*/ -return(temp); -} - - -#ifdef KR_headers -extern double sqrt(); -double f__cabs(real, imag) double real, imag; -#else -#undef abs - -double f__cabs(double real, double imag) -#endif -{ -double temp; - -if(real < 0) - real = -real; -if(imag < 0) - imag = -imag; -if(imag > real){ - temp = real; - real = imag; - imag = temp; -} -if((imag+real) == real) - return((double)real); - -temp = imag/real; -temp = real*sqrt(1.0 + temp*temp); /*overflow!!*/ -return(temp); -} - - VOID -#ifdef KR_headers -r_cnjg(r, z) complex *r, *z; -#else -r_cnjg(complex *r, complex *z) -#endif -{ -r->r = z->r; -r->i = - z->i; -} - - VOID -#ifdef KR_headers -d_cnjg(r, z) doublecomplex *r, *z; -#else -d_cnjg(doublecomplex *r, doublecomplex *z) -#endif -{ -r->r = z->r; -r->i = - z->i; -} - - -#ifdef KR_headers -float r_imag(z) complex *z; -#else -float r_imag(complex *z) -#endif -{ -return(z->i); -} - -#ifdef KR_headers -double d_imag(z) doublecomplex *z; -#else -double d_imag(doublecomplex *z) -#endif -{ -return(z->i); -} - - -#define log10e 0.43429448190325182765 - -#ifdef KR_headers -float logf(); -float r_lg10(x) real *x; -#else -#undef abs - -float r_lg10(real *x) -#endif -{ -return( log10e * logf(*x) ); -} - -#ifdef KR_headers -double log(); -double d_lg10(x) doublereal *x; -#else -#undef abs - -double d_lg10(doublereal *x) -#endif -{ -return( log10e * log(*x) ); -} - -#ifdef KR_headers -double r_sign(a,b) real *a, *b; -#else -double r_sign(real *a, real *b) -#endif -{ -float x; -x = (*a >= 0.0f ? *a : - *a); -return( *b >= 0.0f ? x : -x); -} - -#ifdef KR_headers -double d_sign(a,b) doublereal *a, *b; -#else -double d_sign(doublereal *a, doublereal *b) -#endif -{ -double x; -x = (*a >= 0 ? *a : - *a); -return( *b >= 0 ? x : -x); -} - - -#ifdef KR_headers -double floor(); -integer i_dnnt(x) doublereal *x; -#else -#undef abs - -integer i_dnnt(doublereal *x) -#endif -{ -return( (*x)>=0 ? - floor(*x + .5) : -floor(.5 - *x) ); -} - - -#ifdef KR_headers -double pow(); -double pow_dd(ap, bp) doublereal *ap, *bp; -#else -#undef abs - -double pow_dd(doublereal *ap, doublereal *bp) -#endif -{ -return(pow(*ap, *bp) ); -} - - -#ifdef KR_headers -double pow_ri(ap, bp) real *ap; integer *bp; -#else -double pow_ri(real *ap, integer *bp) -#endif -{ -float pow, x; -integer n; -unsigned long u; - -pow = 1; -x = *ap; -n = *bp; - -if(n != 0) - { - if(n < 0) - { - n = -n; - x = 1.0f/x; - } - for(u = n; ; ) - { - if(u & 01) - pow *= x; - if(u >>= 1) - x *= x; - else - break; - } - } -return(pow); -} - -#ifdef KR_headers -double pow_di(ap, bp) doublereal *ap; integer *bp; -#else -double pow_di(doublereal *ap, integer *bp) -#endif -{ -double pow, x; -integer n; -unsigned long u; - -pow = 1; -x = *ap; -n = *bp; - -if(n != 0) - { - if(n < 0) - { - n = -n; - x = 1/x; - } - for(u = n; ; ) - { - if(u & 01) - pow *= x; - if(u >>= 1) - x *= x; - else - break; - } - } -return(pow); -} -/* Unless compiled with -DNO_OVERWRITE, this variant of s_cat allows the - * target of a concatenation to appear on its right-hand side (contrary - * to the Fortran 77 Standard, but in accordance with Fortran 90). - */ -#define NO_OVERWRITE - - -#ifndef NO_OVERWRITE - -#undef abs -#ifdef KR_headers - extern char *F77_aloc(); - extern void free(); - extern void exit_(); -#else - - extern char *F77_aloc(ftnlen, char*); -#endif - -#endif /* NO_OVERWRITE */ - - VOID -#ifdef KR_headers -s_cat(lp, rpp, rnp, np, ll) char *lp, *rpp[]; ftnlen rnp[], *np, ll; -#else -s_cat(char *lp, char *rpp[], ftnlen rnp[], ftnlen *np, ftnlen ll) -#endif -{ - ftnlen i, nc; - char *rp; - ftnlen n = *np; -#ifndef NO_OVERWRITE - ftnlen L, m; - char *lp0, *lp1; - - lp0 = 0; - lp1 = lp; - L = ll; - i = 0; - while(i < n) { - rp = rpp[i]; - m = rnp[i++]; - if (rp >= lp1 || rp + m <= lp) { - if ((L -= m) <= 0) { - n = i; - break; - } - lp1 += m; - continue; - } - lp0 = lp; - lp = lp1 = F77_aloc(L = ll, "s_cat"); - break; - } - lp1 = lp; -#endif /* NO_OVERWRITE */ - for(i = 0 ; i < n ; ++i) { - nc = ll; - if(rnp[i] < nc) - nc = rnp[i]; - ll -= nc; - rp = rpp[i]; - while(--nc >= 0) - *lp++ = *rp++; - } - while(--ll >= 0) - *lp++ = ' '; -#ifndef NO_OVERWRITE - if (lp0) { - memmove(lp0, lp1, L); - free(lp1); - } -#endif - } - - -/* compare two strings */ - -#ifdef KR_headers -integer s_cmp(a0, b0, la, lb) char *a0, *b0; ftnlen la, lb; -#else -integer s_cmp(char *a0, char *b0, ftnlen la, ftnlen lb) -#endif -{ -register unsigned char *a, *aend, *b, *bend; -a = (unsigned char *)a0; -b = (unsigned char *)b0; -aend = a + la; -bend = b + lb; - -if(la <= lb) - { - while(a < aend) - if(*a != *b) - return( *a - *b ); - else - { ++a; ++b; } - - while(b < bend) - if(*b != ' ') - return( ' ' - *b ); - else ++b; - } - -else - { - while(b < bend) - if(*a == *b) - { ++a; ++b; } - else - return( *a - *b ); - while(a < aend) - if(*a != ' ') - return(*a - ' '); - else ++a; - } -return(0); -} -/* Unless compiled with -DNO_OVERWRITE, this variant of s_copy allows the - * target of an assignment to appear on its right-hand side (contrary - * to the Fortran 77 Standard, but in accordance with Fortran 90), - * as in a(2:5) = a(4:7) . - */ - - - -/* assign strings: a = b */ - -#ifdef KR_headers -VOID s_copy(a, b, la, lb) register char *a, *b; ftnlen la, lb; -#else -void s_copy(register char *a, register char *b, ftnlen la, ftnlen lb) -#endif -{ - register char *aend, *bend; - - aend = a + la; - - if(la <= lb) -#ifndef NO_OVERWRITE - if (a <= b || a >= b + la) -#endif - while(a < aend) - *a++ = *b++; -#ifndef NO_OVERWRITE - else - for(b += la; a < aend; ) - *--aend = *--b; -#endif - - else { - bend = b + lb; -#ifndef NO_OVERWRITE - if (a <= b || a >= bend) -#endif - while(b < bend) - *a++ = *b++; -#ifndef NO_OVERWRITE - else { - a += lb; - while(b < bend) - *--a = *--bend; - a += lb; - } -#endif - while(a < aend) - *a++ = ' '; - } - } - - -#ifdef KR_headers -double f__cabsf(); -double c_abs(z) complex *z; -#else -double f__cabsf(float, float); -double c_abs(complex *z) -#endif -{ -return( f__cabsf( z->r, z->i ) ); -} - -#ifdef KR_headers -double f__cabs(); -double z_abs(z) doublecomplex *z; -#else -double f__cabs(double, double); -double z_abs(doublecomplex *z) -#endif -{ -return( f__cabs( z->r, z->i ) ); -} - - -#ifdef KR_headers -extern void sig_die(); -VOID c_div(c, a, b) complex *a, *b, *c; -#else -extern void sig_die(char*, int); -void c_div(complex *c, complex *a, complex *b) -#endif -{ -float ratio, den; -float abr, abi; - -if( (abr = b->r) < 0.f) - abr = - abr; -if( (abi = b->i) < 0.f) - abi = - abi; -if( abr <= abi ) - { - /*Let IEEE Infinties handle this ;( */ - /*if(abi == 0) - sig_die("complex division by zero", 1);*/ - ratio = b->r / b->i ; - den = b->i * (1 + ratio*ratio); - c->r = (a->r*ratio + a->i) / den; - c->i = (a->i*ratio - a->r) / den; - } - -else - { - ratio = b->i / b->r ; - den = b->r * (1.f + ratio*ratio); - c->r = (a->r + a->i*ratio) / den; - c->i = (a->i - a->r*ratio) / den; - } - -} - -#ifdef KR_headers -extern void sig_die(); -VOID z_div(c, a, b) doublecomplex *a, *b, *c; -#else -extern void sig_die(char*, int); -void z_div(doublecomplex *c, doublecomplex *a, doublecomplex *b) -#endif -{ -double ratio, den; -double abr, abi; - -if( (abr = b->r) < 0.) - abr = - abr; -if( (abi = b->i) < 0.) - abi = - abi; -if( abr <= abi ) - { - /*Let IEEE Infinties handle this ;( */ - /*if(abi == 0) - sig_die("complex division by zero", 1);*/ - ratio = b->r / b->i ; - den = b->i * (1 + ratio*ratio); - c->r = (a->r*ratio + a->i) / den; - c->i = (a->i*ratio - a->r) / den; - } - -else - { - ratio = b->i / b->r ; - den = b->r * (1 + ratio*ratio); - c->r = (a->r + a->i*ratio) / den; - c->i = (a->i - a->r*ratio) / den; - } - -} - - -#ifdef KR_headers -float sqrtf(), f__cabsf(); -VOID c_sqrt(r, z) complex *r, *z; -#else -#undef abs - -extern double f__cabsf(float, float); -void c_sqrt(complex *r, complex *z) -#endif -{ -float mag; - -if( (mag = f__cabsf(z->r, z->i)) == 0.f) - r->r = r->i = 0.f; -else if(z->r > 0.0f) - { - r->r = sqrtf(0.5f * (mag + z->r) ); - r->i = z->i / r->r / 2.0f; - } -else - { - r->i = sqrtf(0.5f * (mag - z->r) ); - if(z->i < 0.0f) - r->i = - r->i; - r->r = z->i / r->i / 2.0f; - } -} - - -#ifdef KR_headers -double sqrt(), f__cabs(); -VOID z_sqrt(r, z) doublecomplex *r, *z; -#else -#undef abs - -extern double f__cabs(double, double); -void z_sqrt(doublecomplex *r, doublecomplex *z) -#endif -{ -double mag; - -if( (mag = f__cabs(z->r, z->i)) == 0.) - r->r = r->i = 0.; -else if(z->r > 0) - { - r->r = sqrt(0.5 * (mag + z->r) ); - r->i = z->i / r->r / 2; - } -else - { - r->i = sqrt(0.5 * (mag - z->r) ); - if(z->i < 0) - r->i = - r->i; - r->r = z->i / r->i / 2; - } -} -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef KR_headers -integer pow_ii(ap, bp) integer *ap, *bp; -#else -integer pow_ii(integer *ap, integer *bp) -#endif -{ - integer pow, x, n; - unsigned long u; - - x = *ap; - n = *bp; - - if (n <= 0) { - if (n == 0 || x == 1) - return 1; - if (x != -1) - return x == 0 ? 1/x : 0; - n = -n; - } - u = n; - for(pow = 1; ; ) - { - if(u & 01) - pow *= x; - if(u >>= 1) - x *= x; - else - break; - } - return(pow); - } -#ifdef __cplusplus -} -#endif - -#ifdef KR_headers -extern void f_exit(); -VOID s_stop(s, n) char *s; ftnlen n; -#else -#undef abs -#undef min -#undef max -#ifdef __cplusplus -extern "C" { -#endif -#ifdef __cplusplus -extern "C" { -#endif -void f_exit(void); - -int s_stop(char *s, ftnlen n) -#endif -{ -int i; - -if(n > 0) - { - fprintf(stderr, "STOP "); - for(i = 0; i= 0. + + D (input/output) REAL array, dimension (N) + On entry, the n diagonal elements of the bidiagonal matrix B. + On exit, if INFO=0, the singular values of B. + + E (input/output) REAL array, dimension (N) + On entry, the elements of E contain the offdiagonal + elements of the bidiagonal matrix whose SVD is desired. + On exit, E has been destroyed. + + U (output) REAL array, dimension (LDU,N) + If COMPQ = 'I', then: + On exit, if INFO = 0, U contains the left singular vectors + of the bidiagonal matrix. + For other values of COMPQ, U is not referenced. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= 1. + If singular vectors are desired, then LDU >= max( 1, N ). + + VT (output) REAL array, dimension (LDVT,N) + If COMPQ = 'I', then: + On exit, if INFO = 0, VT' contains the right singular + vectors of the bidiagonal matrix. + For other values of COMPQ, VT is not referenced. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= 1. + If singular vectors are desired, then LDVT >= max( 1, N ). + + Q (output) REAL array, dimension (LDQ) + If COMPQ = 'P', then: + On exit, if INFO = 0, Q and IQ contain the left + and right singular vectors in a compact form, + requiring O(N log N) space instead of 2*N**2. + In particular, Q contains all the REAL data in + LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1)))) + words of memory, where SMLSIZ is returned by ILAENV and + is equal to the maximum size of the subproblems at the + bottom of the computation tree (usually about 25). + For other values of COMPQ, Q is not referenced. + + IQ (output) INTEGER array, dimension (LDIQ) + If COMPQ = 'P', then: + On exit, if INFO = 0, Q and IQ contain the left + and right singular vectors in a compact form, + requiring O(N log N) space instead of 2*N**2. + In particular, IQ contains all INTEGER data in + LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1)))) + words of memory, where SMLSIZ is returned by ILAENV and + is equal to the maximum size of the subproblems at the + bottom of the computation tree (usually about 25). + For other values of COMPQ, IQ is not referenced. + + WORK (workspace) REAL array, dimension (LWORK) + If COMPQ = 'N' then LWORK >= (4 * N). + If COMPQ = 'P' then LWORK >= (6 * N). + If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N). + + IWORK (workspace) INTEGER array, dimension (8*N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an singular value. + The update process of divide and conquer failed. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --q; + --iq; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + iuplo = 0; + if (lsame_(uplo, "U")) { + iuplo = 1; + } + if (lsame_(uplo, "L")) { + iuplo = 2; + } + if (lsame_(compq, "N")) { + icompq = 0; + } else if (lsame_(compq, "P")) { + icompq = 1; + } else if (lsame_(compq, "I")) { + icompq = 2; + } else { + icompq = -1; + } + if (iuplo == 0) { + *info = -1; + } else if (icompq < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ldu < 1 || icompq == 2 && *ldu < *n) { + *info = -7; + } else if (*ldvt < 1 || icompq == 2 && *ldvt < *n) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SBDSDC", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + smlsiz = ilaenv_(&c__9, "SBDSDC", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + if (*n == 1) { + if (icompq == 1) { + q[1] = r_sign(&c_b15, &d__[1]); + q[smlsiz * *n + 1] = 1.f; + } else if (icompq == 2) { + u[u_dim1 + 1] = r_sign(&c_b15, &d__[1]); + vt[vt_dim1 + 1] = 1.f; + } + d__[1] = dabs(d__[1]); + return 0; + } + nm1 = *n - 1; + +/* + If matrix lower bidiagonal, rotate to be upper bidiagonal + by applying Givens rotations on the left +*/ + + wstart = 1; + qstart = 3; + if (icompq == 1) { + scopy_(n, &d__[1], &c__1, &q[1], &c__1); + i__1 = *n - 1; + scopy_(&i__1, &e[1], &c__1, &q[*n + 1], &c__1); + } + if (iuplo == 2) { + qstart = 5; + wstart = (*n << 1) - 1; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (icompq == 1) { + q[i__ + (*n << 1)] = cs; + q[i__ + *n * 3] = sn; + } else if (icompq == 2) { + work[i__] = cs; + work[nm1 + i__] = -sn; + } +/* L10: */ + } + } + +/* If ICOMPQ = 0, use SLASDQ to compute the singular values. */ + + if (icompq == 0) { + slasdq_("U", &c__0, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ + wstart], info); + goto L40; + } + +/* + If N is smaller than the minimum divide size SMLSIZ, then solve + the problem with another solver. +*/ + + if (*n <= smlsiz) { + if (icompq == 2) { + slaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu); + slaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt); + slasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &vt[vt_offset] + , ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ + wstart], info); + } else if (icompq == 1) { + iu = 1; + ivt = iu + *n; + slaset_("A", n, n, &c_b29, &c_b15, &q[iu + (qstart - 1) * *n], n); + slaset_("A", n, n, &c_b29, &c_b15, &q[ivt + (qstart - 1) * *n], n); + slasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &q[ivt + ( + qstart - 1) * *n], n, &q[iu + (qstart - 1) * *n], n, &q[ + iu + (qstart - 1) * *n], n, &work[wstart], info); + } + goto L40; + } + + if (icompq == 2) { + slaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu); + slaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt); + } + +/* Scale. */ + + orgnrm = slanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.f) { + return 0; + } + slascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, &c__1, &d__[1], n, &ierr); + slascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &nm1, &c__1, &e[1], &nm1, & + ierr); + + eps = slamch_("Epsilon"); + + mlvl = (integer) (log((real) (*n) / (real) (smlsiz + 1)) / log(2.f)) + 1; + smlszp = smlsiz + 1; + + if (icompq == 1) { + iu = 1; + ivt = smlsiz + 1; + difl = ivt + smlszp; + difr = difl + mlvl; + z__ = difr + (mlvl << 1); + ic = z__ + mlvl; + is = ic + 1; + poles = is + 1; + givnum = poles + (mlvl << 1); + + k = 1; + givptr = 2; + perm = 3; + givcol = perm + mlvl; + } + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((r__1 = d__[i__], dabs(r__1)) < eps) { + d__[i__] = r_sign(&eps, &d__[i__]); + } +/* L20: */ + } + + start = 1; + sqre = 0; + + i__1 = nm1; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((r__1 = e[i__], dabs(r__1)) < eps || i__ == nm1) { + +/* + Subproblem found. First determine its size and then + apply divide and conquer on it. +*/ + + if (i__ < nm1) { + +/* A subproblem with E(I) small for I < NM1. */ + + nsize = i__ - start + 1; + } else if ((r__1 = e[i__], dabs(r__1)) >= eps) { + +/* A subproblem with E(NM1) not too small but I = NM1. */ + + nsize = *n - start + 1; + } else { + +/* + A subproblem with E(NM1) small. This implies an + 1-by-1 subproblem at D(N). Solve this 1-by-1 problem + first. +*/ + + nsize = i__ - start + 1; + if (icompq == 2) { + u[*n + *n * u_dim1] = r_sign(&c_b15, &d__[*n]); + vt[*n + *n * vt_dim1] = 1.f; + } else if (icompq == 1) { + q[*n + (qstart - 1) * *n] = r_sign(&c_b15, &d__[*n]); + q[*n + (smlsiz + qstart - 1) * *n] = 1.f; + } + d__[*n] = (r__1 = d__[*n], dabs(r__1)); + } + if (icompq == 2) { + slasd0_(&nsize, &sqre, &d__[start], &e[start], &u[start + + start * u_dim1], ldu, &vt[start + start * vt_dim1], + ldvt, &smlsiz, &iwork[1], &work[wstart], info); + } else { + slasda_(&icompq, &smlsiz, &nsize, &sqre, &d__[start], &e[ + start], &q[start + (iu + qstart - 2) * *n], n, &q[ + start + (ivt + qstart - 2) * *n], &iq[start + k * *n], + &q[start + (difl + qstart - 2) * *n], &q[start + ( + difr + qstart - 2) * *n], &q[start + (z__ + qstart - + 2) * *n], &q[start + (poles + qstart - 2) * *n], &iq[ + start + givptr * *n], &iq[start + givcol * *n], n, & + iq[start + perm * *n], &q[start + (givnum + qstart - + 2) * *n], &q[start + (ic + qstart - 2) * *n], &q[ + start + (is + qstart - 2) * *n], &work[wstart], & + iwork[1], info); + if (*info != 0) { + return 0; + } + } + start = i__ + 1; + } +/* L30: */ + } + +/* Unscale */ + + slascl_("G", &c__0, &c__0, &c_b15, &orgnrm, n, &c__1, &d__[1], n, &ierr); +L40: + +/* Use Selection Sort to minimize swaps of singular vectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + kk = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] > p) { + kk = j; + p = d__[j]; + } +/* L50: */ + } + if (kk != i__) { + d__[kk] = d__[i__]; + d__[i__] = p; + if (icompq == 1) { + iq[i__] = kk; + } else if (icompq == 2) { + sswap_(n, &u[i__ * u_dim1 + 1], &c__1, &u[kk * u_dim1 + 1], & + c__1); + sswap_(n, &vt[i__ + vt_dim1], ldvt, &vt[kk + vt_dim1], ldvt); + } + } else if (icompq == 1) { + iq[i__] = i__; + } +/* L60: */ + } + +/* If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO */ + + if (icompq == 1) { + if (iuplo == 1) { + iq[*n] = 1; + } else { + iq[*n] = 0; + } + } + +/* + If B is lower bidiagonal, update U by those Givens rotations + which rotated B to be upper bidiagonal +*/ + + if (iuplo == 2 && icompq == 2) { + slasr_("L", "V", "B", n, n, &work[1], &work[*n], &u[u_offset], ldu); + } + + return 0; + +/* End of SBDSDC */ + +} /* sbdsdc_ */ + +/* Subroutine */ int sbdsqr_(char *uplo, integer *n, integer *ncvt, integer * + nru, integer *ncc, real *d__, real *e, real *vt, integer *ldvt, real * + u, integer *ldu, real *c__, integer *ldc, real *work, integer *info) +{ + /* System generated locals */ + integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2; + real r__1, r__2, r__3, r__4; + doublereal d__1; + + /* Builtin functions */ + double pow_dd(doublereal *, doublereal *), sqrt(doublereal), r_sign(real * + , real *); + + /* Local variables */ + static real f, g, h__; + static integer i__, j, m; + static real r__, cs; + static integer ll; + static real sn, mu; + static integer nm1, nm12, nm13, lll; + static real eps, sll, tol, abse; + static integer idir; + static real abss; + static integer oldm; + static real cosl; + static integer isub, iter; + static real unfl, sinl, cosr, smin, smax, sinr; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *), slas2_(real *, real *, real *, real *, + real *); + extern logical lsame_(char *, char *); + static real oldcs; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + static integer oldll; + static real shift, sigmn, oldsn; + static integer maxit; + static real sminl; + extern /* Subroutine */ int slasr_(char *, char *, char *, integer *, + integer *, real *, real *, real *, integer *); + static real sigmx; + static logical lower; + extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, + integer *), slasq1_(integer *, real *, real *, real *, integer *), + slasv2_(real *, real *, real *, real *, real *, real *, real *, + real *, real *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static real sminoa; + extern /* Subroutine */ int slartg_(real *, real *, real *, real *, real * + ); + static real thresh; + static logical rotate; + static real sminlo, tolmul; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + SBDSQR computes the singular value decomposition (SVD) of a real + N-by-N (upper or lower) bidiagonal matrix B: B = Q * S * P' (P' + denotes the transpose of P), where S is a diagonal matrix with + non-negative diagonal elements (the singular values of B), and Q + and P are orthogonal matrices. + + The routine computes S, and optionally computes U * Q, P' * VT, + or Q' * C, for given real input matrices U, VT, and C. + + See "Computing Small Singular Values of Bidiagonal Matrices With + Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, + LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, + no. 5, pp. 873-912, Sept 1990) and + "Accurate singular values and differential qd algorithms," by + B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics + Department, University of California at Berkeley, July 1992 + for a detailed description of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': B is upper bidiagonal; + = 'L': B is lower bidiagonal. + + N (input) INTEGER + The order of the matrix B. N >= 0. + + NCVT (input) INTEGER + The number of columns of the matrix VT. NCVT >= 0. + + NRU (input) INTEGER + The number of rows of the matrix U. NRU >= 0. + + NCC (input) INTEGER + The number of columns of the matrix C. NCC >= 0. + + D (input/output) REAL array, dimension (N) + On entry, the n diagonal elements of the bidiagonal matrix B. + On exit, if INFO=0, the singular values of B in decreasing + order. + + E (input/output) REAL array, dimension (N) + On entry, the elements of E contain the + offdiagonal elements of the bidiagonal matrix whose SVD + is desired. On normal exit (INFO = 0), E is destroyed. + If the algorithm does not converge (INFO > 0), D and E + will contain the diagonal and superdiagonal elements of a + bidiagonal matrix orthogonally equivalent to the one given + as input. E(N) is used for workspace. + + VT (input/output) REAL array, dimension (LDVT, NCVT) + On entry, an N-by-NCVT matrix VT. + On exit, VT is overwritten by P' * VT. + VT is not referenced if NCVT = 0. + + LDVT (input) INTEGER + The leading dimension of the array VT. + LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. + + U (input/output) REAL array, dimension (LDU, N) + On entry, an NRU-by-N matrix U. + On exit, U is overwritten by U * Q. + U is not referenced if NRU = 0. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= max(1,NRU). + + C (input/output) REAL array, dimension (LDC, NCC) + On entry, an N-by-NCC matrix C. + On exit, C is overwritten by Q' * C. + C is not referenced if NCC = 0. + + LDC (input) INTEGER + The leading dimension of the array C. + LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. + + WORK (workspace) REAL array, dimension (4*N) + + INFO (output) INTEGER + = 0: successful exit + < 0: If INFO = -i, the i-th argument had an illegal value + > 0: the algorithm did not converge; D and E contain the + elements of a bidiagonal matrix which is orthogonally + similar to the input matrix B; if INFO = i, i + elements of E have not converged to zero. + + Internal Parameters + =================== + + TOLMUL REAL, default = max(10,min(100,EPS**(-1/8))) + TOLMUL controls the convergence criterion of the QR loop. + If it is positive, TOLMUL*EPS is the desired relative + precision in the computed singular values. + If it is negative, abs(TOLMUL*EPS*sigma_max) is the + desired absolute accuracy in the computed singular + values (corresponds to relative accuracy + abs(TOLMUL*EPS) in the largest singular value. + abs(TOLMUL) should be between 1 and 1/EPS, and preferably + between 10 (for fast convergence) and .1/EPS + (for there to be some accuracy in the results). + Default is to lose at either one eighth or 2 of the + available decimal digits in each computed singular value + (whichever is smaller). + + MAXITR INTEGER, default = 6 + MAXITR controls the maximum number of passes of the + algorithm through its inner loop. The algorithms stops + (and so fails to converge) if the number of passes + through the inner loop exceeds MAXITR*N**2. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + lower = lsame_(uplo, "L"); + if (! lsame_(uplo, "U") && ! lower) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ncvt < 0) { + *info = -3; + } else if (*nru < 0) { + *info = -4; + } else if (*ncc < 0) { + *info = -5; + } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { + *info = -9; + } else if (*ldu < max(1,*nru)) { + *info = -11; + } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SBDSQR", &i__1); + return 0; + } + if (*n == 0) { + return 0; + } + if (*n == 1) { + goto L160; + } + +/* ROTATE is true if any singular vectors desired, false otherwise */ + + rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; + +/* If no singular vectors desired, use qd algorithm */ + + if (! rotate) { + slasq1_(n, &d__[1], &e[1], &work[1], info); + return 0; + } + + nm1 = *n - 1; + nm12 = nm1 + nm1; + nm13 = nm12 + nm1; + idir = 0; + +/* Get machine constants */ + + eps = slamch_("Epsilon"); + unfl = slamch_("Safe minimum"); + +/* + If matrix lower bidiagonal, rotate to be upper bidiagonal + by applying Givens rotations on the left +*/ + + if (lower) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + work[i__] = cs; + work[nm1 + i__] = sn; +/* L10: */ + } + +/* Update singular vectors if desired */ + + if (*nru > 0) { + slasr_("R", "V", "F", nru, n, &work[1], &work[*n], &u[u_offset], + ldu); + } + if (*ncc > 0) { + slasr_("L", "V", "F", n, ncc, &work[1], &work[*n], &c__[c_offset], + ldc); + } + } + +/* + Compute singular values to relative accuracy TOL + (By setting TOL to be negative, algorithm will compute + singular values to absolute accuracy ABS(TOL)*norm(input matrix)) + + Computing MAX + Computing MIN +*/ + d__1 = (doublereal) eps; + r__3 = 100.f, r__4 = pow_dd(&d__1, &c_b94); + r__1 = 10.f, r__2 = dmin(r__3,r__4); + tolmul = dmax(r__1,r__2); + tol = tolmul * eps; + +/* Compute approximate maximum, minimum singular values */ + + smax = 0.f; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__2 = smax, r__3 = (r__1 = d__[i__], dabs(r__1)); + smax = dmax(r__2,r__3); +/* L20: */ + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__2 = smax, r__3 = (r__1 = e[i__], dabs(r__1)); + smax = dmax(r__2,r__3); +/* L30: */ + } + sminl = 0.f; + if (tol >= 0.f) { + +/* Relative accuracy desired */ + + sminoa = dabs(d__[1]); + if (sminoa == 0.f) { + goto L50; + } + mu = sminoa; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + mu = (r__2 = d__[i__], dabs(r__2)) * (mu / (mu + (r__1 = e[i__ - + 1], dabs(r__1)))); + sminoa = dmin(sminoa,mu); + if (sminoa == 0.f) { + goto L50; + } +/* L40: */ + } +L50: + sminoa /= sqrt((real) (*n)); +/* Computing MAX */ + r__1 = tol * sminoa, r__2 = *n * 6 * *n * unfl; + thresh = dmax(r__1,r__2); + } else { + +/* + Absolute accuracy desired + + Computing MAX +*/ + r__1 = dabs(tol) * smax, r__2 = *n * 6 * *n * unfl; + thresh = dmax(r__1,r__2); + } + +/* + Prepare for main iteration loop for the singular values + (MAXIT is the maximum number of passes through the inner + loop permitted before nonconvergence signalled.) +*/ + + maxit = *n * 6 * *n; + iter = 0; + oldll = -1; + oldm = -1; + +/* M points to last element of unconverged part of matrix */ + + m = *n; + +/* Begin main iteration loop */ + +L60: + +/* Check for convergence or exceeding iteration count */ + + if (m <= 1) { + goto L160; + } + if (iter > maxit) { + goto L200; + } + +/* Find diagonal block of matrix to work on */ + + if (tol < 0.f && (r__1 = d__[m], dabs(r__1)) <= thresh) { + d__[m] = 0.f; + } + smax = (r__1 = d__[m], dabs(r__1)); + smin = smax; + i__1 = m - 1; + for (lll = 1; lll <= i__1; ++lll) { + ll = m - lll; + abss = (r__1 = d__[ll], dabs(r__1)); + abse = (r__1 = e[ll], dabs(r__1)); + if (tol < 0.f && abss <= thresh) { + d__[ll] = 0.f; + } + if (abse <= thresh) { + goto L80; + } + smin = dmin(smin,abss); +/* Computing MAX */ + r__1 = max(smax,abss); + smax = dmax(r__1,abse); +/* L70: */ + } + ll = 0; + goto L90; +L80: + e[ll] = 0.f; + +/* Matrix splits since E(LL) = 0 */ + + if (ll == m - 1) { + +/* Convergence of bottom singular value, return to top of loop */ + + --m; + goto L60; + } +L90: + ++ll; + +/* E(LL) through E(M-1) are nonzero, E(LL-1) is zero */ + + if (ll == m - 1) { + +/* 2 by 2 block, handle separately */ + + slasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr, + &sinl, &cosl); + d__[m - 1] = sigmx; + e[m - 1] = 0.f; + d__[m] = sigmn; + +/* Compute singular vectors, if desired */ + + if (*ncvt > 0) { + srot_(ncvt, &vt[m - 1 + vt_dim1], ldvt, &vt[m + vt_dim1], ldvt, & + cosr, &sinr); + } + if (*nru > 0) { + srot_(nru, &u[(m - 1) * u_dim1 + 1], &c__1, &u[m * u_dim1 + 1], & + c__1, &cosl, &sinl); + } + if (*ncc > 0) { + srot_(ncc, &c__[m - 1 + c_dim1], ldc, &c__[m + c_dim1], ldc, & + cosl, &sinl); + } + m += -2; + goto L60; + } + +/* + If working on new submatrix, choose shift direction + (from larger end diagonal element towards smaller) +*/ + + if (ll > oldm || m < oldll) { + if ((r__1 = d__[ll], dabs(r__1)) >= (r__2 = d__[m], dabs(r__2))) { + +/* Chase bulge from top (big end) to bottom (small end) */ + + idir = 1; + } else { + +/* Chase bulge from bottom (big end) to top (small end) */ + + idir = 2; + } + } + +/* Apply convergence tests */ + + if (idir == 1) { + +/* + Run convergence test in forward direction + First apply standard test to bottom of matrix +*/ + + if ((r__2 = e[m - 1], dabs(r__2)) <= dabs(tol) * (r__1 = d__[m], dabs( + r__1)) || tol < 0.f && (r__3 = e[m - 1], dabs(r__3)) <= + thresh) { + e[m - 1] = 0.f; + goto L60; + } + + if (tol >= 0.f) { + +/* + If relative accuracy desired, + apply convergence criterion forward +*/ + + mu = (r__1 = d__[ll], dabs(r__1)); + sminl = mu; + i__1 = m - 1; + for (lll = ll; lll <= i__1; ++lll) { + if ((r__1 = e[lll], dabs(r__1)) <= tol * mu) { + e[lll] = 0.f; + goto L60; + } + sminlo = sminl; + mu = (r__2 = d__[lll + 1], dabs(r__2)) * (mu / (mu + (r__1 = + e[lll], dabs(r__1)))); + sminl = dmin(sminl,mu); +/* L100: */ + } + } + + } else { + +/* + Run convergence test in backward direction + First apply standard test to top of matrix +*/ + + if ((r__2 = e[ll], dabs(r__2)) <= dabs(tol) * (r__1 = d__[ll], dabs( + r__1)) || tol < 0.f && (r__3 = e[ll], dabs(r__3)) <= thresh) { + e[ll] = 0.f; + goto L60; + } + + if (tol >= 0.f) { + +/* + If relative accuracy desired, + apply convergence criterion backward +*/ + + mu = (r__1 = d__[m], dabs(r__1)); + sminl = mu; + i__1 = ll; + for (lll = m - 1; lll >= i__1; --lll) { + if ((r__1 = e[lll], dabs(r__1)) <= tol * mu) { + e[lll] = 0.f; + goto L60; + } + sminlo = sminl; + mu = (r__2 = d__[lll], dabs(r__2)) * (mu / (mu + (r__1 = e[ + lll], dabs(r__1)))); + sminl = dmin(sminl,mu); +/* L110: */ + } + } + } + oldll = ll; + oldm = m; + +/* + Compute shift. First, test if shifting would ruin relative + accuracy, and if so set the shift to zero. + + Computing MAX +*/ + r__1 = eps, r__2 = tol * .01f; + if (tol >= 0.f && *n * tol * (sminl / smax) <= dmax(r__1,r__2)) { + +/* Use a zero shift to avoid loss of relative accuracy */ + + shift = 0.f; + } else { + +/* Compute the shift from 2-by-2 block at end of matrix */ + + if (idir == 1) { + sll = (r__1 = d__[ll], dabs(r__1)); + slas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__); + } else { + sll = (r__1 = d__[m], dabs(r__1)); + slas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__); + } + +/* Test if shift negligible, and if so set to zero */ + + if (sll > 0.f) { +/* Computing 2nd power */ + r__1 = shift / sll; + if (r__1 * r__1 < eps) { + shift = 0.f; + } + } + } + +/* Increment iteration count */ + + iter = iter + m - ll; + +/* If SHIFT = 0, do simplified QR iteration */ + + if (shift == 0.f) { + if (idir == 1) { + +/* + Chase bulge from top to bottom + Save cosines and sines for later singular vector updates +*/ + + cs = 1.f; + oldcs = 1.f; + i__1 = m - 1; + for (i__ = ll; i__ <= i__1; ++i__) { + r__1 = d__[i__] * cs; + slartg_(&r__1, &e[i__], &cs, &sn, &r__); + if (i__ > ll) { + e[i__ - 1] = oldsn * r__; + } + r__1 = oldcs * r__; + r__2 = d__[i__ + 1] * sn; + slartg_(&r__1, &r__2, &oldcs, &oldsn, &d__[i__]); + work[i__ - ll + 1] = cs; + work[i__ - ll + 1 + nm1] = sn; + work[i__ - ll + 1 + nm12] = oldcs; + work[i__ - ll + 1 + nm13] = oldsn; +/* L120: */ + } + h__ = d__[m] * cs; + d__[m] = h__ * oldcs; + e[m - 1] = h__ * oldsn; + +/* Update singular vectors */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + slasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ + ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + slasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 + + 1], &u[ll * u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + slasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + + 1], &c__[ll + c_dim1], ldc); + } + +/* Test convergence */ + + if ((r__1 = e[m - 1], dabs(r__1)) <= thresh) { + e[m - 1] = 0.f; + } + + } else { + +/* + Chase bulge from bottom to top + Save cosines and sines for later singular vector updates +*/ + + cs = 1.f; + oldcs = 1.f; + i__1 = ll + 1; + for (i__ = m; i__ >= i__1; --i__) { + r__1 = d__[i__] * cs; + slartg_(&r__1, &e[i__ - 1], &cs, &sn, &r__); + if (i__ < m) { + e[i__] = oldsn * r__; + } + r__1 = oldcs * r__; + r__2 = d__[i__ - 1] * sn; + slartg_(&r__1, &r__2, &oldcs, &oldsn, &d__[i__]); + work[i__ - ll] = cs; + work[i__ - ll + nm1] = -sn; + work[i__ - ll + nm12] = oldcs; + work[i__ - ll + nm13] = -oldsn; +/* L130: */ + } + h__ = d__[ll] * cs; + d__[ll] = h__ * oldcs; + e[ll] = h__ * oldsn; + +/* Update singular vectors */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + slasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ + nm13 + 1], &vt[ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + slasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * + u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + slasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ + ll + c_dim1], ldc); + } + +/* Test convergence */ + + if ((r__1 = e[ll], dabs(r__1)) <= thresh) { + e[ll] = 0.f; + } + } + } else { + +/* Use nonzero shift */ + + if (idir == 1) { + +/* + Chase bulge from top to bottom + Save cosines and sines for later singular vector updates +*/ + + f = ((r__1 = d__[ll], dabs(r__1)) - shift) * (r_sign(&c_b15, &d__[ + ll]) + shift / d__[ll]); + g = e[ll]; + i__1 = m - 1; + for (i__ = ll; i__ <= i__1; ++i__) { + slartg_(&f, &g, &cosr, &sinr, &r__); + if (i__ > ll) { + e[i__ - 1] = r__; + } + f = cosr * d__[i__] + sinr * e[i__]; + e[i__] = cosr * e[i__] - sinr * d__[i__]; + g = sinr * d__[i__ + 1]; + d__[i__ + 1] = cosr * d__[i__ + 1]; + slartg_(&f, &g, &cosl, &sinl, &r__); + d__[i__] = r__; + f = cosl * e[i__] + sinl * d__[i__ + 1]; + d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__]; + if (i__ < m - 1) { + g = sinl * e[i__ + 1]; + e[i__ + 1] = cosl * e[i__ + 1]; + } + work[i__ - ll + 1] = cosr; + work[i__ - ll + 1 + nm1] = sinr; + work[i__ - ll + 1 + nm12] = cosl; + work[i__ - ll + 1 + nm13] = sinl; +/* L140: */ + } + e[m - 1] = f; + +/* Update singular vectors */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + slasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ + ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + slasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 + + 1], &u[ll * u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + slasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + + 1], &c__[ll + c_dim1], ldc); + } + +/* Test convergence */ + + if ((r__1 = e[m - 1], dabs(r__1)) <= thresh) { + e[m - 1] = 0.f; + } + + } else { + +/* + Chase bulge from bottom to top + Save cosines and sines for later singular vector updates +*/ + + f = ((r__1 = d__[m], dabs(r__1)) - shift) * (r_sign(&c_b15, &d__[ + m]) + shift / d__[m]); + g = e[m - 1]; + i__1 = ll + 1; + for (i__ = m; i__ >= i__1; --i__) { + slartg_(&f, &g, &cosr, &sinr, &r__); + if (i__ < m) { + e[i__] = r__; + } + f = cosr * d__[i__] + sinr * e[i__ - 1]; + e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__]; + g = sinr * d__[i__ - 1]; + d__[i__ - 1] = cosr * d__[i__ - 1]; + slartg_(&f, &g, &cosl, &sinl, &r__); + d__[i__] = r__; + f = cosl * e[i__ - 1] + sinl * d__[i__ - 1]; + d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1]; + if (i__ > ll + 1) { + g = sinl * e[i__ - 2]; + e[i__ - 2] = cosl * e[i__ - 2]; + } + work[i__ - ll] = cosr; + work[i__ - ll + nm1] = -sinr; + work[i__ - ll + nm12] = cosl; + work[i__ - ll + nm13] = -sinl; +/* L150: */ + } + e[ll] = f; + +/* Test convergence */ + + if ((r__1 = e[ll], dabs(r__1)) <= thresh) { + e[ll] = 0.f; + } + +/* Update singular vectors if desired */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + slasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ + nm13 + 1], &vt[ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + slasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * + u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + slasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ + ll + c_dim1], ldc); + } + } + } + +/* QR iteration finished, go back and check convergence */ + + goto L60; + +/* All singular values converged, so make them positive */ + +L160: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (d__[i__] < 0.f) { + d__[i__] = -d__[i__]; + +/* Change sign of singular vectors, if desired */ + + if (*ncvt > 0) { + sscal_(ncvt, &c_b151, &vt[i__ + vt_dim1], ldvt); + } + } +/* L170: */ + } + +/* + Sort the singular values into decreasing order (insertion sort on + singular values, but only one transposition per singular vector) +*/ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Scan for smallest D(I) */ + + isub = 1; + smin = d__[1]; + i__2 = *n + 1 - i__; + for (j = 2; j <= i__2; ++j) { + if (d__[j] <= smin) { + isub = j; + smin = d__[j]; + } +/* L180: */ + } + if (isub != *n + 1 - i__) { + +/* Swap singular values and vectors */ + + d__[isub] = d__[*n + 1 - i__]; + d__[*n + 1 - i__] = smin; + if (*ncvt > 0) { + sswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[*n + 1 - i__ + + vt_dim1], ldvt); + } + if (*nru > 0) { + sswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[(*n + 1 - i__) * + u_dim1 + 1], &c__1); + } + if (*ncc > 0) { + sswap_(ncc, &c__[isub + c_dim1], ldc, &c__[*n + 1 - i__ + + c_dim1], ldc); + } + } +/* L190: */ + } + goto L220; + +/* Maximum number of iterations exceeded, failure to converge */ + +L200: + *info = 0; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.f) { + ++(*info); + } +/* L210: */ + } +L220: + return 0; + +/* End of SBDSQR */ + +} /* sbdsqr_ */ + +/* Subroutine */ int sgebak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, real *scale, integer *m, real *v, integer *ldv, integer + *info) +{ + /* System generated locals */ + integer v_dim1, v_offset, i__1; + + /* Local variables */ + static integer i__, k; + static real s; + static integer ii; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + static logical leftv; + extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, + integer *), xerbla_(char *, integer *); + static logical rightv; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + SGEBAK forms the right or left eigenvectors of a real general matrix + by backward transformation on the computed eigenvectors of the + balanced matrix output by SGEBAL. + + Arguments + ========= + + JOB (input) CHARACTER*1 + Specifies the type of backward transformation required: + = 'N', do nothing, return immediately; + = 'P', do backward transformation for permutation only; + = 'S', do backward transformation for scaling only; + = 'B', do backward transformations for both permutation and + scaling. + JOB must be the same as the argument JOB supplied to SGEBAL. + + SIDE (input) CHARACTER*1 + = 'R': V contains right eigenvectors; + = 'L': V contains left eigenvectors. + + N (input) INTEGER + The number of rows of the matrix V. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + The integers ILO and IHI determined by SGEBAL. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + SCALE (input) REAL array, dimension (N) + Details of the permutation and scaling factors, as returned + by SGEBAL. + + M (input) INTEGER + The number of columns of the matrix V. M >= 0. + + V (input/output) REAL array, dimension (LDV,M) + On entry, the matrix of right or left eigenvectors to be + transformed, as returned by SHSEIN or STREVC. + On exit, V is overwritten by the transformed eigenvectors. + + LDV (input) INTEGER + The leading dimension of the array V. LDV >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + ===================================================================== + + + Decode and Test the input parameters +*/ + + /* Parameter adjustments */ + --scale; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + + /* Function Body */ + rightv = lsame_(side, "R"); + leftv = lsame_(side, "L"); + + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (! rightv && ! leftv) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -4; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -5; + } else if (*m < 0) { + *info = -7; + } else if (*ldv < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEBAK", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*m == 0) { + return 0; + } + if (lsame_(job, "N")) { + return 0; + } + + if (*ilo == *ihi) { + goto L30; + } + +/* Backward balance */ + + if (lsame_(job, "S") || lsame_(job, "B")) { + + if (rightv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = scale[i__]; + sscal_(m, &s, &v[i__ + v_dim1], ldv); +/* L10: */ + } + } + + if (leftv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = 1.f / scale[i__]; + sscal_(m, &s, &v[i__ + v_dim1], ldv); +/* L20: */ + } + } + + } + +/* + Backward permutation + + For I = ILO-1 step -1 until 1, + IHI+1 step 1 until N do -- +*/ + +L30: + if (lsame_(job, "P") || lsame_(job, "B")) { + if (rightv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L40; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = scale[i__]; + if (k == i__) { + goto L40; + } + sswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L40: + ; + } + } + + if (leftv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L50; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = scale[i__]; + if (k == i__) { + goto L50; + } + sswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L50: + ; + } + } + } + + return 0; + +/* End of SGEBAK */ + +} /* sgebak_ */ + +/* Subroutine */ int sgebal_(char *job, integer *n, real *a, integer *lda, + integer *ilo, integer *ihi, real *scale, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real r__1, r__2; + + /* Local variables */ + static real c__, f, g; + static integer i__, j, k, l, m; + static real r__, s, ca, ra; + static integer ica, ira, iexc; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + sswap_(integer *, real *, integer *, real *, integer *); + static real sfmin1, sfmin2, sfmax1, sfmax2; + extern doublereal slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer isamax_(integer *, real *, integer *); + static logical noconv; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SGEBAL balances a general real matrix A. This involves, first, + permuting A by a similarity transformation to isolate eigenvalues + in the first 1 to ILO-1 and last IHI+1 to N elements on the + diagonal; and second, applying a diagonal similarity transformation + to rows and columns ILO to IHI to make the rows and columns as + close in norm as possible. Both steps are optional. + + Balancing may reduce the 1-norm of the matrix, and improve the + accuracy of the computed eigenvalues and/or eigenvectors. + + Arguments + ========= + + JOB (input) CHARACTER*1 + Specifies the operations to be performed on A: + = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 + for i = 1,...,N; + = 'P': permute only; + = 'S': scale only; + = 'B': both permute and scale. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the input matrix A. + On exit, A is overwritten by the balanced matrix. + If JOB = 'N', A is not referenced. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + ILO (output) INTEGER + IHI (output) INTEGER + ILO and IHI are set to integers such that on exit + A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. + If JOB = 'N' or 'S', ILO = 1 and IHI = N. + + SCALE (output) REAL array, dimension (N) + Details of the permutations and scaling factors applied to + A. If P(j) is the index of the row and column interchanged + with row and column j and D(j) is the scaling factor + applied to row and column j, then + SCALE(j) = P(j) for j = 1,...,ILO-1 + = D(j) for j = ILO,...,IHI + = P(j) for j = IHI+1,...,N. + The order in which the interchanges are made is N to IHI+1, + then 1 to ILO-1. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The permutations consist of row and column interchanges which put + the matrix in the form + + ( T1 X Y ) + P A P = ( 0 B Z ) + ( 0 0 T2 ) + + where T1 and T2 are upper triangular matrices whose eigenvalues lie + along the diagonal. The column indices ILO and IHI mark the starting + and ending columns of the submatrix B. Balancing consists of applying + a diagonal similarity transformation inv(D) * B * D to make the + 1-norms of each row of B and its corresponding column nearly equal. + The output matrix is + + ( T1 X*D Y ) + ( 0 inv(D)*B*D inv(D)*Z ). + ( 0 0 T2 ) + + Information about the permutations P and the diagonal matrix D is + returned in the vector SCALE. + + This subroutine is based on the EISPACK routine BALANC. + + Modified by Tzu-Yi Chen, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --scale; + + /* Function Body */ + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEBAL", &i__1); + return 0; + } + + k = 1; + l = *n; + + if (*n == 0) { + goto L210; + } + + if (lsame_(job, "N")) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + scale[i__] = 1.f; +/* L10: */ + } + goto L210; + } + + if (lsame_(job, "S")) { + goto L120; + } + +/* Permutation to isolate eigenvalues if possible */ + + goto L50; + +/* Row and column exchange. */ + +L20: + scale[m] = (real) j; + if (j == m) { + goto L30; + } + + sswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); + i__1 = *n - k + 1; + sswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); + +L30: + switch (iexc) { + case 1: goto L40; + case 2: goto L80; + } + +/* Search for rows isolating an eigenvalue and push them down. */ + +L40: + if (l == 1) { + goto L210; + } + --l; + +L50: + for (j = l; j >= 1; --j) { + + i__1 = l; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ == j) { + goto L60; + } + if (a[j + i__ * a_dim1] != 0.f) { + goto L70; + } +L60: + ; + } + + m = l; + iexc = 1; + goto L20; +L70: + ; + } + + goto L90; + +/* Search for columns isolating an eigenvalue and push them left. */ + +L80: + ++k; + +L90: + i__1 = l; + for (j = k; j <= i__1; ++j) { + + i__2 = l; + for (i__ = k; i__ <= i__2; ++i__) { + if (i__ == j) { + goto L100; + } + if (a[i__ + j * a_dim1] != 0.f) { + goto L110; + } +L100: + ; + } + + m = k; + iexc = 2; + goto L20; +L110: + ; + } + +L120: + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + scale[i__] = 1.f; +/* L130: */ + } + + if (lsame_(job, "P")) { + goto L210; + } + +/* + Balance the submatrix in rows K to L. + + Iterative loop for norm reduction +*/ + + sfmin1 = slamch_("S") / slamch_("P"); + sfmax1 = 1.f / sfmin1; + sfmin2 = sfmin1 * 8.f; + sfmax2 = 1.f / sfmin2; +L140: + noconv = FALSE_; + + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + c__ = 0.f; + r__ = 0.f; + + i__2 = l; + for (j = k; j <= i__2; ++j) { + if (j == i__) { + goto L150; + } + c__ += (r__1 = a[j + i__ * a_dim1], dabs(r__1)); + r__ += (r__1 = a[i__ + j * a_dim1], dabs(r__1)); +L150: + ; + } + ica = isamax_(&l, &a[i__ * a_dim1 + 1], &c__1); + ca = (r__1 = a[ica + i__ * a_dim1], dabs(r__1)); + i__2 = *n - k + 1; + ira = isamax_(&i__2, &a[i__ + k * a_dim1], lda); + ra = (r__1 = a[i__ + (ira + k - 1) * a_dim1], dabs(r__1)); + +/* Guard against zero C or R due to underflow. */ + + if (c__ == 0.f || r__ == 0.f) { + goto L200; + } + g = r__ / 8.f; + f = 1.f; + s = c__ + r__; +L160: +/* Computing MAX */ + r__1 = max(f,c__); +/* Computing MIN */ + r__2 = min(r__,g); + if (c__ >= g || dmax(r__1,ca) >= sfmax2 || dmin(r__2,ra) <= sfmin2) { + goto L170; + } + f *= 8.f; + c__ *= 8.f; + ca *= 8.f; + r__ /= 8.f; + g /= 8.f; + ra /= 8.f; + goto L160; + +L170: + g = c__ / 8.f; +L180: +/* Computing MIN */ + r__1 = min(f,c__), r__1 = min(r__1,g); + if (g < r__ || dmax(r__,ra) >= sfmax2 || dmin(r__1,ca) <= sfmin2) { + goto L190; + } + f /= 8.f; + c__ /= 8.f; + g /= 8.f; + ca /= 8.f; + r__ *= 8.f; + ra *= 8.f; + goto L180; + +/* Now balance. */ + +L190: + if (c__ + r__ >= s * .95f) { + goto L200; + } + if (f < 1.f && scale[i__] < 1.f) { + if (f * scale[i__] <= sfmin1) { + goto L200; + } + } + if (f > 1.f && scale[i__] > 1.f) { + if (scale[i__] >= sfmax1 / f) { + goto L200; + } + } + g = 1.f / f; + scale[i__] *= f; + noconv = TRUE_; + + i__2 = *n - k + 1; + sscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); + sscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); + +L200: + ; + } + + if (noconv) { + goto L140; + } + +L210: + *ilo = k; + *ihi = l; + + return 0; + +/* End of SGEBAL */ + +} /* sgebal_ */ + +/* Subroutine */ int sgebd2_(integer *m, integer *n, real *a, integer *lda, + real *d__, real *e, real *tauq, real *taup, real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__; + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *), slarfg_(integer *, real *, real *, + integer *, real *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SGEBD2 reduces a real general m by n matrix A to upper or lower + bidiagonal form B by an orthogonal transformation: Q' * A * P = B. + + If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. M >= 0. + + N (input) INTEGER + The number of columns in the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the m by n general matrix to be reduced. + On exit, + if m >= n, the diagonal and the first superdiagonal are + overwritten with the upper bidiagonal matrix B; the + elements below the diagonal, with the array TAUQ, represent + the orthogonal matrix Q as a product of elementary + reflectors, and the elements above the first superdiagonal, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors; + if m < n, the diagonal and the first subdiagonal are + overwritten with the lower bidiagonal matrix B; the + elements below the first subdiagonal, with the array TAUQ, + represent the orthogonal matrix Q as a product of + elementary reflectors, and the elements above the diagonal, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) REAL array, dimension (min(M,N)) + The diagonal elements of the bidiagonal matrix B: + D(i) = A(i,i). + + E (output) REAL array, dimension (min(M,N)-1) + The off-diagonal elements of the bidiagonal matrix B: + if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; + if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. + + TAUQ (output) REAL array dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix Q. See Further Details. + + TAUP (output) REAL array, dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix P. See Further Details. + + WORK (workspace) REAL array, dimension (max(M,N)) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + If m >= n, + + Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors; + v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); + u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, + + Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors; + v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); + u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). + + The contents of A on exit are illustrated by the following examples: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) + ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) + ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) + ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) + ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) + ( v1 v2 v3 v4 v5 ) + + where d and e denote diagonal and off-diagonal elements of B, vi + denotes an element of the vector defining H(i), and ui an element of + the vector defining G(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info < 0) { + i__1 = -(*info); + xerbla_("SGEBD2", &i__1); + return 0; + } + + if (*m >= *n) { + +/* Reduce to upper bidiagonal form */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * + a_dim1], &c__1, &tauq[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.f; + +/* Apply H(i) to A(i:m,i+1:n) from the left */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__; + slarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tauq[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = d__[i__]; + + if (i__ < *n) { + +/* + Generate elementary reflector G(i) to annihilate + A(i,i+2:n) +*/ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + slarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( + i__3,*n) * a_dim1], lda, &taup[i__]); + e[i__] = a[i__ + (i__ + 1) * a_dim1]; + a[i__ + (i__ + 1) * a_dim1] = 1.f; + +/* Apply G(i) to A(i+1:m,i+1:n) from the right */ + + i__2 = *m - i__; + i__3 = *n - i__; + slarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], + lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &work[1]); + a[i__ + (i__ + 1) * a_dim1] = e[i__]; + } else { + taup[i__] = 0.f; + } +/* L10: */ + } + } else { + +/* Reduce to lower bidiagonal form */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector G(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * + a_dim1], lda, &taup[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.f; + +/* Apply G(i) to A(i+1:m,i:n) from the right */ + + i__2 = *m - i__; + i__3 = *n - i__ + 1; +/* Computing MIN */ + i__4 = i__ + 1; + slarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[ + i__], &a[min(i__4,*m) + i__ * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = d__[i__]; + + if (i__ < *m) { + +/* + Generate elementary reflector H(i) to annihilate + A(i+2:m,i) +*/ + + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) + + i__ * a_dim1], &c__1, &tauq[i__]); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.f; + +/* Apply H(i) to A(i+1:m,i+1:n) from the left */ + + i__2 = *m - i__; + i__3 = *n - i__; + slarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], & + c__1, &tauq[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &work[1]); + a[i__ + 1 + i__ * a_dim1] = e[i__]; + } else { + tauq[i__] = 0.f; + } +/* L20: */ + } + } + return 0; + +/* End of SGEBD2 */ + +} /* sgebd2_ */ + +/* Subroutine */ int sgebrd_(integer *m, integer *n, real *a, integer *lda, + real *d__, real *e, real *tauq, real *taup, real *work, integer * + lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j, nb, nx; + static real ws; + static integer nbmin, iinfo; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static integer minmn; + extern /* Subroutine */ int sgebd2_(integer *, integer *, real *, integer + *, real *, real *, real *, real *, real *, integer *), slabrd_( + integer *, integer *, integer *, real *, integer *, real *, real * + , real *, real *, real *, integer *, real *, integer *), xerbla_( + char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwrkx, ldwrky, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SGEBRD reduces a general real M-by-N matrix A to upper or lower + bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. + + If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. M >= 0. + + N (input) INTEGER + The number of columns in the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the M-by-N general matrix to be reduced. + On exit, + if m >= n, the diagonal and the first superdiagonal are + overwritten with the upper bidiagonal matrix B; the + elements below the diagonal, with the array TAUQ, represent + the orthogonal matrix Q as a product of elementary + reflectors, and the elements above the first superdiagonal, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors; + if m < n, the diagonal and the first subdiagonal are + overwritten with the lower bidiagonal matrix B; the + elements below the first subdiagonal, with the array TAUQ, + represent the orthogonal matrix Q as a product of + elementary reflectors, and the elements above the diagonal, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) REAL array, dimension (min(M,N)) + The diagonal elements of the bidiagonal matrix B: + D(i) = A(i,i). + + E (output) REAL array, dimension (min(M,N)-1) + The off-diagonal elements of the bidiagonal matrix B: + if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; + if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. + + TAUQ (output) REAL array dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix Q. See Further Details. + + TAUP (output) REAL array, dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix P. See Further Details. + + WORK (workspace/output) REAL array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The length of the array WORK. LWORK >= max(1,M,N). + For optimum performance LWORK >= (M+N)*NB, where NB + is the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + If m >= n, + + Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors; + v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); + u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, + + Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors; + v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); + u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). + + The contents of A on exit are illustrated by the following examples: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) + ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) + ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) + ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) + ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) + ( v1 v2 v3 v4 v5 ) + + where d and e denote diagonal and off-diagonal elements of B, vi + denotes an element of the vector defining H(i), and ui an element of + the vector defining G(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; +/* Computing MAX */ + i__1 = 1, i__2 = ilaenv_(&c__1, "SGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = max(i__1,i__2); + lwkopt = (*m + *n) * nb; + work[1] = (real) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = max(1,*m); + if (*lwork < max(i__1,*n) && ! lquery) { + *info = -10; + } + } + if (*info < 0) { + i__1 = -(*info); + xerbla_("SGEBRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + minmn = min(*m,*n); + if (minmn == 0) { + work[1] = 1.f; + return 0; + } + + ws = (real) max(*m,*n); + ldwrkx = *m; + ldwrky = *n; + + if (nb > 1 && nb < minmn) { + +/* + Set the crossover point NX. + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "SGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + +/* Determine when to switch from blocked to unblocked code. */ + + if (nx < minmn) { + ws = (real) ((*m + *n) * nb); + if ((real) (*lwork) < ws) { + +/* + Not enough work space for the optimal NB, consider using + a smaller block size. +*/ + + nbmin = ilaenv_(&c__2, "SGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + if (*lwork >= (*m + *n) * nbmin) { + nb = *lwork / (*m + *n); + } else { + nb = 1; + nx = minmn; + } + } + } + } else { + nx = minmn; + } + + i__1 = minmn - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + +/* + Reduce rows and columns i:i+nb-1 to bidiagonal form and return + the matrices X and Y which are needed to update the unreduced + part of the matrix +*/ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ + 1; + slabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[ + i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx + * nb + 1], &ldwrky); + +/* + Update the trailing submatrix A(i+nb:m,i+nb:n), using an update + of the form A := A - V*Y' - X*U' +*/ + + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + sgemm_("No transpose", "Transpose", &i__3, &i__4, &nb, &c_b151, &a[ + i__ + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + nb + 1], & + ldwrky, &c_b15, &a[i__ + nb + (i__ + nb) * a_dim1], lda); + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + sgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &c_b151, & + work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, & + c_b15, &a[i__ + nb + (i__ + nb) * a_dim1], lda); + +/* Copy diagonal and off-diagonal elements of B back into A */ + + if (*m >= *n) { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j + j * a_dim1] = d__[j]; + a[j + (j + 1) * a_dim1] = e[j]; +/* L10: */ + } + } else { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j + j * a_dim1] = d__[j]; + a[j + 1 + j * a_dim1] = e[j]; +/* L20: */ + } + } +/* L30: */ + } + +/* Use unblocked code to reduce the remainder of the matrix */ + + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + sgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], & + tauq[i__], &taup[i__], &work[1], &iinfo); + work[1] = ws; + return 0; + +/* End of SGEBRD */ + +} /* sgebrd_ */ + +/* Subroutine */ int sgeev_(char *jobvl, char *jobvr, integer *n, real *a, + integer *lda, real *wr, real *wi, real *vl, integer *ldvl, real *vr, + integer *ldvr, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3, i__4; + real r__1, r__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, k; + static real r__, cs, sn; + static integer ihi; + static real scl; + static integer ilo; + static real dum[1], eps; + static integer ibal; + static char side[1]; + static integer maxb; + static real anrm; + static integer ierr, itau, iwrk, nout; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *); + extern doublereal snrm2_(integer *, real *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + extern doublereal slapy2_(real *, real *); + extern /* Subroutine */ int slabad_(real *, real *); + static logical scalea; + static real cscale; + extern /* Subroutine */ int sgebak_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, integer *, integer *), sgebal_(char *, integer *, real *, integer *, + integer *, integer *, real *, integer *); + extern doublereal slamch_(char *), slange_(char *, integer *, + integer *, real *, integer *, real *); + extern /* Subroutine */ int sgehrd_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *), xerbla_(char + *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical select[1]; + static real bignum; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); + extern integer isamax_(integer *, real *, integer *); + extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, + integer *, real *, integer *), slartg_(real *, real *, + real *, real *, real *), sorghr_(integer *, integer *, integer *, + real *, integer *, real *, real *, integer *, integer *), shseqr_( + char *, char *, integer *, integer *, integer *, real *, integer * + , real *, real *, real *, integer *, real *, integer *, integer *), strevc_(char *, char *, logical *, integer *, + real *, integer *, real *, integer *, real *, integer *, integer * + , integer *, real *, integer *); + static integer minwrk, maxwrk; + static logical wantvl; + static real smlnum; + static integer hswork; + static logical lquery, wantvr; + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + December 8, 1999 + + + Purpose + ======= + + SGEEV computes for an N-by-N real nonsymmetric matrix A, the + eigenvalues and, optionally, the left and/or right eigenvectors. + + The right eigenvector v(j) of A satisfies + A * v(j) = lambda(j) * v(j) + where lambda(j) is its eigenvalue. + The left eigenvector u(j) of A satisfies + u(j)**H * A = lambda(j) * u(j)**H + where u(j)**H denotes the conjugate transpose of u(j). + + The computed eigenvectors are normalized to have Euclidean norm + equal to 1 and largest component real. + + Arguments + ========= + + JOBVL (input) CHARACTER*1 + = 'N': left eigenvectors of A are not computed; + = 'V': left eigenvectors of A are computed. + + JOBVR (input) CHARACTER*1 + = 'N': right eigenvectors of A are not computed; + = 'V': right eigenvectors of A are computed. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the N-by-N matrix A. + On exit, A has been overwritten. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + WR (output) REAL array, dimension (N) + WI (output) REAL array, dimension (N) + WR and WI contain the real and imaginary parts, + respectively, of the computed eigenvalues. Complex + conjugate pairs of eigenvalues appear consecutively + with the eigenvalue having the positive imaginary part + first. + + VL (output) REAL array, dimension (LDVL,N) + If JOBVL = 'V', the left eigenvectors u(j) are stored one + after another in the columns of VL, in the same order + as their eigenvalues. + If JOBVL = 'N', VL is not referenced. + If the j-th eigenvalue is real, then u(j) = VL(:,j), + the j-th column of VL. + If the j-th and (j+1)-st eigenvalues form a complex + conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and + u(j+1) = VL(:,j) - i*VL(:,j+1). + + LDVL (input) INTEGER + The leading dimension of the array VL. LDVL >= 1; if + JOBVL = 'V', LDVL >= N. + + VR (output) REAL array, dimension (LDVR,N) + If JOBVR = 'V', the right eigenvectors v(j) are stored one + after another in the columns of VR, in the same order + as their eigenvalues. + If JOBVR = 'N', VR is not referenced. + If the j-th eigenvalue is real, then v(j) = VR(:,j), + the j-th column of VR. + If the j-th and (j+1)-st eigenvalues form a complex + conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and + v(j+1) = VR(:,j) - i*VR(:,j+1). + + LDVR (input) INTEGER + The leading dimension of the array VR. LDVR >= 1; if + JOBVR = 'V', LDVR >= N. + + WORK (workspace/output) REAL array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,3*N), and + if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N. For good + performance, LWORK must generally be larger. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = i, the QR algorithm failed to compute all the + eigenvalues, and no eigenvectors have been computed; + elements i+1:N of WR and WI contain eigenvalues which + have converged. + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --wr; + --wi; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + wantvl = lsame_(jobvl, "V"); + wantvr = lsame_(jobvr, "V"); + if (! wantvl && ! lsame_(jobvl, "N")) { + *info = -1; + } else if (! wantvr && ! lsame_(jobvr, "N")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldvl < 1 || wantvl && *ldvl < *n) { + *info = -9; + } else if (*ldvr < 1 || wantvr && *ldvr < *n) { + *info = -11; + } + +/* + Compute workspace + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + NB refers to the optimal block size for the immediately + following subroutine, as returned by ILAENV. + HSWORK refers to the workspace preferred by SHSEQR, as + calculated below. HSWORK is computed assuming ILO=1 and IHI=N, + the worst case.) +*/ + + minwrk = 1; + if (*info == 0 && (*lwork >= 1 || lquery)) { + maxwrk = (*n << 1) + *n * ilaenv_(&c__1, "SGEHRD", " ", n, &c__1, n, & + c__0, (ftnlen)6, (ftnlen)1); + if (! wantvl && ! wantvr) { +/* Computing MAX */ + i__1 = 1, i__2 = *n * 3; + minwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = ilaenv_(&c__8, "SHSEQR", "EN", n, &c__1, n, &c_n1, (ftnlen) + 6, (ftnlen)2); + maxb = max(i__1,2); +/* + Computing MIN + Computing MAX +*/ + i__3 = 2, i__4 = ilaenv_(&c__4, "SHSEQR", "EN", n, &c__1, n, & + c_n1, (ftnlen)6, (ftnlen)2); + i__1 = min(maxb,*n), i__2 = max(i__3,i__4); + k = min(i__1,i__2); +/* Computing MAX */ + i__1 = k * (k + 2), i__2 = *n << 1; + hswork = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n + + hswork; + maxwrk = max(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = 1, i__2 = *n << 2; + minwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, "SOR" + "GHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = ilaenv_(&c__8, "SHSEQR", "SV", n, &c__1, n, &c_n1, (ftnlen) + 6, (ftnlen)2); + maxb = max(i__1,2); +/* + Computing MIN + Computing MAX +*/ + i__3 = 2, i__4 = ilaenv_(&c__4, "SHSEQR", "SV", n, &c__1, n, & + c_n1, (ftnlen)6, (ftnlen)2); + i__1 = min(maxb,*n), i__2 = max(i__3,i__4); + k = min(i__1,i__2); +/* Computing MAX */ + i__1 = k * (k + 2), i__2 = *n << 1; + hswork = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n + + hswork; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n << 2; + maxwrk = max(i__1,i__2); + } + work[1] = (real) maxwrk; + } + if (*lwork < minwrk && ! lquery) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEEV ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = slamch_("P"); + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1.f / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = slange_("M", n, n, &a[a_offset], lda, dum); + scalea = FALSE_; + if (anrm > 0.f && anrm < smlnum) { + scalea = TRUE_; + cscale = smlnum; + } else if (anrm > bignum) { + scalea = TRUE_; + cscale = bignum; + } + if (scalea) { + slascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & + ierr); + } + +/* + Balance the matrix + (Workspace: need N) +*/ + + ibal = 1; + sgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr); + +/* + Reduce to upper Hessenberg form + (Workspace: need 3*N, prefer 2*N+N*NB) +*/ + + itau = ibal + *n; + iwrk = itau + *n; + i__1 = *lwork - iwrk + 1; + sgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, + &ierr); + + if (wantvl) { + +/* + Want left eigenvectors + Copy Householder vectors to VL +*/ + + *(unsigned char *)side = 'L'; + slacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) + ; + +/* + Generate orthogonal matrix in VL + (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) +*/ + + i__1 = *lwork - iwrk + 1; + sorghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], + &i__1, &ierr); + +/* + Perform QR iteration, accumulating Schur vectors in VL + (Workspace: need N+1, prefer N+HSWORK (see comments) ) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + shseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + vl[vl_offset], ldvl, &work[iwrk], &i__1, info); + + if (wantvr) { + +/* + Want left and right eigenvectors + Copy Schur vectors to VR +*/ + + *(unsigned char *)side = 'B'; + slacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); + } + + } else if (wantvr) { + +/* + Want right eigenvectors + Copy Householder vectors to VR +*/ + + *(unsigned char *)side = 'R'; + slacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) + ; + +/* + Generate orthogonal matrix in VR + (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) +*/ + + i__1 = *lwork - iwrk + 1; + sorghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], + &i__1, &ierr); + +/* + Perform QR iteration, accumulating Schur vectors in VR + (Workspace: need N+1, prefer N+HSWORK (see comments) ) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + shseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + vr[vr_offset], ldvr, &work[iwrk], &i__1, info); + + } else { + +/* + Compute eigenvalues only + (Workspace: need N+1, prefer N+HSWORK (see comments) ) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + shseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + vr[vr_offset], ldvr, &work[iwrk], &i__1, info); + } + +/* If INFO > 0 from SHSEQR, then quit */ + + if (*info > 0) { + goto L50; + } + + if (wantvl || wantvr) { + +/* + Compute left and/or right eigenvectors + (Workspace: need 4*N) +*/ + + strevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, + &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &ierr); + } + + if (wantvl) { + +/* + Undo balancing of left eigenvectors + (Workspace: need N) +*/ + + sgebak_("B", "L", n, &ilo, &ihi, &work[ibal], n, &vl[vl_offset], ldvl, + &ierr); + +/* Normalize left eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.f) { + scl = 1.f / snrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + sscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + } else if (wi[i__] > 0.f) { + r__1 = snrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + r__2 = snrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + scl = 1.f / slapy2_(&r__1, &r__2); + sscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + sscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing 2nd power */ + r__1 = vl[k + i__ * vl_dim1]; +/* Computing 2nd power */ + r__2 = vl[k + (i__ + 1) * vl_dim1]; + work[iwrk + k - 1] = r__1 * r__1 + r__2 * r__2; +/* L10: */ + } + k = isamax_(n, &work[iwrk], &c__1); + slartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], + &cs, &sn, &r__); + srot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * + vl_dim1 + 1], &c__1, &cs, &sn); + vl[k + (i__ + 1) * vl_dim1] = 0.f; + } +/* L20: */ + } + } + + if (wantvr) { + +/* + Undo balancing of right eigenvectors + (Workspace: need N) +*/ + + sgebak_("B", "R", n, &ilo, &ihi, &work[ibal], n, &vr[vr_offset], ldvr, + &ierr); + +/* Normalize right eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.f) { + scl = 1.f / snrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + sscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + } else if (wi[i__] > 0.f) { + r__1 = snrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + r__2 = snrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + scl = 1.f / slapy2_(&r__1, &r__2); + sscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + sscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing 2nd power */ + r__1 = vr[k + i__ * vr_dim1]; +/* Computing 2nd power */ + r__2 = vr[k + (i__ + 1) * vr_dim1]; + work[iwrk + k - 1] = r__1 * r__1 + r__2 * r__2; +/* L30: */ + } + k = isamax_(n, &work[iwrk], &c__1); + slartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], + &cs, &sn, &r__); + srot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * + vr_dim1 + 1], &c__1, &cs, &sn); + vr[k + (i__ + 1) * vr_dim1] = 0.f; + } +/* L40: */ + } + } + +/* Undo scaling if necessary */ + +L50: + if (scalea) { + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = max(i__3,1); + slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + + 1], &i__2, &ierr); + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = max(i__3,1); + slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + + 1], &i__2, &ierr); + if (*info > 0) { + i__1 = ilo - 1; + slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], + n, &ierr); + i__1 = ilo - 1; + slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], + n, &ierr); + } + } + + work[1] = (real) maxwrk; + return 0; + +/* End of SGEEV */ + +} /* sgeev_ */ + +/* Subroutine */ int sgehd2_(integer *n, integer *ilo, integer *ihi, real *a, + integer *lda, real *tau, real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__; + static real aii; + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *), slarfg_(integer *, real *, real *, + integer *, real *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SGEHD2 reduces a real general matrix A to upper Hessenberg form H by + an orthogonal similarity transformation: Q' * A * Q = H . + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that A is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to SGEBAL; otherwise they should be + set to 1 and N respectively. See Further Details. + 1 <= ILO <= IHI <= max(1,N). + + A (input/output) REAL array, dimension (LDA,N) + On entry, the n by n general matrix to be reduced. + On exit, the upper triangle and the first subdiagonal of A + are overwritten with the upper Hessenberg matrix H, and the + elements below the first subdiagonal, with the array TAU, + represent the orthogonal matrix Q as a product of elementary + reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) REAL array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) REAL array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrix Q is represented as a product of (ihi-ilo) elementary + reflectors + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on + exit in A(i+2:ihi,i), and tau in TAU(i). + + The contents of A are illustrated by the following example, with + n = 7, ilo = 2 and ihi = 6: + + on entry, on exit, + + ( a a a a a a a ) ( a a h h h h a ) + ( a a a a a a ) ( a h h h h a ) + ( a a a a a a ) ( h h h h h h ) + ( a a a a a a ) ( v2 h h h h h ) + ( a a a a a a ) ( v2 v3 h h h h ) + ( a a a a a a ) ( v2 v3 v4 h h h ) + ( a ) ( a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEHD2", &i__1); + return 0; + } + + i__1 = *ihi - 1; + for (i__ = *ilo; i__ <= i__1; ++i__) { + +/* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */ + + i__2 = *ihi - i__; +/* Computing MIN */ + i__3 = i__ + 2; + slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ * + a_dim1], &c__1, &tau[i__]); + aii = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.f; + +/* Apply H(i) to A(1:ihi,i+1:ihi) from the right */ + + i__2 = *ihi - i__; + slarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]); + +/* Apply H(i) to A(i+1:ihi,i+1:n) from the left */ + + i__2 = *ihi - i__; + i__3 = *n - i__; + slarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]); + + a[i__ + 1 + i__ * a_dim1] = aii; +/* L10: */ + } + + return 0; + +/* End of SGEHD2 */ + +} /* sgehd2_ */ + +/* Subroutine */ int sgehrd_(integer *n, integer *ilo, integer *ihi, real *a, + integer *lda, real *tau, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__; + static real t[4160] /* was [65][64] */; + static integer ib; + static real ei; + static integer nb, nh, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *), sgehd2_(integer *, integer *, + integer *, real *, integer *, real *, real *, integer *), slarfb_( + char *, char *, char *, char *, integer *, integer *, integer *, + real *, integer *, real *, integer *, real *, integer *, real *, + integer *), slahrd_(integer *, + integer *, integer *, real *, integer *, real *, real *, integer * + , real *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SGEHRD reduces a real general matrix A to upper Hessenberg form H by + an orthogonal similarity transformation: Q' * A * Q = H . + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that A is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to SGEBAL; otherwise they should be + set to 1 and N respectively. See Further Details. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the N-by-N general matrix to be reduced. + On exit, the upper triangle and the first subdiagonal of A + are overwritten with the upper Hessenberg matrix H, and the + elements below the first subdiagonal, with the array TAU, + represent the orthogonal matrix Q as a product of elementary + reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) REAL array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to + zero. + + WORK (workspace/output) REAL array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The length of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrix Q is represented as a product of (ihi-ilo) elementary + reflectors + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on + exit in A(i+2:ihi,i), and tau in TAU(i). + + The contents of A are illustrated by the following example, with + n = 7, ilo = 2 and ihi = 6: + + on entry, on exit, + + ( a a a a a a a ) ( a a h h h h a ) + ( a a a a a a ) ( a h h h h a ) + ( a a a a a a ) ( h h h h h h ) + ( a a a a a a ) ( v2 h h h h h ) + ( a a a a a a ) ( v2 v3 h h h h ) + ( a a a a a a ) ( v2 v3 v4 h h h ) + ( a ) ( a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; +/* Computing MIN */ + i__1 = 64, i__2 = ilaenv_(&c__1, "SGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = min(i__1,i__2); + lwkopt = *n * nb; + work[1] = (real) lwkopt; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEHRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */ + + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + tau[i__] = 0.f; +/* L10: */ + } + i__1 = *n - 1; + for (i__ = max(1,*ihi); i__ <= i__1; ++i__) { + tau[i__] = 0.f; +/* L20: */ + } + +/* Quick return if possible */ + + nh = *ihi - *ilo + 1; + if (nh <= 1) { + work[1] = 1.f; + return 0; + } + +/* + Determine the block size. + + Computing MIN +*/ + i__1 = 64, i__2 = ilaenv_(&c__1, "SGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = min(i__1,i__2); + nbmin = 2; + iws = 1; + if (nb > 1 && nb < nh) { + +/* + Determine when to cross over from blocked to unblocked code + (last block is always handled by unblocked code). + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "SGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < nh) { + +/* Determine if workspace is large enough for blocked code. */ + + iws = *n * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: determine the + minimum value of NB, and reduce NB or force use of + unblocked code. + + Computing MAX +*/ + i__1 = 2, i__2 = ilaenv_(&c__2, "SGEHRD", " ", n, ilo, ihi, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + if (*lwork >= *n * nbmin) { + nb = *lwork / *n; + } else { + nb = 1; + } + } + } + } + ldwork = *n; + + if (nb < nbmin || nb >= nh) { + +/* Use unblocked code below */ + + i__ = *ilo; + + } else { + +/* Use blocked code */ + + i__1 = *ihi - 1 - nx; + i__2 = nb; + for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *ihi - i__; + ib = min(i__3,i__4); + +/* + Reduce columns i:i+ib-1 to Hessenberg form, returning the + matrices V and T of the block reflector H = I - V*T*V' + which performs the reduction, and also the matrix Y = A*V*T +*/ + + slahrd_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & + c__65, &work[1], &ldwork); + +/* + Apply the block reflector H to A(1:ihi,i+ib:ihi) from the + right, computing A := A - Y * V'. V(i+ib,ib-1) must be set + to 1. +*/ + + ei = a[i__ + ib + (i__ + ib - 1) * a_dim1]; + a[i__ + ib + (i__ + ib - 1) * a_dim1] = 1.f; + i__3 = *ihi - i__ - ib + 1; + sgemm_("No transpose", "Transpose", ihi, &i__3, &ib, &c_b151, & + work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, & + c_b15, &a[(i__ + ib) * a_dim1 + 1], lda); + a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei; + +/* + Apply the block reflector H to A(i+1:ihi,i+ib:n) from the + left +*/ + + i__3 = *ihi - i__; + i__4 = *n - i__ - ib + 1; + slarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & + i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, &c__65, &a[ + i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &ldwork); +/* L30: */ + } + } + +/* Use unblocked code to reduce the rest of the matrix */ + + sgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); + work[1] = (real) iws; + + return 0; + +/* End of SGEHRD */ + +} /* sgehrd_ */ + +/* Subroutine */ int sgelq2_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, k; + static real aii; + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *), slarfg_(integer *, real *, real *, + integer *, real *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SGELQ2 computes an LQ factorization of a real m by n matrix A: + A = L * Q. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the m by n matrix A. + On exit, the elements on and below the diagonal of the array + contain the m by min(m,n) lower trapezoidal matrix L (L is + lower triangular if m <= n); the elements above the diagonal, + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) REAL array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) REAL array, dimension (M) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(k) . . . H(2) H(1), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), + and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGELQ2", &i__1); + return 0; + } + + k = min(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * a_dim1] + , lda, &tau[i__]); + if (i__ < *m) { + +/* Apply H(i) to A(i+1:m,i:n) from the right */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.f; + i__2 = *m - i__; + i__3 = *n - i__ + 1; + slarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[ + i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = aii; + } +/* L10: */ + } + return 0; + +/* End of SGELQ2 */ + +} /* sgelq2_ */ + +/* Subroutine */ int sgelqf_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int sgelq2_(integer *, integer *, real *, integer + *, real *, real *, integer *), slarfb_(char *, char *, char *, + char *, integer *, integer *, integer *, real *, integer *, real * + , integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, + real *, integer *, real *, real *, integer *); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SGELQF computes an LQ factorization of a real M-by-N matrix A: + A = L * Q. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, the elements on and below the diagonal of the array + contain the m-by-min(m,n) lower trapezoidal matrix L (L is + lower triangular if m <= n); the elements above the diagonal, + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) REAL array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) REAL array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,M). + For optimum performance LWORK >= M*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(k) . . . H(2) H(1), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), + and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "SGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *m * nb; + work[1] = (real) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (*lwork < max(1,*m) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGELQF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = min(*m,*n); + if (k == 0) { + work[1] = 1.f; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "SGELQF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "SGELQF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* + Compute the LQ factorization of the current block + A(i:i+ib-1,i:n) +*/ + + i__3 = *n - i__ + 1; + sgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *m) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__3 = *n - i__ + 1; + slarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(i+ib:m,i:n) from the right */ + + i__3 = *m - i__ - ib + 1; + i__4 = *n - i__ + 1; + slarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, + &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. */ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + sgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); + } + + work[1] = (real) iws; + return 0; + +/* End of SGELQF */ + +} /* sgelqf_ */ + +/* Subroutine */ int sgeqr2_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, k; + static real aii; + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *), slarfg_(integer *, real *, real *, + integer *, real *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SGEQR2 computes a QR factorization of a real m by n matrix A: + A = Q * R. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the m by n matrix A. + On exit, the elements on and above the diagonal of the array + contain the min(m,n) by n upper trapezoidal matrix R (R is + upper triangular if m >= n); the elements below the diagonal, + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) REAL array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) REAL array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(1) H(2) . . . H(k), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), + and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEQR2", &i__1); + return 0; + } + + k = min(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1] + , &c__1, &tau[i__]); + if (i__ < *n) { + +/* Apply H(i) to A(i:m,i+1:n) from the left */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.f; + i__2 = *m - i__ + 1; + i__3 = *n - i__; + slarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = aii; + } +/* L10: */ + } + return 0; + +/* End of SGEQR2 */ + +} /* sgeqr2_ */ + +/* Subroutine */ int sgeqrf_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int sgeqr2_(integer *, integer *, real *, integer + *, real *, real *, integer *), slarfb_(char *, char *, char *, + char *, integer *, integer *, integer *, real *, integer *, real * + , integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, + real *, integer *, real *, real *, integer *); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SGEQRF computes a QR factorization of a real M-by-N matrix A: + A = Q * R. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, the elements on and above the diagonal of the array + contain the min(M,N)-by-N upper trapezoidal matrix R (R is + upper triangular if m >= n); the elements below the diagonal, + with the array TAU, represent the orthogonal matrix Q as a + product of min(m,n) elementary reflectors (see Further + Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) REAL array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) REAL array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is + the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(1) H(2) . . . H(k), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), + and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *n * nb; + work[1] = (real) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEQRF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = min(*m,*n); + if (k == 0) { + work[1] = 1.f; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "SGEQRF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "SGEQRF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* + Compute the QR factorization of the current block + A(i:m,i:i+ib-1) +*/ + + i__3 = *m - i__ + 1; + sgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *n) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__3 = *m - i__ + 1; + slarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H' to A(i:m,i+ib:n) from the left */ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ - ib + 1; + slarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & + i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib + + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. */ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + sgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); + } + + work[1] = (real) iws; + return 0; + +/* End of SGEQRF */ + +} /* sgeqrf_ */ + +/* Subroutine */ int sgesdd_(char *jobz, integer *m, integer *n, real *a, + integer *lda, real *s, real *u, integer *ldu, real *vt, integer *ldvt, + real *work, integer *lwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2, i__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, ie, il, ir, iu, blk; + static real dum[1], eps; + static integer ivt, iscl; + static real anrm; + static integer idum[1], ierr, itau; + extern logical lsame_(char *, char *); + static integer chunk; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static integer minmn, wrkbl, itaup, itauq, mnthr; + static logical wntqa; + static integer nwork; + static logical wntqn, wntqo, wntqs; + static integer bdspac; + extern /* Subroutine */ int sbdsdc_(char *, char *, integer *, real *, + real *, real *, integer *, real *, integer *, real *, integer *, + real *, integer *, integer *), sgebrd_(integer *, + integer *, real *, integer *, real *, real *, real *, real *, + real *, integer *, integer *); + extern doublereal slamch_(char *), slange_(char *, integer *, + integer *, real *, integer *, real *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static real bignum; + extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer + *, real *, real *, integer *, integer *), slascl_(char *, integer + *, integer *, real *, real *, integer *, integer *, real *, + integer *, integer *), sgeqrf_(integer *, integer *, real + *, integer *, real *, real *, integer *, integer *), slacpy_(char + *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, + real *, integer *), sorgbr_(char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, integer * + ); + static integer ldwrkl; + extern /* Subroutine */ int sormbr_(char *, char *, char *, integer *, + integer *, integer *, real *, integer *, real *, real *, integer * + , real *, integer *, integer *); + static integer ldwrkr, minwrk, ldwrku, maxwrk; + extern /* Subroutine */ int sorglq_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *); + static integer ldwkvt; + static real smlnum; + static logical wntqas; + extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *); + static logical lquery; + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + SGESDD computes the singular value decomposition (SVD) of a real + M-by-N matrix A, optionally computing the left and right singular + vectors. If singular vectors are desired, it uses a + divide-and-conquer algorithm. + + The SVD is written + + A = U * SIGMA * transpose(V) + + where SIGMA is an M-by-N matrix which is zero except for its + min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and + V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA + are the singular values of A; they are real and non-negative, and + are returned in descending order. The first min(m,n) columns of + U and V are the left and right singular vectors of A. + + Note that the routine returns VT = V**T, not V. + + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + JOBZ (input) CHARACTER*1 + Specifies options for computing all or part of the matrix U: + = 'A': all M columns of U and all N rows of V**T are + returned in the arrays U and VT; + = 'S': the first min(M,N) columns of U and the first + min(M,N) rows of V**T are returned in the arrays U + and VT; + = 'O': If M >= N, the first N columns of U are overwritten + on the array A and all rows of V**T are returned in + the array VT; + otherwise, all columns of U are returned in the + array U and the first M rows of V**T are overwritten + in the array VT; + = 'N': no columns of U or rows of V**T are computed. + + M (input) INTEGER + The number of rows of the input matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the input matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, + if JOBZ = 'O', A is overwritten with the first N columns + of U (the left singular vectors, stored + columnwise) if M >= N; + A is overwritten with the first M rows + of V**T (the right singular vectors, stored + rowwise) otherwise. + if JOBZ .ne. 'O', the contents of A are destroyed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + S (output) REAL array, dimension (min(M,N)) + The singular values of A, sorted so that S(i) >= S(i+1). + + U (output) REAL array, dimension (LDU,UCOL) + UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; + UCOL = min(M,N) if JOBZ = 'S'. + If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M + orthogonal matrix U; + if JOBZ = 'S', U contains the first min(M,N) columns of U + (the left singular vectors, stored columnwise); + if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= 1; if + JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. + + VT (output) REAL array, dimension (LDVT,N) + If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the + N-by-N orthogonal matrix V**T; + if JOBZ = 'S', VT contains the first min(M,N) rows of + V**T (the right singular vectors, stored rowwise); + if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= 1; if + JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; + if JOBZ = 'S', LDVT >= min(M,N). + + WORK (workspace/output) REAL array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK; + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= 1. + If JOBZ = 'N', + LWORK >= 3*min(M,N) + max(max(M,N),6*min(M,N)). + If JOBZ = 'O', + LWORK >= 3*min(M,N)*min(M,N) + + max(max(M,N),5*min(M,N)*min(M,N)+4*min(M,N)). + If JOBZ = 'S' or 'A' + LWORK >= 3*min(M,N)*min(M,N) + + max(max(M,N),4*min(M,N)*min(M,N)+4*min(M,N)). + For good performance, LWORK should generally be larger. + If LWORK < 0 but other input arguments are legal, WORK(1) + returns the optimal LWORK. + + IWORK (workspace) INTEGER array, dimension (8*min(M,N)) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: SBDSDC did not converge, updating process failed. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + minmn = min(*m,*n); + mnthr = (integer) (minmn * 11.f / 6.f); + wntqa = lsame_(jobz, "A"); + wntqs = lsame_(jobz, "S"); + wntqas = wntqa || wntqs; + wntqo = lsame_(jobz, "O"); + wntqn = lsame_(jobz, "N"); + minwrk = 1; + maxwrk = 1; + lquery = *lwork == -1; + + if (! (wntqa || wntqs || wntqo || wntqn)) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < * + m) { + *info = -8; + } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || + wntqo && *m >= *n && *ldvt < *n) { + *info = -10; + } + +/* + Compute workspace + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + NB refers to the optimal block size for the immediately + following subroutine, as returned by ILAENV.) +*/ + + if (*info == 0 && *m > 0 && *n > 0) { + if (*m >= *n) { + +/* Compute space needed for SBDSDC */ + + if (wntqn) { + bdspac = *n * 7; + } else { + bdspac = *n * 3 * *n + (*n << 2); + } + if (*m >= mnthr) { + if (wntqn) { + +/* Path 1 (M much larger than N, JOBZ='N') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, + "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n; + maxwrk = max(i__1,i__2); + minwrk = bdspac + *n; + } else if (wntqo) { + +/* Path 2 (M much larger than N, JOBZ='O') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "SORGQR", + " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, + "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + (*n << 1) * *n; + minwrk = bdspac + (*n << 1) * *n + *n * 3; + } else if (wntqs) { + +/* Path 3 (M much larger than N, JOBZ='S') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "SORGQR", + " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, + "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *n * *n; + minwrk = bdspac + *n * *n + *n * 3; + } else if (wntqa) { + +/* Path 4 (M much larger than N, JOBZ='A') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *m * ilaenv_(&c__1, "SORGQR", + " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, + "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *n * *n; + minwrk = bdspac + *n * *n + *n * 3; + } + } else { + +/* Path 5 (M at least N, but not much larger) */ + + wrkbl = *n * 3 + (*m + *n) * ilaenv_(&c__1, "SGEBRD", " ", m, + n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (wntqn) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + maxwrk = max(i__1,i__2); + minwrk = *n * 3 + max(*m,bdspac); + } else if (wntqo) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "QLN", m, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *n; +/* Computing MAX */ + i__1 = *m, i__2 = *n * *n + bdspac; + minwrk = *n * 3 + max(i__1,i__2); + } else if (wntqs) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "QLN", m, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + maxwrk = max(i__1,i__2); + minwrk = *n * 3 + max(*m,bdspac); + } else if (wntqa) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = bdspac + *n * 3; + maxwrk = max(i__1,i__2); + minwrk = *n * 3 + max(*m,bdspac); + } + } + } else { + +/* Compute space needed for SBDSDC */ + + if (wntqn) { + bdspac = *m * 7; + } else { + bdspac = *m * 3 * *m + (*m << 2); + } + if (*n >= mnthr) { + if (wntqn) { + +/* Path 1t (N much larger than M, JOBZ='N') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, + "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m; + maxwrk = max(i__1,i__2); + minwrk = bdspac + *m; + } else if (wntqo) { + +/* Path 2t (N much larger than M, JOBZ='O') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "SORGLQ", + " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, + "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + (*m << 1) * *m; + minwrk = bdspac + (*m << 1) * *m + *m * 3; + } else if (wntqs) { + +/* Path 3t (N much larger than M, JOBZ='S') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "SORGLQ", + " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, + "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *m; + minwrk = bdspac + *m * *m + *m * 3; + } else if (wntqa) { + +/* Path 4t (N much larger than M, JOBZ='A') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *n * ilaenv_(&c__1, "SORGLQ", + " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, + "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *m; + minwrk = bdspac + *m * *m + *m * 3; + } + } else { + +/* Path 5t (N greater than M, but not much larger) */ + + wrkbl = *m * 3 + (*m + *n) * ilaenv_(&c__1, "SGEBRD", " ", m, + n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (wntqn) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + maxwrk = max(i__1,i__2); + minwrk = *m * 3 + max(*n,bdspac); + } else if (wntqo) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "PRT", m, n, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *n; +/* Computing MAX */ + i__1 = *n, i__2 = *m * *m + bdspac; + minwrk = *m * 3 + max(i__1,i__2); + } else if (wntqs) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "PRT", m, n, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + maxwrk = max(i__1,i__2); + minwrk = *m * 3 + max(*n,bdspac); + } else if (wntqa) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "PRT", n, n, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + maxwrk = max(i__1,i__2); + minwrk = *m * 3 + max(*n,bdspac); + } + } + } + work[1] = (real) maxwrk; + } + + if (*lwork < minwrk && ! lquery) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGESDD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + if (*lwork >= 1) { + work[1] = 1.f; + } + return 0; + } + +/* Get machine constants */ + + eps = slamch_("P"); + smlnum = sqrt(slamch_("S")) / eps; + bignum = 1.f / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = slange_("M", m, n, &a[a_offset], lda, dum); + iscl = 0; + if (anrm > 0.f && anrm < smlnum) { + iscl = 1; + slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & + ierr); + } else if (anrm > bignum) { + iscl = 1; + slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & + ierr); + } + + if (*m >= *n) { + +/* + A has at least as many rows as columns. If A has sufficiently + more rows than columns, first reduce using the QR + decomposition (if sufficient workspace available) +*/ + + if (*m >= mnthr) { + + if (wntqn) { + +/* + Path 1 (M much larger than N, JOBZ='N') + No singular vectors to be computed +*/ + + itau = 1; + nwork = itau + *n; + +/* + Compute A=Q*R + (Workspace: need 2*N, prefer N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Zero out below R */ + + i__1 = *n - 1; + i__2 = *n - 1; + slaset_("L", &i__1, &i__2, &c_b29, &c_b29, &a[a_dim1 + 2], + lda); + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in A + (Workspace: need 4*N, prefer 3*N+2*N*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + nwork = ie + *n; + +/* + Perform bidiagonal SVD, computing singular values only + (Workspace: need N+BDSPAC) +*/ + + sbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + + } else if (wntqo) { + +/* + Path 2 (M much larger than N, JOBZ = 'O') + N left singular vectors to be overwritten on A and + N right singular vectors to be computed in VT +*/ + + ir = 1; + +/* WORK(IR) is LDWRKR by N */ + + if (*lwork >= *lda * *n + *n * *n + *n * 3 + bdspac) { + ldwrkr = *lda; + } else { + ldwrkr = (*lwork - *n * *n - *n * 3 - bdspac) / *n; + } + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* + Compute A=Q*R + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__1 = *n - 1; + i__2 = *n - 1; + slaset_("L", &i__1, &i__2, &c_b29, &c_b29, &work[ir + 1], & + ldwrkr); + +/* + Generate Q in A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in VT, copying result to WORK(IR) + (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* WORK(IU) is N by N */ + + iu = nwork; + nwork = iu + *n * *n; + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in WORK(IU) and computing right + singular vectors of bidiagonal matrix in VT + (Workspace: need N+N*N+BDSPAC) +*/ + + sbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite WORK(IU) by left singular vectors of R + and VT by right singular vectors of R + (Workspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &work[iu], n, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + +/* + Multiply Q in A by left singular vectors of R in + WORK(IU), storing result in WORK(IR) and copying to A + (Workspace: need 2*N*N, prefer N*N+M*N) +*/ + + i__1 = *m; + i__2 = ldwrkr; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrkr); + sgemm_("N", "N", &chunk, n, n, &c_b15, &a[i__ + a_dim1], + lda, &work[iu], n, &c_b29, &work[ir], &ldwrkr); + slacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + + a_dim1], lda); +/* L10: */ + } + + } else if (wntqs) { + +/* + Path 3 (M much larger than N, JOBZ='S') + N left singular vectors to be computed in U and + N right singular vectors to be computed in VT +*/ + + ir = 1; + +/* WORK(IR) is N by N */ + + ldwrkr = *n; + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* + Compute A=Q*R + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__2 = *n - 1; + i__1 = *n - 1; + slaset_("L", &i__2, &i__1, &c_b29, &c_b29, &work[ir + 1], & + ldwrkr); + +/* + Generate Q in A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in WORK(IR) + (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagoal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need N+BDSPAC) +*/ + + sbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of R and VT + by right singular vectors of R + (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + + i__2 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply Q in A by left singular vectors of R in + WORK(IR), storing result in U + (Workspace: need N*N) +*/ + + slacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr); + sgemm_("N", "N", m, n, n, &c_b15, &a[a_offset], lda, &work[ir] + , &ldwrkr, &c_b29, &u[u_offset], ldu); + + } else if (wntqa) { + +/* + Path 4 (M much larger than N, JOBZ='A') + M left singular vectors to be computed in U and + N right singular vectors to be computed in VT +*/ + + iu = 1; + +/* WORK(IU) is N by N */ + + ldwrku = *n; + itau = iu + ldwrku * *n; + nwork = itau + *n; + +/* + Compute A=Q*R, copying result to U + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + +/* + Generate Q in U + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + i__2 = *lwork - nwork + 1; + sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork], + &i__2, &ierr); + +/* Produce R in A, zeroing out other entries */ + + i__2 = *n - 1; + i__1 = *n - 1; + slaset_("L", &i__2, &i__1, &c_b29, &c_b29, &a[a_dim1 + 2], + lda); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in A + (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in WORK(IU) and computing right + singular vectors of bidiagonal matrix in VT + (Workspace: need N+N*N+BDSPAC) +*/ + + sbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite WORK(IU) by left singular vectors of R and VT + by right singular vectors of R + (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & + ierr); + i__2 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply Q in U by left singular vectors of R in + WORK(IU), storing result in A + (Workspace: need N*N) +*/ + + sgemm_("N", "N", m, n, n, &c_b15, &u[u_offset], ldu, &work[iu] + , &ldwrku, &c_b29, &a[a_offset], lda); + +/* Copy left singular vectors of A from A to U */ + + slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + + } + + } else { + +/* + M .LT. MNTHR + + Path 5 (M at least N, but not much larger) + Reduce to bidiagonal form without QR decomposition +*/ + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize A + (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* + Perform bidiagonal SVD, only computing singular values + (Workspace: need N+BDSPAC) +*/ + + sbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + } else if (wntqo) { + iu = nwork; + if (*lwork >= *m * *n + *n * 3 + bdspac) { + +/* WORK( IU ) is M by N */ + + ldwrku = *m; + nwork = iu + ldwrku * *n; + slaset_("F", m, n, &c_b29, &c_b29, &work[iu], &ldwrku); + } else { + +/* WORK( IU ) is N by N */ + + ldwrku = *n; + nwork = iu + ldwrku * *n; + +/* WORK(IR) is LDWRKR by N */ + + ir = nwork; + ldwrkr = (*lwork - *n * *n - *n * 3) / *n; + } + nwork = iu + ldwrku * *n; + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in WORK(IU) and computing right + singular vectors of bidiagonal matrix in VT + (Workspace: need N+N*N+BDSPAC) +*/ + + sbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], &ldwrku, & + vt[vt_offset], ldvt, dum, idum, &work[nwork], &iwork[ + 1], info); + +/* + Overwrite VT by right singular vectors of A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + + if (*lwork >= *m * *n + *n * 3 + bdspac) { + +/* + Overwrite WORK(IU) by left singular vectors of A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & + ierr); + +/* Copy left singular vectors of A from WORK(IU) to A */ + + slacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda); + } else { + +/* + Generate Q in A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & + work[nwork], &i__2, &ierr); + +/* + Multiply Q in A by left singular vectors of + bidiagonal matrix in WORK(IU), storing result in + WORK(IR) and copying to A + (Workspace: need 2*N*N, prefer N*N+M*N) +*/ + + i__2 = *m; + i__1 = ldwrkr; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrkr); + sgemm_("N", "N", &chunk, n, n, &c_b15, &a[i__ + + a_dim1], lda, &work[iu], &ldwrku, &c_b29, & + work[ir], &ldwrkr); + slacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + + a_dim1], lda); +/* L20: */ + } + } + + } else if (wntqs) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need N+BDSPAC) +*/ + + slaset_("F", m, n, &c_b29, &c_b29, &u[u_offset], ldu); + sbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of A and VT + by right singular vectors of A + (Workspace: need 3*N, prefer 2*N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } else if (wntqa) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need N+BDSPAC) +*/ + + slaset_("F", m, m, &c_b29, &c_b29, &u[u_offset], ldu); + sbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Set the right corner of U to identity matrix */ + + i__1 = *m - *n; + i__2 = *m - *n; + slaset_("F", &i__1, &i__2, &c_b29, &c_b15, &u[*n + 1 + (*n + + 1) * u_dim1], ldu); + +/* + Overwrite U by left singular vectors of A and VT + by right singular vectors of A + (Workspace: need N*N+2*N+M, prefer N*N+2*N+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } + + } + + } else { + +/* + A has more columns than rows. If A has sufficiently more + columns than rows, first reduce using the LQ decomposition (if + sufficient workspace available) +*/ + + if (*n >= mnthr) { + + if (wntqn) { + +/* + Path 1t (N much larger than M, JOBZ='N') + No singular vectors to be computed +*/ + + itau = 1; + nwork = itau + *m; + +/* + Compute A=L*Q + (Workspace: need 2*M, prefer M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Zero out above L */ + + i__1 = *m - 1; + i__2 = *m - 1; + slaset_("U", &i__1, &i__2, &c_b29, &c_b29, &a[(a_dim1 << 1) + + 1], lda); + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in A + (Workspace: need 4*M, prefer 3*M+2*M*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + nwork = ie + *m; + +/* + Perform bidiagonal SVD, computing singular values only + (Workspace: need M+BDSPAC) +*/ + + sbdsdc_("U", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + + } else if (wntqo) { + +/* + Path 2t (N much larger than M, JOBZ='O') + M right singular vectors to be overwritten on A and + M left singular vectors to be computed in U +*/ + + ivt = 1; + +/* IVT is M by M */ + + il = ivt + *m * *m; + if (*lwork >= *m * *n + *m * *m + *m * 3 + bdspac) { + +/* WORK(IL) is M by N */ + + ldwrkl = *m; + chunk = *n; + } else { + ldwrkl = *m; + chunk = (*lwork - *m * *m) / *m; + } + itau = il + ldwrkl * *m; + nwork = itau + *m; + +/* + Compute A=L*Q + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy L to WORK(IL), zeroing about above it */ + + slacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__1 = *m - 1; + i__2 = *m - 1; + slaset_("U", &i__1, &i__2, &c_b29, &c_b29, &work[il + ldwrkl], + &ldwrkl); + +/* + Generate Q in A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in WORK(IL) + (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U, and computing right singular + vectors of bidiagonal matrix in WORK(IVT) + (Workspace: need M+M*M+BDSPAC) +*/ + + sbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + work[ivt], m, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of L and WORK(IVT) + by right singular vectors of L + (Workspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + sormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &work[ivt], m, &work[nwork], &i__1, &ierr); + +/* + Multiply right singular vectors of L in WORK(IVT) by Q + in A, storing result in WORK(IL) and copying to A + (Workspace: need 2*M*M, prefer M*M+M*N) +*/ + + i__1 = *n; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + sgemm_("N", "N", m, &blk, m, &c_b15, &work[ivt], m, &a[ + i__ * a_dim1 + 1], lda, &c_b29, &work[il], & + ldwrkl); + slacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 + + 1], lda); +/* L30: */ + } + + } else if (wntqs) { + +/* + Path 3t (N much larger than M, JOBZ='S') + M right singular vectors to be computed in VT and + M left singular vectors to be computed in U +*/ + + il = 1; + +/* WORK(IL) is M by M */ + + ldwrkl = *m; + itau = il + ldwrkl * *m; + nwork = itau + *m; + +/* + Compute A=L*Q + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy L to WORK(IL), zeroing out above it */ + + slacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__2 = *m - 1; + i__1 = *m - 1; + slaset_("U", &i__2, &i__1, &c_b29, &c_b29, &work[il + ldwrkl], + &ldwrkl); + +/* + Generate Q in A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in WORK(IU), copying result to U + (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need M+BDSPAC) +*/ + + sbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of L and VT + by right singular vectors of L + (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + i__2 = *lwork - nwork + 1; + sormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply right singular vectors of L in WORK(IL) by + Q in A, storing result in VT + (Workspace: need M*M) +*/ + + slacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl); + sgemm_("N", "N", m, n, m, &c_b15, &work[il], &ldwrkl, &a[ + a_offset], lda, &c_b29, &vt[vt_offset], ldvt); + + } else if (wntqa) { + +/* + Path 4t (N much larger than M, JOBZ='A') + N right singular vectors to be computed in VT and + M left singular vectors to be computed in U +*/ + + ivt = 1; + +/* WORK(IVT) is M by M */ + + ldwkvt = *m; + itau = ivt + ldwkvt * *m; + nwork = itau + *m; + +/* + Compute A=L*Q, copying result to VT + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + +/* + Generate Q in VT + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[ + nwork], &i__2, &ierr); + +/* Produce L in A, zeroing out other entries */ + + i__2 = *m - 1; + i__1 = *m - 1; + slaset_("U", &i__2, &i__1, &c_b29, &c_b29, &a[(a_dim1 << 1) + + 1], lda); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in A + (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in WORK(IVT) + (Workspace: need M+M*M+BDSPAC) +*/ + + sbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] + , info); + +/* + Overwrite U by left singular vectors of L and WORK(IVT) + by right singular vectors of L + (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + i__2 = *lwork - nwork + 1; + sormbr_("P", "R", "T", m, m, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply right singular vectors of L in WORK(IVT) by + Q in VT, storing result in A + (Workspace: need M*M) +*/ + + sgemm_("N", "N", m, n, m, &c_b15, &work[ivt], &ldwkvt, &vt[ + vt_offset], ldvt, &c_b29, &a[a_offset], lda); + +/* Copy right singular vectors of A from A to VT */ + + slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + + } + + } else { + +/* + N .LT. MNTHR + + Path 5t (N greater than M, but not much larger) + Reduce to bidiagonal form without LQ decomposition +*/ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize A + (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* + Perform bidiagonal SVD, only computing singular values + (Workspace: need M+BDSPAC) +*/ + + sbdsdc_("L", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + } else if (wntqo) { + ldwkvt = *m; + ivt = nwork; + if (*lwork >= *m * *n + *m * 3 + bdspac) { + +/* WORK( IVT ) is M by N */ + + slaset_("F", m, n, &c_b29, &c_b29, &work[ivt], &ldwkvt); + nwork = ivt + ldwkvt * *n; + } else { + +/* WORK( IVT ) is M by M */ + + nwork = ivt + ldwkvt * *m; + il = nwork; + +/* WORK(IL) is M by CHUNK */ + + chunk = (*lwork - *m * *m - *m * 3) / *m; + } + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in WORK(IVT) + (Workspace: need M*M+BDSPAC) +*/ + + sbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] + , info); + +/* + Overwrite U by left singular vectors of A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + + if (*lwork >= *m * *n + *m * 3 + bdspac) { + +/* + Overwrite WORK(IVT) by left singular vectors of A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, + &ierr); + +/* Copy right singular vectors of A from WORK(IVT) to A */ + + slacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda); + } else { + +/* + Generate P**T in A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* + Multiply Q in A by right singular vectors of + bidiagonal matrix in WORK(IVT), storing result in + WORK(IL) and copying to A + (Workspace: need 2*M*M, prefer M*M+M*N) +*/ + + i__2 = *n; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + sgemm_("N", "N", m, &blk, m, &c_b15, &work[ivt], & + ldwkvt, &a[i__ * a_dim1 + 1], lda, &c_b29, & + work[il], m); + slacpy_("F", m, &blk, &work[il], m, &a[i__ * a_dim1 + + 1], lda); +/* L40: */ + } + } + } else if (wntqs) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need M+BDSPAC) +*/ + + slaset_("F", m, n, &c_b29, &c_b29, &vt[vt_offset], ldvt); + sbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of A and VT + by right singular vectors of A + (Workspace: need 3*M, prefer 2*M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + sormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } else if (wntqa) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need M+BDSPAC) +*/ + + slaset_("F", n, n, &c_b29, &c_b29, &vt[vt_offset], ldvt); + sbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Set the right corner of VT to identity matrix */ + + i__1 = *n - *m; + i__2 = *n - *m; + slaset_("F", &i__1, &i__2, &c_b29, &c_b15, &vt[*m + 1 + (*m + + 1) * vt_dim1], ldvt); + +/* + Overwrite U by left singular vectors of A and VT + by right singular vectors of A + (Workspace: need 2*M+N, prefer 2*M+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } + + } + + } + +/* Undo scaling if necessary */ + + if (iscl == 1) { + if (anrm > bignum) { + slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + if (anrm < smlnum) { + slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + } + +/* Return optimal workspace in WORK(1) */ + + work[1] = (real) maxwrk; + + return 0; + +/* End of SGESDD */ + +} /* sgesdd_ */ + +/* Subroutine */ int sgesv_(integer *n, integer *nrhs, real *a, integer *lda, + integer *ipiv, real *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int xerbla_(char *, integer *), sgetrf_( + integer *, integer *, real *, integer *, integer *, integer *), + sgetrs_(char *, integer *, integer *, real *, integer *, integer * + , real *, integer *, integer *); + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + SGESV computes the solution to a real system of linear equations + A * X = B, + where A is an N-by-N matrix and X and B are N-by-NRHS matrices. + + The LU decomposition with partial pivoting and row interchanges is + used to factor A as + A = P * L * U, + where P is a permutation matrix, L is unit lower triangular, and U is + upper triangular. The factored form of A is then used to solve the + system of equations A * X = B. + + Arguments + ========= + + N (input) INTEGER + The number of linear equations, i.e., the order of the + matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the N-by-N coefficient matrix A. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + IPIV (output) INTEGER array, dimension (N) + The pivot indices that define the permutation matrix P; + row i of the matrix was interchanged with row IPIV(i). + + B (input/output) REAL array, dimension (LDB,NRHS) + On entry, the N-by-NRHS matrix of right hand side matrix B. + On exit, if INFO = 0, the N-by-NRHS solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, U(i,i) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, so the solution could not be computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*nrhs < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGESV ", &i__1); + return 0; + } + +/* Compute the LU factorization of A. */ + + sgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + + sgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[ + b_offset], ldb, info); + } + return 0; + +/* End of SGESV */ + +} /* sgesv_ */ + +/* Subroutine */ int sgetf2_(integer *m, integer *n, real *a, integer *lda, + integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + real r__1; + + /* Local variables */ + static integer j, jp; + extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, + integer *, real *, integer *, real *, integer *), sscal_(integer * + , real *, real *, integer *), sswap_(integer *, real *, integer *, + real *, integer *), xerbla_(char *, integer *); + extern integer isamax_(integer *, real *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1992 + + + Purpose + ======= + + SGETF2 computes an LU factorization of a general m-by-n matrix A + using partial pivoting with row interchanges. + + The factorization has the form + A = P * L * U + where P is a permutation matrix, L is lower triangular with unit + diagonal elements (lower trapezoidal if m > n), and U is upper + triangular (upper trapezoidal if m < n). + + This is the right-looking Level 2 BLAS version of the algorithm. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the m by n matrix to be factored. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + IPIV (output) INTEGER array, dimension (min(M,N)) + The pivot indices; for 1 <= i <= min(M,N), row i of the + matrix was interchanged with row IPIV(i). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + > 0: if INFO = k, U(k,k) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, and division by zero will occur if it is used + to solve a system of equations. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGETF2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { + +/* Find pivot and test for singularity. */ + + i__2 = *m - j + 1; + jp = j - 1 + isamax_(&i__2, &a[j + j * a_dim1], &c__1); + ipiv[j] = jp; + if (a[jp + j * a_dim1] != 0.f) { + +/* Apply the interchange to columns 1:N. */ + + if (jp != j) { + sswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda); + } + +/* Compute elements J+1:M of J-th column. */ + + if (j < *m) { + i__2 = *m - j; + r__1 = 1.f / a[j + j * a_dim1]; + sscal_(&i__2, &r__1, &a[j + 1 + j * a_dim1], &c__1); + } + + } else if (*info == 0) { + + *info = j; + } + + if (j < min(*m,*n)) { + +/* Update trailing submatrix. */ + + i__2 = *m - j; + i__3 = *n - j; + sger_(&i__2, &i__3, &c_b151, &a[j + 1 + j * a_dim1], &c__1, &a[j + + (j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], + lda); + } +/* L10: */ + } + return 0; + +/* End of SGETF2 */ + +} /* sgetf2_ */ + +/* Subroutine */ int sgetrf_(integer *m, integer *n, real *a, integer *lda, + integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + + /* Local variables */ + static integer i__, j, jb, nb, iinfo; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *), strsm_(char *, char *, char *, + char *, integer *, integer *, real *, real *, integer *, real *, + integer *), sgetf2_(integer *, + integer *, real *, integer *, integer *, integer *), xerbla_(char + *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer + *, integer *, integer *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + SGETRF computes an LU factorization of a general M-by-N matrix A + using partial pivoting with row interchanges. + + The factorization has the form + A = P * L * U + where P is a permutation matrix, L is lower triangular with unit + diagonal elements (lower trapezoidal if m > n), and U is upper + triangular (upper trapezoidal if m < n). + + This is the right-looking Level 3 BLAS version of the algorithm. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the M-by-N matrix to be factored. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + IPIV (output) INTEGER array, dimension (min(M,N)) + The pivot indices; for 1 <= i <= min(M,N), row i of the + matrix was interchanged with row IPIV(i). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, U(i,i) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, and division by zero will occur if it is used + to solve a system of equations. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGETRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "SGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + if (nb <= 1 || nb >= min(*m,*n)) { + +/* Use unblocked code. */ + + sgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); + } else { + +/* Use blocked code. */ + + i__1 = min(*m,*n); + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = min(*m,*n) - j + 1; + jb = min(i__3,nb); + +/* + Factor diagonal and subdiagonal blocks and test for exact + singularity. +*/ + + i__3 = *m - j + 1; + sgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); + +/* Adjust INFO and the pivot indices. */ + + if (*info == 0 && iinfo > 0) { + *info = iinfo + j - 1; + } +/* Computing MIN */ + i__4 = *m, i__5 = j + jb - 1; + i__3 = min(i__4,i__5); + for (i__ = j; i__ <= i__3; ++i__) { + ipiv[i__] = j - 1 + ipiv[i__]; +/* L10: */ + } + +/* Apply interchanges to columns 1:J-1. */ + + i__3 = j - 1; + i__4 = j + jb - 1; + slaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); + + if (j + jb <= *n) { + +/* Apply interchanges to columns J+JB:N. */ + + i__3 = *n - j - jb + 1; + i__4 = j + jb - 1; + slaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & + ipiv[1], &c__1); + +/* Compute block row of U. */ + + i__3 = *n - j - jb + 1; + strsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & + c_b15, &a[j + j * a_dim1], lda, &a[j + (j + jb) * + a_dim1], lda); + if (j + jb <= *m) { + +/* Update trailing submatrix. */ + + i__3 = *m - j - jb + 1; + i__4 = *n - j - jb + 1; + sgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, + &c_b151, &a[j + jb + j * a_dim1], lda, &a[j + (j + + jb) * a_dim1], lda, &c_b15, &a[j + jb + (j + jb) + * a_dim1], lda); + } + } +/* L20: */ + } + } + return 0; + +/* End of SGETRF */ + +} /* sgetrf_ */ + +/* Subroutine */ int sgetrs_(char *trans, integer *n, integer *nrhs, real *a, + integer *lda, integer *ipiv, real *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int strsm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ), xerbla_(char *, integer *); + static logical notran; + extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer + *, integer *, integer *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + SGETRS solves a system of linear equations + A * X = B or A' * X = B + with a general N-by-N matrix A using the LU factorization computed + by SGETRF. + + Arguments + ========= + + TRANS (input) CHARACTER*1 + Specifies the form of the system of equations: + = 'N': A * X = B (No transpose) + = 'T': A'* X = B (Transpose) + = 'C': A'* X = B (Conjugate transpose = Transpose) + + N (input) INTEGER + The order of the matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input) REAL array, dimension (LDA,N) + The factors L and U from the factorization A = P*L*U + as computed by SGETRF. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + IPIV (input) INTEGER array, dimension (N) + The pivot indices from SGETRF; for 1<=i<=N, row i of the + matrix was interchanged with row IPIV(i). + + B (input/output) REAL array, dimension (LDB,NRHS) + On entry, the right hand side matrix B. + On exit, the solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + notran = lsame_(trans, "N"); + if (! notran && ! lsame_(trans, "T") && ! lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGETRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (notran) { + +/* + Solve A * X = B. + + Apply row interchanges to the right hand sides. +*/ + + slaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); + +/* Solve L*X = B, overwriting B with X. */ + + strsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b15, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. */ + + strsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b15, & + a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* + Solve A' * X = B. + + Solve U'*X = B, overwriting B with X. +*/ + + strsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b15, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve L'*X = B, overwriting B with X. */ + + strsm_("Left", "Lower", "Transpose", "Unit", n, nrhs, &c_b15, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Apply row interchanges to the solution vectors. */ + + slaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); + } + + return 0; + +/* End of SGETRS */ + +} /* sgetrs_ */ + +/* Subroutine */ int shseqr_(char *job, char *compz, integer *n, integer *ilo, + integer *ihi, real *h__, integer *ldh, real *wr, real *wi, real *z__, + integer *ldz, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3[2], i__4, + i__5; + real r__1, r__2; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i__, j, k, l; + static real s[225] /* was [15][15] */, v[16]; + static integer i1, i2, ii, nh, nr, ns, nv; + static real vv[16]; + static integer itn; + static real tau; + static integer its; + static real ulp, tst1; + static integer maxb; + static real absw; + static integer ierr; + static real unfl, temp, ovfl; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + static integer itemp; + extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *); + static logical initz, wantt; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + static logical wantz; + extern doublereal slapy2_(real *, real *); + extern /* Subroutine */ int slabad_(real *, real *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, + real *); + extern integer isamax_(integer *, real *, integer *); + extern doublereal slanhs_(char *, integer *, real *, integer *, real *); + extern /* Subroutine */ int slahqr_(logical *, logical *, integer *, + integer *, integer *, real *, integer *, real *, real *, integer * + , integer *, real *, integer *, integer *), slacpy_(char *, + integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, + real *, integer *), slarfx_(char *, integer *, integer *, + real *, real *, real *, integer *, real *); + static real smlnum; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SHSEQR computes the eigenvalues of a real upper Hessenberg matrix H + and, optionally, the matrices T and Z from the Schur decomposition + H = Z T Z**T, where T is an upper quasi-triangular matrix (the Schur + form), and Z is the orthogonal matrix of Schur vectors. + + Optionally Z may be postmultiplied into an input orthogonal matrix Q, + so that this routine can give the Schur factorization of a matrix A + which has been reduced to the Hessenberg form H by the orthogonal + matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. + + Arguments + ========= + + JOB (input) CHARACTER*1 + = 'E': compute eigenvalues only; + = 'S': compute eigenvalues and the Schur form T. + + COMPZ (input) CHARACTER*1 + = 'N': no Schur vectors are computed; + = 'I': Z is initialized to the unit matrix and the matrix Z + of Schur vectors of H is returned; + = 'V': Z must contain an orthogonal matrix Q on entry, and + the product Q*Z is returned. + + N (input) INTEGER + The order of the matrix H. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to SGEBAL, and then passed to SGEHRD + when the matrix output by SGEBAL is reduced to Hessenberg + form. Otherwise ILO and IHI should be set to 1 and N + respectively. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + H (input/output) REAL array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if JOB = 'S', H contains the upper quasi-triangular + matrix T from the Schur decomposition (the Schur form); + 2-by-2 diagonal blocks (corresponding to complex conjugate + pairs of eigenvalues) are returned in standard form, with + H(i,i) = H(i+1,i+1) and H(i+1,i)*H(i,i+1) < 0. If JOB = 'E', + the contents of H are unspecified on exit. + + LDH (input) INTEGER + The leading dimension of the array H. LDH >= max(1,N). + + WR (output) REAL array, dimension (N) + WI (output) REAL array, dimension (N) + The real and imaginary parts, respectively, of the computed + eigenvalues. If two eigenvalues are computed as a complex + conjugate pair, they are stored in consecutive elements of + WR and WI, say the i-th and (i+1)th, with WI(i) > 0 and + WI(i+1) < 0. If JOB = 'S', the eigenvalues are stored in the + same order as on the diagonal of the Schur form returned in + H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 + diagonal block, WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and + WI(i+1) = -WI(i). + + Z (input/output) REAL array, dimension (LDZ,N) + If COMPZ = 'N': Z is not referenced. + If COMPZ = 'I': on entry, Z need not be set, and on exit, Z + contains the orthogonal matrix Z of the Schur vectors of H. + If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q, + which is assumed to be equal to the unit matrix except for + the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z. + Normally Q is the orthogonal matrix generated by SORGHR after + the call to SGEHRD which formed the Hessenberg matrix H. + + LDZ (input) INTEGER + The leading dimension of the array Z. + LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise. + + WORK (workspace/output) REAL array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,N). + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, SHSEQR failed to compute all of the + eigenvalues in a total of 30*(IHI-ILO+1) iterations; + elements 1:ilo-1 and i+1:n of WR and WI contain those + eigenvalues which have been successfully computed. + + ===================================================================== + + + Decode and test the input parameters +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + wantt = lsame_(job, "S"); + initz = lsame_(compz, "I"); + wantz = initz || lsame_(compz, "V"); + + *info = 0; + work[1] = (real) max(1,*n); + lquery = *lwork == -1; + if (! lsame_(job, "E") && ! wantt) { + *info = -1; + } else if (! lsame_(compz, "N") && ! wantz) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -4; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -5; + } else if (*ldh < max(1,*n)) { + *info = -7; + } else if (*ldz < 1 || wantz && *ldz < max(1,*n)) { + *info = -11; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SHSEQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Initialize Z, if necessary */ + + if (initz) { + slaset_("Full", n, n, &c_b29, &c_b15, &z__[z_offset], ldz); + } + +/* Store the eigenvalues isolated by SGEBAL. */ + + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.f; +/* L10: */ + } + i__1 = *n; + for (i__ = *ihi + 1; i__ <= i__1; ++i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.f; +/* L20: */ + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + if (*ilo == *ihi) { + wr[*ilo] = h__[*ilo + *ilo * h_dim1]; + wi[*ilo] = 0.f; + return 0; + } + +/* + Set rows and columns ILO to IHI to zero below the first + subdiagonal. +*/ + + i__1 = *ihi - 2; + for (j = *ilo; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j + 2; i__ <= i__2; ++i__) { + h__[i__ + j * h_dim1] = 0.f; +/* L30: */ + } +/* L40: */ + } + nh = *ihi - *ilo + 1; + +/* + Determine the order of the multi-shift QR algorithm to be used. + + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = job; + i__3[1] = 1, a__1[1] = compz; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + ns = ilaenv_(&c__4, "SHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( + ftnlen)2); +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = job; + i__3[1] = 1, a__1[1] = compz; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + maxb = ilaenv_(&c__8, "SHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( + ftnlen)2); + if (ns <= 2 || ns > nh || maxb >= nh) { + +/* Use the standard double-shift algorithm */ + + slahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[ + 1], ilo, ihi, &z__[z_offset], ldz, info); + return 0; + } + maxb = max(3,maxb); +/* Computing MIN */ + i__1 = min(ns,maxb); + ns = min(i__1,15); + +/* + Now 2 < NS <= MAXB < NH. + + Set machine-dependent constants for the stopping criterion. + If norm(H) <= sqrt(OVFL), overflow should not occur. +*/ + + unfl = slamch_("Safe minimum"); + ovfl = 1.f / unfl; + slabad_(&unfl, &ovfl); + ulp = slamch_("Precision"); + smlnum = unfl * (nh / ulp); + +/* + I1 and I2 are the indices of the first row and last column of H + to which transformations must be applied. If eigenvalues only are + being computed, I1 and I2 are set inside the main loop. +*/ + + if (wantt) { + i1 = 1; + i2 = *n; + } + +/* ITN is the total number of multiple-shift QR iterations allowed. */ + + itn = nh * 30; + +/* + The main loop begins here. I is the loop index and decreases from + IHI to ILO in steps of at most MAXB. Each iteration of the loop + works with the active submatrix in rows and columns L to I. + Eigenvalues I+1 to IHI have already converged. Either L = ILO or + H(L,L-1) is negligible so that the matrix splits. +*/ + + i__ = *ihi; +L50: + l = *ilo; + if (i__ < *ilo) { + goto L170; + } + +/* + Perform multiple-shift QR iterations on rows and columns ILO to I + until a submatrix of order at most MAXB splits off at the bottom + because a subdiagonal element has become negligible. +*/ + + i__1 = itn; + for (its = 0; its <= i__1; ++its) { + +/* Look for a single small subdiagonal element. */ + + i__2 = l + 1; + for (k = i__; k >= i__2; --k) { + tst1 = (r__1 = h__[k - 1 + (k - 1) * h_dim1], dabs(r__1)) + (r__2 + = h__[k + k * h_dim1], dabs(r__2)); + if (tst1 == 0.f) { + i__4 = i__ - l + 1; + tst1 = slanhs_("1", &i__4, &h__[l + l * h_dim1], ldh, &work[1] + ); + } +/* Computing MAX */ + r__2 = ulp * tst1; + if ((r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)) <= dmax(r__2, + smlnum)) { + goto L70; + } +/* L60: */ + } +L70: + l = k; + if (l > *ilo) { + +/* H(L,L-1) is negligible. */ + + h__[l + (l - 1) * h_dim1] = 0.f; + } + +/* Exit from loop if a submatrix of order <= MAXB has split off. */ + + if (l >= i__ - maxb + 1) { + goto L160; + } + +/* + Now the active submatrix is in rows and columns L to I. If + eigenvalues only are being computed, only the active submatrix + need be transformed. +*/ + + if (! wantt) { + i1 = l; + i2 = i__; + } + + if (its == 20 || its == 30) { + +/* Exceptional shifts. */ + + i__2 = i__; + for (ii = i__ - ns + 1; ii <= i__2; ++ii) { + wr[ii] = ((r__1 = h__[ii + (ii - 1) * h_dim1], dabs(r__1)) + ( + r__2 = h__[ii + ii * h_dim1], dabs(r__2))) * 1.5f; + wi[ii] = 0.f; +/* L80: */ + } + } else { + +/* Use eigenvalues of trailing submatrix of order NS as shifts. */ + + slacpy_("Full", &ns, &ns, &h__[i__ - ns + 1 + (i__ - ns + 1) * + h_dim1], ldh, s, &c__15); + slahqr_(&c_false, &c_false, &ns, &c__1, &ns, s, &c__15, &wr[i__ - + ns + 1], &wi[i__ - ns + 1], &c__1, &ns, &z__[z_offset], + ldz, &ierr); + if (ierr > 0) { + +/* + If SLAHQR failed to compute all NS eigenvalues, use the + unconverged diagonal elements as the remaining shifts. +*/ + + i__2 = ierr; + for (ii = 1; ii <= i__2; ++ii) { + wr[i__ - ns + ii] = s[ii + ii * 15 - 16]; + wi[i__ - ns + ii] = 0.f; +/* L90: */ + } + } + } + +/* + Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns)) + where G is the Hessenberg submatrix H(L:I,L:I) and w is + the vector of shifts (stored in WR and WI). The result is + stored in the local array V. +*/ + + v[0] = 1.f; + i__2 = ns + 1; + for (ii = 2; ii <= i__2; ++ii) { + v[ii - 1] = 0.f; +/* L100: */ + } + nv = 1; + i__2 = i__; + for (j = i__ - ns + 1; j <= i__2; ++j) { + if (wi[j] >= 0.f) { + if (wi[j] == 0.f) { + +/* real shift */ + + i__4 = nv + 1; + scopy_(&i__4, v, &c__1, vv, &c__1); + i__4 = nv + 1; + r__1 = -wr[j]; + sgemv_("No transpose", &i__4, &nv, &c_b15, &h__[l + l * + h_dim1], ldh, vv, &c__1, &r__1, v, &c__1); + ++nv; + } else if (wi[j] > 0.f) { + +/* complex conjugate pair of shifts */ + + i__4 = nv + 1; + scopy_(&i__4, v, &c__1, vv, &c__1); + i__4 = nv + 1; + r__1 = wr[j] * -2.f; + sgemv_("No transpose", &i__4, &nv, &c_b15, &h__[l + l * + h_dim1], ldh, v, &c__1, &r__1, vv, &c__1); + i__4 = nv + 1; + itemp = isamax_(&i__4, vv, &c__1); +/* Computing MAX */ + r__2 = (r__1 = vv[itemp - 1], dabs(r__1)); + temp = 1.f / dmax(r__2,smlnum); + i__4 = nv + 1; + sscal_(&i__4, &temp, vv, &c__1); + absw = slapy2_(&wr[j], &wi[j]); + temp = temp * absw * absw; + i__4 = nv + 2; + i__5 = nv + 1; + sgemv_("No transpose", &i__4, &i__5, &c_b15, &h__[l + l * + h_dim1], ldh, vv, &c__1, &temp, v, &c__1); + nv += 2; + } + +/* + Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero, + reset it to the unit vector. +*/ + + itemp = isamax_(&nv, v, &c__1); + temp = (r__1 = v[itemp - 1], dabs(r__1)); + if (temp == 0.f) { + v[0] = 1.f; + i__4 = nv; + for (ii = 2; ii <= i__4; ++ii) { + v[ii - 1] = 0.f; +/* L110: */ + } + } else { + temp = dmax(temp,smlnum); + r__1 = 1.f / temp; + sscal_(&nv, &r__1, v, &c__1); + } + } +/* L120: */ + } + +/* Multiple-shift QR step */ + + i__2 = i__ - 1; + for (k = l; k <= i__2; ++k) { + +/* + The first iteration of this loop determines a reflection G + from the vector V and applies it from left and right to H, + thus creating a nonzero bulge below the subdiagonal. + + Each subsequent iteration determines a reflection G to + restore the Hessenberg form in the (K-1)th column, and thus + chases the bulge one step toward the bottom of the active + submatrix. NR is the order of G. + + Computing MIN +*/ + i__4 = ns + 1, i__5 = i__ - k + 1; + nr = min(i__4,i__5); + if (k > l) { + scopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); + } + slarfg_(&nr, v, &v[1], &c__1, &tau); + if (k > l) { + h__[k + (k - 1) * h_dim1] = v[0]; + i__4 = i__; + for (ii = k + 1; ii <= i__4; ++ii) { + h__[ii + (k - 1) * h_dim1] = 0.f; +/* L130: */ + } + } + v[0] = 1.f; + +/* + Apply G from the left to transform the rows of the matrix in + columns K to I2. +*/ + + i__4 = i2 - k + 1; + slarfx_("Left", &nr, &i__4, v, &tau, &h__[k + k * h_dim1], ldh, & + work[1]); + +/* + Apply G from the right to transform the columns of the + matrix in rows I1 to min(K+NR,I). + + Computing MIN +*/ + i__5 = k + nr; + i__4 = min(i__5,i__) - i1 + 1; + slarfx_("Right", &i__4, &nr, v, &tau, &h__[i1 + k * h_dim1], ldh, + &work[1]); + + if (wantz) { + +/* Accumulate transformations in the matrix Z */ + + slarfx_("Right", &nh, &nr, v, &tau, &z__[*ilo + k * z_dim1], + ldz, &work[1]); + } +/* L140: */ + } + +/* L150: */ + } + +/* Failure to converge in remaining number of iterations */ + + *info = i__; + return 0; + +L160: + +/* + A submatrix of order <= MAXB in rows and columns L to I has split + off. Use the double-shift QR algorithm to handle it. +*/ + + slahqr_(&wantt, &wantz, n, &l, &i__, &h__[h_offset], ldh, &wr[1], &wi[1], + ilo, ihi, &z__[z_offset], ldz, info); + if (*info > 0) { + return 0; + } + +/* + Decrement number of remaining iterations, and return to start of + the main loop with a new value of I. +*/ + + itn -= its; + i__ = l - 1; + goto L50; + +L170: + work[1] = (real) max(1,*n); + return 0; + +/* End of SHSEQR */ + +} /* shseqr_ */ + +/* Subroutine */ int slabad_(real *small, real *large) +{ + /* Builtin functions */ + double r_lg10(real *), sqrt(doublereal); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLABAD takes as input the values computed by SLAMCH for underflow and + overflow, and returns the square root of each of these values if the + log of LARGE is sufficiently large. This subroutine is intended to + identify machines with a large exponent range, such as the Crays, and + redefine the underflow and overflow limits to be the square roots of + the values computed by SLAMCH. This subroutine is needed because + SLAMCH does not compensate for poor arithmetic in the upper half of + the exponent range, as is found on a Cray. + + Arguments + ========= + + SMALL (input/output) REAL + On entry, the underflow threshold as computed by SLAMCH. + On exit, if LOG10(LARGE) is sufficiently large, the square + root of SMALL, otherwise unchanged. + + LARGE (input/output) REAL + On entry, the overflow threshold as computed by SLAMCH. + On exit, if LOG10(LARGE) is sufficiently large, the square + root of LARGE, otherwise unchanged. + + ===================================================================== + + + If it looks like we're on a Cray, take the square root of + SMALL and LARGE to avoid overflow and underflow problems. +*/ + + if (r_lg10(large) > 2e3f) { + *small = sqrt(*small); + *large = sqrt(*large); + } + + return 0; + +/* End of SLABAD */ + +} /* slabad_ */ + +/* Subroutine */ int slabrd_(integer *m, integer *n, integer *nb, real *a, + integer *lda, real *d__, real *e, real *tauq, real *taup, real *x, + integer *ldx, real *y, integer *ldy) +{ + /* System generated locals */ + integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, + i__3; + + /* Local variables */ + static integer i__; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + sgemv_(char *, integer *, integer *, real *, real *, integer *, + real *, integer *, real *, real *, integer *), slarfg_( + integer *, real *, real *, integer *, real *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SLABRD reduces the first NB rows and columns of a real general + m by n matrix A to upper or lower bidiagonal form by an orthogonal + transformation Q' * A * P, and returns the matrices X and Y which + are needed to apply the transformation to the unreduced part of A. + + If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower + bidiagonal form. + + This is an auxiliary routine called by SGEBRD + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. + + N (input) INTEGER + The number of columns in the matrix A. + + NB (input) INTEGER + The number of leading rows and columns of A to be reduced. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the m by n general matrix to be reduced. + On exit, the first NB rows and columns of the matrix are + overwritten; the rest of the array is unchanged. + If m >= n, elements on and below the diagonal in the first NB + columns, with the array TAUQ, represent the orthogonal + matrix Q as a product of elementary reflectors; and + elements above the diagonal in the first NB rows, with the + array TAUP, represent the orthogonal matrix P as a product + of elementary reflectors. + If m < n, elements below the diagonal in the first NB + columns, with the array TAUQ, represent the orthogonal + matrix Q as a product of elementary reflectors, and + elements on and above the diagonal in the first NB rows, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) REAL array, dimension (NB) + The diagonal elements of the first NB rows and columns of + the reduced matrix. D(i) = A(i,i). + + E (output) REAL array, dimension (NB) + The off-diagonal elements of the first NB rows and columns of + the reduced matrix. + + TAUQ (output) REAL array dimension (NB) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix Q. See Further Details. + + TAUP (output) REAL array, dimension (NB) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix P. See Further Details. + + X (output) REAL array, dimension (LDX,NB) + The m-by-nb matrix X required to update the unreduced part + of A. + + LDX (input) INTEGER + The leading dimension of the array X. LDX >= M. + + Y (output) REAL array, dimension (LDY,NB) + The n-by-nb matrix Y required to update the unreduced part + of A. + + LDY (output) INTEGER + The leading dimension of the array Y. LDY >= N. + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors. + + If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in + A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in + A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in + A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in + A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + The elements of the vectors v and u together form the m-by-nb matrix + V and the nb-by-n matrix U' which are needed, with X and Y, to apply + the transformation to the unreduced part of the matrix, using a block + update of the form: A := A - V*Y' - X*U'. + + The contents of A on exit are illustrated by the following examples + with nb = 2: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) + ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) + ( v1 v2 a a a ) ( v1 1 a a a a ) + ( v1 v2 a a a ) ( v1 v2 a a a a ) + ( v1 v2 a a a ) ( v1 v2 a a a a ) + ( v1 v2 a a a ) + + where a denotes an element of the original matrix which is unchanged, + vi denotes an element of the vector defining H(i), and ui an element + of the vector defining G(i). + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + return 0; + } + + if (*m >= *n) { + +/* Reduce to upper bidiagonal form */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i:m,i) */ + + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + a_dim1], + lda, &y[i__ + y_dim1], ldy, &c_b15, &a[i__ + i__ * a_dim1] + , &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &x[i__ + x_dim1], + ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b15, &a[i__ + i__ * + a_dim1], &c__1); + +/* Generate reflection Q(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * + a_dim1], &c__1, &tauq[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + if (i__ < *n) { + a[i__ + i__ * a_dim1] = 1.f; + +/* Compute Y(i+1:n,i) */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__; + sgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + (i__ + 1) * + a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b29, + &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + sgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + a_dim1], + lda, &a[i__ + i__ * a_dim1], &c__1, &c_b29, &y[i__ * + y_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b15, &y[ + i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + sgemv_("Transpose", &i__2, &i__3, &c_b15, &x[i__ + x_dim1], + ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b29, &y[i__ * + y_dim1 + 1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + sgemv_("Transpose", &i__2, &i__3, &c_b151, &a[(i__ + 1) * + a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &c_b15, + &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *n - i__; + sscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + +/* Update A(i,i+1:n) */ + + i__2 = *n - i__; + sgemv_("No transpose", &i__2, &i__, &c_b151, &y[i__ + 1 + + y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b15, &a[i__ + + (i__ + 1) * a_dim1], lda); + i__2 = i__ - 1; + i__3 = *n - i__; + sgemv_("Transpose", &i__2, &i__3, &c_b151, &a[(i__ + 1) * + a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b15, &a[ + i__ + (i__ + 1) * a_dim1], lda); + +/* Generate reflection P(i) to annihilate A(i,i+2:n) */ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + slarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( + i__3,*n) * a_dim1], lda, &taup[i__]); + e[i__] = a[i__ + (i__ + 1) * a_dim1]; + a[i__ + (i__ + 1) * a_dim1] = 1.f; + +/* Compute X(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = *n - i__; + sgemv_("No transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + ( + i__ + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1], + lda, &c_b29, &x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__; + sgemv_("Transpose", &i__2, &i__, &c_b15, &y[i__ + 1 + y_dim1], + ldy, &a[i__ + (i__ + 1) * a_dim1], lda, &c_b29, &x[ + i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + sgemv_("No transpose", &i__2, &i__, &c_b151, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b15, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + sgemv_("No transpose", &i__2, &i__3, &c_b15, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b29, &x[i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b15, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *m - i__; + sscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + } +/* L10: */ + } + } else { + +/* Reduce to lower bidiagonal form */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i,i:n) */ + + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &y[i__ + y_dim1], + ldy, &a[i__ + a_dim1], lda, &c_b15, &a[i__ + i__ * a_dim1] + , lda); + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + sgemv_("Transpose", &i__2, &i__3, &c_b151, &a[i__ * a_dim1 + 1], + lda, &x[i__ + x_dim1], ldx, &c_b15, &a[i__ + i__ * a_dim1] + , lda); + +/* Generate reflection P(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * + a_dim1], lda, &taup[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + if (i__ < *m) { + a[i__ + i__ * a_dim1] = 1.f; + +/* Compute X(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = *n - i__ + 1; + sgemv_("No transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + i__ + * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &c_b29, & + x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + sgemv_("Transpose", &i__2, &i__3, &c_b15, &y[i__ + y_dim1], + ldy, &a[i__ + i__ * a_dim1], lda, &c_b29, &x[i__ * + x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b15, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + sgemv_("No transpose", &i__2, &i__3, &c_b15, &a[i__ * a_dim1 + + 1], lda, &a[i__ + i__ * a_dim1], lda, &c_b29, &x[ + i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b15, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *m - i__; + sscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + +/* Update A(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + 1 + + a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b15, &a[i__ + + 1 + i__ * a_dim1], &c__1); + i__2 = *m - i__; + sgemv_("No transpose", &i__2, &i__, &c_b151, &x[i__ + 1 + + x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b15, &a[ + i__ + 1 + i__ * a_dim1], &c__1); + +/* Generate reflection Q(i) to annihilate A(i+2:m,i) */ + + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) + + i__ * a_dim1], &c__1, &tauq[i__]); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.f; + +/* Compute Y(i+1:n,i) */ + + i__2 = *m - i__; + i__3 = *n - i__; + sgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + (i__ + + 1) * a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, + &c_b29, &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + sgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + a_dim1] + , lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &y[ + i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b15, &y[ + i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__; + sgemv_("Transpose", &i__2, &i__, &c_b15, &x[i__ + 1 + x_dim1], + ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &y[ + i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + sgemv_("Transpose", &i__, &i__2, &c_b151, &a[(i__ + 1) * + a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &c_b15, + &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *n - i__; + sscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + } +/* L20: */ + } + } + return 0; + +/* End of SLABRD */ + +} /* slabrd_ */ + +/* Subroutine */ int slacpy_(char *uplo, integer *m, integer *n, real *a, + integer *lda, real *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SLACPY copies all or part of a two-dimensional matrix A to another + matrix B. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies the part of the matrix A to be copied to B. + = 'U': Upper triangular part + = 'L': Lower triangular part + Otherwise: All of the matrix A + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input) REAL array, dimension (LDA,N) + The m by n matrix A. If UPLO = 'U', only the upper triangle + or trapezoid is accessed; if UPLO = 'L', only the lower + triangle or trapezoid is accessed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + B (output) REAL array, dimension (LDB,N) + On exit, B = A in the locations specified by UPLO. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,M). + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; +/* L10: */ + } +/* L20: */ + } + } else if (lsame_(uplo, "L")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; +/* L30: */ + } +/* L40: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; +/* L50: */ + } +/* L60: */ + } + } + return 0; + +/* End of SLACPY */ + +} /* slacpy_ */ + +/* Subroutine */ int sladiv_(real *a, real *b, real *c__, real *d__, real *p, + real *q) +{ + static real e, f; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLADIV performs complex division in real arithmetic + + a + i*b + p + i*q = --------- + c + i*d + + The algorithm is due to Robert L. Smith and can be found + in D. Knuth, The art of Computer Programming, Vol.2, p.195 + + Arguments + ========= + + A (input) REAL + B (input) REAL + C (input) REAL + D (input) REAL + The scalars a, b, c, and d in the above expression. + + P (output) REAL + Q (output) REAL + The scalars p and q in the above expression. + + ===================================================================== +*/ + + + if (dabs(*d__) < dabs(*c__)) { + e = *d__ / *c__; + f = *c__ + *d__ * e; + *p = (*a + *b * e) / f; + *q = (*b - *a * e) / f; + } else { + e = *c__ / *d__; + f = *d__ + *c__ * e; + *p = (*b + *a * e) / f; + *q = (-(*a) + *b * e) / f; + } + + return 0; + +/* End of SLADIV */ + +} /* sladiv_ */ + +/* Subroutine */ int slae2_(real *a, real *b, real *c__, real *rt1, real *rt2) +{ + /* System generated locals */ + real r__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static real ab, df, tb, sm, rt, adf, acmn, acmx; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix + [ A B ] + [ B C ]. + On return, RT1 is the eigenvalue of larger absolute value, and RT2 + is the eigenvalue of smaller absolute value. + + Arguments + ========= + + A (input) REAL + The (1,1) element of the 2-by-2 matrix. + + B (input) REAL + The (1,2) and (2,1) elements of the 2-by-2 matrix. + + C (input) REAL + The (2,2) element of the 2-by-2 matrix. + + RT1 (output) REAL + The eigenvalue of larger absolute value. + + RT2 (output) REAL + The eigenvalue of smaller absolute value. + + Further Details + =============== + + RT1 is accurate to a few ulps barring over/underflow. + + RT2 may be inaccurate if there is massive cancellation in the + determinant A*C-B*B; higher precision or correctly rounded or + correctly truncated arithmetic would be needed to compute RT2 + accurately in all cases. + + Overflow is possible only if RT1 is within a factor of 5 of overflow. + Underflow is harmless if the input data is 0 or exceeds + underflow_threshold / macheps. + + ===================================================================== + + + Compute the eigenvalues +*/ + + sm = *a + *c__; + df = *a - *c__; + adf = dabs(df); + tb = *b + *b; + ab = dabs(tb); + if (dabs(*a) > dabs(*c__)) { + acmx = *a; + acmn = *c__; + } else { + acmx = *c__; + acmn = *a; + } + if (adf > ab) { +/* Computing 2nd power */ + r__1 = ab / adf; + rt = adf * sqrt(r__1 * r__1 + 1.f); + } else if (adf < ab) { +/* Computing 2nd power */ + r__1 = adf / ab; + rt = ab * sqrt(r__1 * r__1 + 1.f); + } else { + +/* Includes case AB=ADF=0 */ + + rt = ab * sqrt(2.f); + } + if (sm < 0.f) { + *rt1 = (sm - rt) * .5f; + +/* + Order of execution important. + To get fully accurate smaller eigenvalue, + next line needs to be executed in higher precision. +*/ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else if (sm > 0.f) { + *rt1 = (sm + rt) * .5f; + +/* + Order of execution important. + To get fully accurate smaller eigenvalue, + next line needs to be executed in higher precision. +*/ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else { + +/* Includes case RT1 = RT2 = 0 */ + + *rt1 = rt * .5f; + *rt2 = rt * -.5f; + } + return 0; + +/* End of SLAE2 */ + +} /* slae2_ */ + +/* Subroutine */ int slaed0_(integer *icompq, integer *qsiz, integer *n, real + *d__, real *e, real *q, integer *ldq, real *qstore, integer *ldqs, + real *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2; + real r__1; + + /* Builtin functions */ + double log(doublereal); + integer pow_ii(integer *, integer *); + + /* Local variables */ + static integer i__, j, k, iq, lgn, msd2, smm1, spm1, spm2; + static real temp; + static integer curr; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static integer iperm, indxq, iwrem; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + static integer iqptr, tlvls; + extern /* Subroutine */ int slaed1_(integer *, real *, real *, integer *, + integer *, real *, integer *, real *, integer *, integer *), + slaed7_(integer *, integer *, integer *, integer *, integer *, + integer *, real *, real *, integer *, integer *, real *, integer * + , real *, integer *, integer *, integer *, integer *, integer *, + real *, real *, integer *, integer *); + static integer igivcl; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer igivnm, submat; + extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, + integer *, real *, integer *); + static integer curprb, subpbs, igivpt, curlvl, matsiz, iprmpt, smlsiz; + extern /* Subroutine */ int ssteqr_(char *, integer *, real *, real *, + real *, integer *, real *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SLAED0 computes all eigenvalues and corresponding eigenvectors of a + symmetric tridiagonal matrix using the divide and conquer method. + + Arguments + ========= + + ICOMPQ (input) INTEGER + = 0: Compute eigenvalues only. + = 1: Compute eigenvectors of original dense symmetric matrix + also. On entry, Q contains the orthogonal matrix used + to reduce the original matrix to tridiagonal form. + = 2: Compute eigenvalues and eigenvectors of tridiagonal + matrix. + + QSIZ (input) INTEGER + The dimension of the orthogonal matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + D (input/output) REAL array, dimension (N) + On entry, the main diagonal of the tridiagonal matrix. + On exit, its eigenvalues. + + E (input) REAL array, dimension (N-1) + The off-diagonal elements of the tridiagonal matrix. + On exit, E has been destroyed. + + Q (input/output) REAL array, dimension (LDQ, N) + On entry, Q must contain an N-by-N orthogonal matrix. + If ICOMPQ = 0 Q is not referenced. + If ICOMPQ = 1 On entry, Q is a subset of the columns of the + orthogonal matrix used to reduce the full + matrix to tridiagonal form corresponding to + the subset of the full matrix which is being + decomposed at this time. + If ICOMPQ = 2 On entry, Q will be the identity matrix. + On exit, Q contains the eigenvectors of the + tridiagonal matrix. + + LDQ (input) INTEGER + The leading dimension of the array Q. If eigenvectors are + desired, then LDQ >= max(1,N). In any case, LDQ >= 1. + + QSTORE (workspace) REAL array, dimension (LDQS, N) + Referenced only when ICOMPQ = 1. Used to store parts of + the eigenvector matrix when the updating matrix multiplies + take place. + + LDQS (input) INTEGER + The leading dimension of the array QSTORE. If ICOMPQ = 1, + then LDQS >= max(1,N). In any case, LDQS >= 1. + + WORK (workspace) REAL array, + If ICOMPQ = 0 or 1, the dimension of WORK must be at least + 1 + 3*N + 2*N*lg N + 2*N**2 + ( lg( N ) = smallest integer k + such that 2^k >= N ) + If ICOMPQ = 2, the dimension of WORK must be at least + 4*N + N**2. + + IWORK (workspace) INTEGER array, + If ICOMPQ = 0 or 1, the dimension of IWORK must be at least + 6 + 6*N + 5*N*lg N. + ( lg( N ) = smallest integer k + such that 2^k >= N ) + If ICOMPQ = 2, the dimension of IWORK must be at least + 3 + 5*N. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an eigenvalue while + working on the submatrix lying in rows and columns + INFO/(N+1) through mod(INFO,N+1). + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + qstore_dim1 = *ldqs; + qstore_offset = 1 + qstore_dim1; + qstore -= qstore_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 2) { + *info = -1; + } else if (*icompq == 1 && *qsiz < max(0,*n)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ldq < max(1,*n)) { + *info = -7; + } else if (*ldqs < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAED0", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + smlsiz = ilaenv_(&c__9, "SLAED0", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + +/* + Determine the size and placement of the submatrices, and save in + the leading elements of IWORK. +*/ + + iwork[1] = *n; + subpbs = 1; + tlvls = 0; +L10: + if (iwork[subpbs] > smlsiz) { + for (j = subpbs; j >= 1; --j) { + iwork[j * 2] = (iwork[j] + 1) / 2; + iwork[(j << 1) - 1] = iwork[j] / 2; +/* L20: */ + } + ++tlvls; + subpbs <<= 1; + goto L10; + } + i__1 = subpbs; + for (j = 2; j <= i__1; ++j) { + iwork[j] += iwork[j - 1]; +/* L30: */ + } + +/* + Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 + using rank-1 modifications (cuts). +*/ + + spm1 = subpbs - 1; + i__1 = spm1; + for (i__ = 1; i__ <= i__1; ++i__) { + submat = iwork[i__] + 1; + smm1 = submat - 1; + d__[smm1] -= (r__1 = e[smm1], dabs(r__1)); + d__[submat] -= (r__1 = e[smm1], dabs(r__1)); +/* L40: */ + } + + indxq = (*n << 2) + 3; + if (*icompq != 2) { + +/* + Set up workspaces for eigenvalues only/accumulate new vectors + routine +*/ + + temp = log((real) (*n)) / log(2.f); + lgn = (integer) temp; + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + iprmpt = indxq + *n + 1; + iperm = iprmpt + *n * lgn; + iqptr = iperm + *n * lgn; + igivpt = iqptr + *n + 2; + igivcl = igivpt + *n * lgn; + + igivnm = 1; + iq = igivnm + (*n << 1) * lgn; +/* Computing 2nd power */ + i__1 = *n; + iwrem = iq + i__1 * i__1 + 1; + +/* Initialize pointers */ + + i__1 = subpbs; + for (i__ = 0; i__ <= i__1; ++i__) { + iwork[iprmpt + i__] = 1; + iwork[igivpt + i__] = 1; +/* L50: */ + } + iwork[iqptr] = 1; + } + +/* + Solve each submatrix eigenproblem at the bottom of the divide and + conquer tree. +*/ + + curr = 0; + i__1 = spm1; + for (i__ = 0; i__ <= i__1; ++i__) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[1]; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 1] - iwork[i__]; + } + if (*icompq == 2) { + ssteqr_("I", &matsiz, &d__[submat], &e[submat], &q[submat + + submat * q_dim1], ldq, &work[1], info); + if (*info != 0) { + goto L130; + } + } else { + ssteqr_("I", &matsiz, &d__[submat], &e[submat], &work[iq - 1 + + iwork[iqptr + curr]], &matsiz, &work[1], info); + if (*info != 0) { + goto L130; + } + if (*icompq == 1) { + sgemm_("N", "N", qsiz, &matsiz, &matsiz, &c_b15, &q[submat * + q_dim1 + 1], ldq, &work[iq - 1 + iwork[iqptr + curr]], + &matsiz, &c_b29, &qstore[submat * qstore_dim1 + 1], + ldqs); + } +/* Computing 2nd power */ + i__2 = matsiz; + iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2; + ++curr; + } + k = 1; + i__2 = iwork[i__ + 1]; + for (j = submat; j <= i__2; ++j) { + iwork[indxq + j] = k; + ++k; +/* L60: */ + } +/* L70: */ + } + +/* + Successively merge eigensystems of adjacent submatrices + into eigensystem for the corresponding larger matrix. + + while ( SUBPBS > 1 ) +*/ + + curlvl = 1; +L80: + if (subpbs > 1) { + spm2 = subpbs - 2; + i__1 = spm2; + for (i__ = 0; i__ <= i__1; i__ += 2) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[2]; + msd2 = iwork[1]; + curprb = 0; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 2] - iwork[i__]; + msd2 = matsiz / 2; + ++curprb; + } + +/* + Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) + into an eigensystem of size MATSIZ. + SLAED1 is used only for the full eigensystem of a tridiagonal + matrix. + SLAED7 handles the cases in which eigenvalues only or eigenvalues + and eigenvectors of a full symmetric matrix (which was reduced to + tridiagonal form) are desired. +*/ + + if (*icompq == 2) { + slaed1_(&matsiz, &d__[submat], &q[submat + submat * q_dim1], + ldq, &iwork[indxq + submat], &e[submat + msd2 - 1], & + msd2, &work[1], &iwork[subpbs + 1], info); + } else { + slaed7_(icompq, &matsiz, qsiz, &tlvls, &curlvl, &curprb, &d__[ + submat], &qstore[submat * qstore_dim1 + 1], ldqs, & + iwork[indxq + submat], &e[submat + msd2 - 1], &msd2, & + work[iq], &iwork[iqptr], &iwork[iprmpt], &iwork[iperm] + , &iwork[igivpt], &iwork[igivcl], &work[igivnm], & + work[iwrem], &iwork[subpbs + 1], info); + } + if (*info != 0) { + goto L130; + } + iwork[i__ / 2 + 1] = iwork[i__ + 2]; +/* L90: */ + } + subpbs /= 2; + ++curlvl; + goto L80; + } + +/* + end while + + Re-merge the eigenvalues/vectors which were deflated at the final + merge step. +*/ + + if (*icompq == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + work[i__] = d__[j]; + scopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 + + 1], &c__1); +/* L100: */ + } + scopy_(n, &work[1], &c__1, &d__[1], &c__1); + } else if (*icompq == 2) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + work[i__] = d__[j]; + scopy_(n, &q[j * q_dim1 + 1], &c__1, &work[*n * i__ + 1], &c__1); +/* L110: */ + } + scopy_(n, &work[1], &c__1, &d__[1], &c__1); + slacpy_("A", n, n, &work[*n + 1], n, &q[q_offset], ldq); + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + work[i__] = d__[j]; +/* L120: */ + } + scopy_(n, &work[1], &c__1, &d__[1], &c__1); + } + goto L140; + +L130: + *info = submat * (*n + 1) + submat + matsiz - 1; + +L140: + return 0; + +/* End of SLAED0 */ + +} /* slaed0_ */ + +/* Subroutine */ int slaed1_(integer *n, real *d__, real *q, integer *ldq, + integer *indxq, real *rho, integer *cutpnt, real *work, integer * + iwork, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + + /* Local variables */ + static integer i__, k, n1, n2, is, iw, iz, iq2, cpp1, indx, indxc, indxp; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *), slaed2_(integer *, integer *, integer *, real *, real + *, integer *, integer *, real *, real *, real *, real *, real *, + integer *, integer *, integer *, integer *, integer *), slaed3_( + integer *, integer *, integer *, real *, real *, integer *, real * + , real *, real *, integer *, integer *, real *, real *, integer *) + ; + static integer idlmda; + extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_( + integer *, integer *, real *, integer *, integer *, integer *); + static integer coltyp; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SLAED1 computes the updated eigensystem of a diagonal + matrix after modification by a rank-one symmetric matrix. This + routine is used only for the eigenproblem which requires all + eigenvalues and eigenvectors of a tridiagonal matrix. SLAED7 handles + the case in which eigenvalues only or eigenvalues and eigenvectors + of a full symmetric matrix (which was reduced to tridiagonal form) + are desired. + + T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) + + where Z = Q'u, u is a vector of length N with ones in the + CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + + The eigenvectors of the original matrix are stored in Q, and the + eigenvalues are in D. The algorithm consists of three stages: + + The first stage consists of deflating the size of the problem + when there are multiple eigenvalues or if there is a zero in + the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. This stage is + performed by the routine SLAED2. + + The second stage consists of calculating the updated + eigenvalues. This is done by finding the roots of the secular + equation via the routine SLAED4 (as called by SLAED3). + This routine also calculates the eigenvectors of the current + problem. + + The final stage consists of computing the updated eigenvectors + directly using the updated eigenvalues. The eigenvectors for + the current problem are multiplied with the eigenvectors from + the overall problem. + + Arguments + ========= + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + D (input/output) REAL array, dimension (N) + On entry, the eigenvalues of the rank-1-perturbed matrix. + On exit, the eigenvalues of the repaired matrix. + + Q (input/output) REAL array, dimension (LDQ,N) + On entry, the eigenvectors of the rank-1-perturbed matrix. + On exit, the eigenvectors of the repaired tridiagonal matrix. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + INDXQ (input/output) INTEGER array, dimension (N) + On entry, the permutation which separately sorts the two + subproblems in D into ascending order. + On exit, the permutation which will reintegrate the + subproblems back into sorted order, + i.e. D( INDXQ( I = 1, N ) ) will be in ascending order. + + RHO (input) REAL + The subdiagonal entry used to create the rank-1 modification. + + CUTPNT (input) INTEGER + The location of the last eigenvalue in the leading sub-matrix. + min(1,N) <= CUTPNT <= N/2. + + WORK (workspace) REAL array, dimension (4*N + N**2) + + IWORK (workspace) INTEGER array, dimension (4*N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an eigenvalue did not converge + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -1; + } else if (*ldq < max(1,*n)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MIN */ + i__1 = 1, i__2 = *n / 2; + if (min(i__1,i__2) > *cutpnt || *n / 2 < *cutpnt) { + *info = -7; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAED1", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* + The following values are integer pointers which indicate + the portion of the workspace + used by a particular array in SLAED2 and SLAED3. +*/ + + iz = 1; + idlmda = iz + *n; + iw = idlmda + *n; + iq2 = iw + *n; + + indx = 1; + indxc = indx + *n; + coltyp = indxc + *n; + indxp = coltyp + *n; + + +/* + Form the z-vector which consists of the last row of Q_1 and the + first row of Q_2. +*/ + + scopy_(cutpnt, &q[*cutpnt + q_dim1], ldq, &work[iz], &c__1); + cpp1 = *cutpnt + 1; + i__1 = *n - *cutpnt; + scopy_(&i__1, &q[cpp1 + cpp1 * q_dim1], ldq, &work[iz + *cutpnt], &c__1); + +/* Deflate eigenvalues. */ + + slaed2_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, &indxq[1], rho, &work[ + iz], &work[idlmda], &work[iw], &work[iq2], &iwork[indx], &iwork[ + indxc], &iwork[indxp], &iwork[coltyp], info); + + if (*info != 0) { + goto L20; + } + +/* Solve Secular Equation. */ + + if (k != 0) { + is = (iwork[coltyp] + iwork[coltyp + 1]) * *cutpnt + (iwork[coltyp + + 1] + iwork[coltyp + 2]) * (*n - *cutpnt) + iq2; + slaed3_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, rho, &work[idlmda], + &work[iq2], &iwork[indxc], &iwork[coltyp], &work[iw], &work[ + is], info); + if (*info != 0) { + goto L20; + } + +/* Prepare the INDXQ sorting permutation. */ + + n1 = k; + n2 = *n - k; + slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indxq[i__] = i__; +/* L10: */ + } + } + +L20: + return 0; + +/* End of SLAED1 */ + +} /* slaed1_ */ + +/* Subroutine */ int slaed2_(integer *k, integer *n, integer *n1, real *d__, + real *q, integer *ldq, integer *indxq, real *rho, real *z__, real * + dlamda, real *w, real *q2, integer *indx, integer *indxc, integer * + indxp, integer *coltyp, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + real r__1, r__2, r__3, r__4; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static real c__; + static integer i__, j; + static real s, t; + static integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1; + static real eps, tau, tol; + static integer psm[4], imax, jmax, ctot[4]; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *), sscal_(integer *, real *, real *, + integer *), scopy_(integer *, real *, integer *, real *, integer * + ); + extern doublereal slapy2_(real *, real *), slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer isamax_(integer *, real *, integer *); + extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer + *, integer *, integer *), slacpy_(char *, integer *, integer *, + real *, integer *, real *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + SLAED2 merges the two sets of eigenvalues together into a single + sorted set. Then it tries to deflate the size of the problem. + There are two ways in which deflation can occur: when two or more + eigenvalues are close together or if there is a tiny entry in the + Z vector. For each such occurrence the order of the related secular + equation problem is reduced by one. + + Arguments + ========= + + K (output) INTEGER + The number of non-deflated eigenvalues, and the order of the + related secular equation. 0 <= K <=N. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + N1 (input) INTEGER + The location of the last eigenvalue in the leading sub-matrix. + min(1,N) <= N1 <= N/2. + + D (input/output) REAL array, dimension (N) + On entry, D contains the eigenvalues of the two submatrices to + be combined. + On exit, D contains the trailing (N-K) updated eigenvalues + (those which were deflated) sorted into increasing order. + + Q (input/output) REAL array, dimension (LDQ, N) + On entry, Q contains the eigenvectors of two submatrices in + the two square blocks with corners at (1,1), (N1,N1) + and (N1+1, N1+1), (N,N). + On exit, Q contains the trailing (N-K) updated eigenvectors + (those which were deflated) in its last N-K columns. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + INDXQ (input/output) INTEGER array, dimension (N) + The permutation which separately sorts the two sub-problems + in D into ascending order. Note that elements in the second + half of this permutation must first have N1 added to their + values. Destroyed on exit. + + RHO (input/output) REAL + On entry, the off-diagonal element associated with the rank-1 + cut which originally split the two submatrices which are now + being recombined. + On exit, RHO has been modified to the value required by + SLAED3. + + Z (input) REAL array, dimension (N) + On entry, Z contains the updating vector (the last + row of the first sub-eigenvector matrix and the first row of + the second sub-eigenvector matrix). + On exit, the contents of Z have been destroyed by the updating + process. + + DLAMDA (output) REAL array, dimension (N) + A copy of the first K eigenvalues which will be used by + SLAED3 to form the secular equation. + + W (output) REAL array, dimension (N) + The first k values of the final deflation-altered z-vector + which will be passed to SLAED3. + + Q2 (output) REAL array, dimension (N1**2+(N-N1)**2) + A copy of the first K eigenvectors which will be used by + SLAED3 in a matrix multiply (SGEMM) to solve for the new + eigenvectors. + + INDX (workspace) INTEGER array, dimension (N) + The permutation used to sort the contents of DLAMDA into + ascending order. + + INDXC (output) INTEGER array, dimension (N) + The permutation used to arrange the columns of the deflated + Q matrix into three groups: the first group contains non-zero + elements only at and above N1, the second contains + non-zero elements only below N1, and the third is dense. + + INDXP (workspace) INTEGER array, dimension (N) + The permutation used to place deflated values of D at the end + of the array. INDXP(1:K) points to the nondeflated D-values + and INDXP(K+1:N) points to the deflated eigenvalues. + + COLTYP (workspace/output) INTEGER array, dimension (N) + During execution, a label which will indicate which of the + following types a column in the Q2 matrix is: + 1 : non-zero in the upper half only; + 2 : dense; + 3 : non-zero in the lower half only; + 4 : deflated. + On exit, COLTYP(i) is the number of columns of type i, + for i=1 to 4 only. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --z__; + --dlamda; + --w; + --q2; + --indx; + --indxc; + --indxp; + --coltyp; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -2; + } else if (*ldq < max(1,*n)) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MIN */ + i__1 = 1, i__2 = *n / 2; + if (min(i__1,i__2) > *n1 || *n / 2 < *n1) { + *info = -3; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAED2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + n2 = *n - *n1; + n1p1 = *n1 + 1; + + if (*rho < 0.f) { + sscal_(&n2, &c_b151, &z__[n1p1], &c__1); + } + +/* + Normalize z so that norm(z) = 1. Since z is the concatenation of + two normalized vectors, norm2(z) = sqrt(2). +*/ + + t = 1.f / sqrt(2.f); + sscal_(n, &t, &z__[1], &c__1); + +/* RHO = ABS( norm(z)**2 * RHO ) */ + + *rho = (r__1 = *rho * 2.f, dabs(r__1)); + +/* Sort the eigenvalues into increasing order */ + + i__1 = *n; + for (i__ = n1p1; i__ <= i__1; ++i__) { + indxq[i__] += *n1; +/* L10: */ + } + +/* re-integrate the deflated parts from the last pass */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = d__[indxq[i__]]; +/* L20: */ + } + slamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indx[i__] = indxq[indxc[i__]]; +/* L30: */ + } + +/* Calculate the allowable deflation tolerance */ + + imax = isamax_(n, &z__[1], &c__1); + jmax = isamax_(n, &d__[1], &c__1); + eps = slamch_("Epsilon"); +/* Computing MAX */ + r__3 = (r__1 = d__[jmax], dabs(r__1)), r__4 = (r__2 = z__[imax], dabs( + r__2)); + tol = eps * 8.f * dmax(r__3,r__4); + +/* + If the rank-1 modifier is small enough, no more needs to be done + except to reorganize Q so that its columns correspond with the + elements in D. +*/ + + if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) { + *k = 0; + iq2 = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__ = indx[j]; + scopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1); + dlamda[j] = d__[i__]; + iq2 += *n; +/* L40: */ + } + slacpy_("A", n, n, &q2[1], n, &q[q_offset], ldq); + scopy_(n, &dlamda[1], &c__1, &d__[1], &c__1); + goto L190; + } + +/* + If there are multiple eigenvalues then the problem deflates. Here + the number of equal eigenvalues are found. As each equal + eigenvalue is found, an elementary reflector is computed to rotate + the corresponding eigensubspace so that the corresponding + components of Z are zero in this new basis. +*/ + + i__1 = *n1; + for (i__ = 1; i__ <= i__1; ++i__) { + coltyp[i__] = 1; +/* L50: */ + } + i__1 = *n; + for (i__ = n1p1; i__ <= i__1; ++i__) { + coltyp[i__] = 3; +/* L60: */ + } + + + *k = 0; + k2 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + nj = indx[j]; + if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + coltyp[nj] = 4; + indxp[k2] = nj; + if (j == *n) { + goto L100; + } + } else { + pj = nj; + goto L80; + } +/* L70: */ + } +L80: + ++j; + nj = indx[j]; + if (j > *n) { + goto L100; + } + if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + coltyp[nj] = 4; + indxp[k2] = nj; + } else { + +/* Check if eigenvalues are close enough to allow deflation. */ + + s = z__[pj]; + c__ = z__[nj]; + +/* + Find sqrt(a**2+b**2) without overflow or + destructive underflow. +*/ + + tau = slapy2_(&c__, &s); + t = d__[nj] - d__[pj]; + c__ /= tau; + s = -s / tau; + if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) { + +/* Deflation is possible. */ + + z__[nj] = tau; + z__[pj] = 0.f; + if (coltyp[nj] != coltyp[pj]) { + coltyp[nj] = 2; + } + coltyp[pj] = 4; + srot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, & + c__, &s); +/* Computing 2nd power */ + r__1 = c__; +/* Computing 2nd power */ + r__2 = s; + t = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2); +/* Computing 2nd power */ + r__1 = s; +/* Computing 2nd power */ + r__2 = c__; + d__[nj] = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2); + d__[pj] = t; + --k2; + i__ = 1; +L90: + if (k2 + i__ <= *n) { + if (d__[pj] < d__[indxp[k2 + i__]]) { + indxp[k2 + i__ - 1] = indxp[k2 + i__]; + indxp[k2 + i__] = pj; + ++i__; + goto L90; + } else { + indxp[k2 + i__ - 1] = pj; + } + } else { + indxp[k2 + i__ - 1] = pj; + } + pj = nj; + } else { + ++(*k); + dlamda[*k] = d__[pj]; + w[*k] = z__[pj]; + indxp[*k] = pj; + pj = nj; + } + } + goto L80; +L100: + +/* Record the last eigenvalue. */ + + ++(*k); + dlamda[*k] = d__[pj]; + w[*k] = z__[pj]; + indxp[*k] = pj; + +/* + Count up the total number of the various types of columns, then + form a permutation which positions the four column types into + four uniform groups (although one or more of these groups may be + empty). +*/ + + for (j = 1; j <= 4; ++j) { + ctot[j - 1] = 0; +/* L110: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + ct = coltyp[j]; + ++ctot[ct - 1]; +/* L120: */ + } + +/* PSM(*) = Position in SubMatrix (of types 1 through 4) */ + + psm[0] = 1; + psm[1] = ctot[0] + 1; + psm[2] = psm[1] + ctot[1]; + psm[3] = psm[2] + ctot[2]; + *k = *n - ctot[3]; + +/* + Fill out the INDXC array so that the permutation which it induces + will place all type-1 columns first, all type-2 columns next, + then all type-3's, and finally all type-4's. +*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + js = indxp[j]; + ct = coltyp[js]; + indx[psm[ct - 1]] = js; + indxc[psm[ct - 1]] = j; + ++psm[ct - 1]; +/* L130: */ + } + +/* + Sort the eigenvalues and corresponding eigenvectors into DLAMDA + and Q2 respectively. The eigenvalues/vectors which were not + deflated go into the first K slots of DLAMDA and Q2 respectively, + while those which were deflated go into the last N - K slots. +*/ + + i__ = 1; + iq1 = 1; + iq2 = (ctot[0] + ctot[1]) * *n1 + 1; + i__1 = ctot[0]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + scopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); + z__[i__] = d__[js]; + ++i__; + iq1 += *n1; +/* L140: */ + } + + i__1 = ctot[1]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + scopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); + scopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); + z__[i__] = d__[js]; + ++i__; + iq1 += *n1; + iq2 += n2; +/* L150: */ + } + + i__1 = ctot[2]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + scopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); + z__[i__] = d__[js]; + ++i__; + iq2 += n2; +/* L160: */ + } + + iq1 = iq2; + i__1 = ctot[3]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + scopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1); + iq2 += *n; + z__[i__] = d__[js]; + ++i__; +/* L170: */ + } + +/* + The deflated eigenvalues and their corresponding vectors go back + into the last N - K slots of D and Q respectively. +*/ + + slacpy_("A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq); + i__1 = *n - *k; + scopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1); + +/* Copy CTOT into COLTYP for referencing in SLAED3. */ + + for (j = 1; j <= 4; ++j) { + coltyp[j] = ctot[j - 1]; +/* L180: */ + } + +L190: + return 0; + +/* End of SLAED2 */ + +} /* slaed2_ */ + +/* Subroutine */ int slaed3_(integer *k, integer *n, integer *n1, real *d__, + real *q, integer *ldq, real *rho, real *dlamda, real *q2, integer * + indx, integer *ctot, real *w, real *s, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + real r__1; + + /* Builtin functions */ + double sqrt(doublereal), r_sign(real *, real *); + + /* Local variables */ + static integer i__, j, n2, n12, ii, n23, iq2; + static real temp; + extern doublereal snrm2_(integer *, real *, integer *); + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *), scopy_(integer *, real *, + integer *, real *, integer *), slaed4_(integer *, integer *, real + *, real *, real *, real *, real *, integer *); + extern doublereal slamc3_(real *, real *); + extern /* Subroutine */ int xerbla_(char *, integer *), slacpy_( + char *, integer *, integer *, real *, integer *, real *, integer * + ), slaset_(char *, integer *, integer *, real *, real *, + real *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + June 30, 1999 + + + Purpose + ======= + + SLAED3 finds the roots of the secular equation, as defined by the + values in D, W, and RHO, between 1 and K. It makes the + appropriate calls to SLAED4 and then updates the eigenvectors by + multiplying the matrix of eigenvectors of the pair of eigensystems + being combined by the matrix of eigenvectors of the K-by-K system + which is solved here. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + K (input) INTEGER + The number of terms in the rational function to be solved by + SLAED4. K >= 0. + + N (input) INTEGER + The number of rows and columns in the Q matrix. + N >= K (deflation may result in N>K). + + N1 (input) INTEGER + The location of the last eigenvalue in the leading submatrix. + min(1,N) <= N1 <= N/2. + + D (output) REAL array, dimension (N) + D(I) contains the updated eigenvalues for + 1 <= I <= K. + + Q (output) REAL array, dimension (LDQ,N) + Initially the first K columns are used as workspace. + On output the columns 1 to K contain + the updated eigenvectors. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + RHO (input) REAL + The value of the parameter in the rank one update equation. + RHO >= 0 required. + + DLAMDA (input/output) REAL array, dimension (K) + The first K elements of this array contain the old roots + of the deflated updating problem. These are the poles + of the secular equation. May be changed on output by + having lowest order bit set to zero on Cray X-MP, Cray Y-MP, + Cray-2, or Cray C-90, as described above. + + Q2 (input) REAL array, dimension (LDQ2, N) + The first K columns of this matrix contain the non-deflated + eigenvectors for the split problem. + + INDX (input) INTEGER array, dimension (N) + The permutation used to arrange the columns of the deflated + Q matrix into three groups (see SLAED2). + The rows of the eigenvectors found by SLAED4 must be likewise + permuted before the matrix multiply can take place. + + CTOT (input) INTEGER array, dimension (4) + A count of the total number of the various types of columns + in Q, as described in INDX. The fourth column type is any + column which has been deflated. + + W (input/output) REAL array, dimension (K) + The first K elements of this array contain the components + of the deflation-adjusted updating vector. Destroyed on + output. + + S (workspace) REAL array, dimension (N1 + 1)*K + Will contain the eigenvectors of the repaired matrix which + will be multiplied by the previously accumulated eigenvectors + to update the system. + + LDS (input) INTEGER + The leading dimension of S. LDS >= max(1,K). + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an eigenvalue did not converge + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --dlamda; + --q2; + --indx; + --ctot; + --w; + --s; + + /* Function Body */ + *info = 0; + + if (*k < 0) { + *info = -1; + } else if (*n < *k) { + *info = -2; + } else if (*ldq < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAED3", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*k == 0) { + return 0; + } + +/* + Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can + be computed with high relative accuracy (barring over/underflow). + This is a problem on machines without a guard digit in + add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). + The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), + which on any of these machines zeros out the bottommost + bit of DLAMDA(I) if it is 1; this makes the subsequent + subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation + occurs. On binary machines with a guard digit (almost all + machines) it does not change DLAMDA(I) at all. On hexadecimal + and decimal machines with a guard digit, it slightly + changes the bottommost bits of DLAMDA(I). It does not account + for hexadecimal or decimal machines without guard digits + (we know of none). We use a subroutine call to compute + 2*DLAMBDA(I) to prevent optimizing compilers from eliminating + this code. +*/ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = slamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; +/* L10: */ + } + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + slaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], + info); + +/* If the zero finder fails, the computation is terminated. */ + + if (*info != 0) { + goto L120; + } +/* L20: */ + } + + if (*k == 1) { + goto L110; + } + if (*k == 2) { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + w[1] = q[j * q_dim1 + 1]; + w[2] = q[j * q_dim1 + 2]; + ii = indx[1]; + q[j * q_dim1 + 1] = w[ii]; + ii = indx[2]; + q[j * q_dim1 + 2] = w[ii]; +/* L30: */ + } + goto L110; + } + +/* Compute updated W. */ + + scopy_(k, &w[1], &c__1, &s[1], &c__1); + +/* Initialize W(I) = Q(I,I) */ + + i__1 = *ldq + 1; + scopy_(k, &q[q_offset], &i__1, &w[1], &c__1); + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L40: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L50: */ + } +/* L60: */ + } + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + r__1 = sqrt(-w[i__]); + w[i__] = r_sign(&r__1, &s[i__]); +/* L70: */ + } + +/* Compute eigenvectors of the modified rank-1 modification. */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + s[i__] = w[i__] / q[i__ + j * q_dim1]; +/* L80: */ + } + temp = snrm2_(k, &s[1], &c__1); + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + ii = indx[i__]; + q[i__ + j * q_dim1] = s[ii] / temp; +/* L90: */ + } +/* L100: */ + } + +/* Compute the updated eigenvectors. */ + +L110: + + n2 = *n - *n1; + n12 = ctot[1] + ctot[2]; + n23 = ctot[2] + ctot[3]; + + slacpy_("A", &n23, k, &q[ctot[1] + 1 + q_dim1], ldq, &s[1], &n23); + iq2 = *n1 * n12 + 1; + if (n23 != 0) { + sgemm_("N", "N", &n2, k, &n23, &c_b15, &q2[iq2], &n2, &s[1], &n23, & + c_b29, &q[*n1 + 1 + q_dim1], ldq); + } else { + slaset_("A", &n2, k, &c_b29, &c_b29, &q[*n1 + 1 + q_dim1], ldq); + } + + slacpy_("A", &n12, k, &q[q_offset], ldq, &s[1], &n12); + if (n12 != 0) { + sgemm_("N", "N", n1, k, &n12, &c_b15, &q2[1], n1, &s[1], &n12, &c_b29, + &q[q_offset], ldq); + } else { + slaset_("A", n1, k, &c_b29, &c_b29, &q[q_dim1 + 1], ldq); + } + + +L120: + return 0; + +/* End of SLAED3 */ + +} /* slaed3_ */ + +/* Subroutine */ int slaed4_(integer *n, integer *i__, real *d__, real *z__, + real *delta, real *rho, real *dlam, integer *info) +{ + /* System generated locals */ + integer i__1; + real r__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static real a, b, c__; + static integer j; + static real w; + static integer ii; + static real dw, zz[3]; + static integer ip1; + static real del, eta, phi, eps, tau, psi; + static integer iim1, iip1; + static real dphi, dpsi; + static integer iter; + static real temp, prew, temp1, dltlb, dltub, midpt; + static integer niter; + static logical swtch; + extern /* Subroutine */ int slaed5_(integer *, real *, real *, real *, + real *, real *), slaed6_(integer *, logical *, real *, real *, + real *, real *, real *, integer *); + static logical swtch3; + extern doublereal slamch_(char *); + static logical orgati; + static real erretm, rhoinv; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + December 23, 1999 + + + Purpose + ======= + + This subroutine computes the I-th updated eigenvalue of a symmetric + rank-one modification to a diagonal matrix whose elements are + given in the array d, and that + + D(i) < D(j) for i < j + + and that RHO > 0. This is arranged by the calling routine, and is + no loss in generality. The rank-one modified system is thus + + diag( D ) + RHO * Z * Z_transpose. + + where we assume the Euclidean norm of Z is 1. + + The method consists of approximating the rational functions in the + secular equation by simpler interpolating rational functions. + + Arguments + ========= + + N (input) INTEGER + The length of all arrays. + + I (input) INTEGER + The index of the eigenvalue to be computed. 1 <= I <= N. + + D (input) REAL array, dimension (N) + The original eigenvalues. It is assumed that they are in + order, D(I) < D(J) for I < J. + + Z (input) REAL array, dimension (N) + The components of the updating vector. + + DELTA (output) REAL array, dimension (N) + If N .ne. 1, DELTA contains (D(j) - lambda_I) in its j-th + component. If N = 1, then DELTA(1) = 1. The vector DELTA + contains the information necessary to construct the + eigenvectors. + + RHO (input) REAL + The scalar in the symmetric updating formula. + + DLAM (output) REAL + The computed lambda_I, the I-th updated eigenvalue. + + INFO (output) INTEGER + = 0: successful exit + > 0: if INFO = 1, the updating process failed. + + Internal Parameters + =================== + + Logical variable ORGATI (origin-at-i?) is used for distinguishing + whether D(i) or D(i+1) is treated as the origin. + + ORGATI = .true. origin at i + ORGATI = .false. origin at i+1 + + Logical variable SWTCH3 (switch-for-3-poles?) is for noting + if we are working with THREE poles! + + MAXIT is the maximum number of iterations allowed for each + eigenvalue. + + Further Details + =============== + + Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Since this routine is called in an inner loop, we do no argument + checking. + + Quick return for N=1 and 2. +*/ + + /* Parameter adjustments */ + --delta; + --z__; + --d__; + + /* Function Body */ + *info = 0; + if (*n == 1) { + +/* Presumably, I=1 upon entry */ + + *dlam = d__[1] + *rho * z__[1] * z__[1]; + delta[1] = 1.f; + return 0; + } + if (*n == 2) { + slaed5_(i__, &d__[1], &z__[1], &delta[1], rho, dlam); + return 0; + } + +/* Compute machine epsilon */ + + eps = slamch_("Epsilon"); + rhoinv = 1.f / *rho; + +/* The case I = N */ + + if (*i__ == *n) { + +/* Initialize some basic variables */ + + ii = *n - 1; + niter = 1; + +/* Calculate initial guess */ + + midpt = *rho / 2.f; + +/* + If ||Z||_2 is not one, then TEMP should be set to + RHO * ||Z||_2^2 / TWO +*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - midpt; +/* L10: */ + } + + psi = 0.f; + i__1 = *n - 2; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / delta[j]; +/* L20: */ + } + + c__ = rhoinv + psi; + w = c__ + z__[ii] * z__[ii] / delta[ii] + z__[*n] * z__[*n] / delta[* + n]; + + if (w <= 0.f) { + temp = z__[*n - 1] * z__[*n - 1] / (d__[*n] - d__[*n - 1] + *rho) + + z__[*n] * z__[*n] / *rho; + if (c__ <= temp) { + tau = *rho; + } else { + del = d__[*n] - d__[*n - 1]; + a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n] + ; + b = z__[*n] * z__[*n] * del; + if (a < 0.f) { + tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f); + } + } + +/* + It can be proved that + D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO +*/ + + dltlb = midpt; + dltub = *rho; + } else { + del = d__[*n] - d__[*n - 1]; + a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]; + b = z__[*n] * z__[*n] * del; + if (a < 0.f) { + tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f); + } + +/* + It can be proved that + D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2 +*/ + + dltlb = 0.f; + dltub = midpt; + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - tau; +/* L30: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L40: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / delta[*n]; + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * ( + dpsi + dphi); + + w = rhoinv + phi + psi; + +/* Test for convergence */ + + if (dabs(w) <= eps * erretm) { + *dlam = d__[*i__] + tau; + goto L250; + } + + if (w <= 0.f) { + dltlb = dmax(dltlb,tau); + } else { + dltub = dmin(dltub,tau); + } + +/* Calculate the new step */ + + ++niter; + c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; + a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * ( + dpsi + dphi); + b = delta[*n - 1] * delta[*n] * w; + if (c__ < 0.f) { + c__ = dabs(c__); + } + if (c__ == 0.f) { +/* + ETA = B/A + ETA = RHO - TAU +*/ + eta = dltub - tau; + } else if (a >= 0.f) { + eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / ( + c__ * 2.f); + } else { + eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta > 0.f) { + eta = -w / (dpsi + dphi); + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.f) { + eta = (dltub - tau) / 2.f; + } else { + eta = (dltlb - tau) / 2.f; + } + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L50: */ + } + + tau += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L60: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / delta[*n]; + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * ( + dpsi + dphi); + + w = rhoinv + phi + psi; + +/* Main loop to update the values of the array DELTA */ + + iter = niter + 1; + + for (niter = iter; niter <= 30; ++niter) { + +/* Test for convergence */ + + if (dabs(w) <= eps * erretm) { + *dlam = d__[*i__] + tau; + goto L250; + } + + if (w <= 0.f) { + dltlb = dmax(dltlb,tau); + } else { + dltub = dmin(dltub,tau); + } + +/* Calculate the new step */ + + c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; + a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * + (dpsi + dphi); + b = delta[*n - 1] * delta[*n] * w; + if (a >= 0.f) { + eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / + (c__ * 2.f); + } else { + eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta > 0.f) { + eta = -w / (dpsi + dphi); + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.f) { + eta = (dltub - tau) / 2.f; + } else { + eta = (dltlb - tau) / 2.f; + } + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L70: */ + } + + tau += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L80: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / delta[*n]; + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * + (dpsi + dphi); + + w = rhoinv + phi + psi; +/* L90: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + *dlam = d__[*i__] + tau; + goto L250; + +/* End for the case I = N */ + + } else { + +/* The case for I < N */ + + niter = 1; + ip1 = *i__ + 1; + +/* Calculate initial guess */ + + del = d__[ip1] - d__[*i__]; + midpt = del / 2.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - midpt; +/* L100: */ + } + + psi = 0.f; + i__1 = *i__ - 1; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / delta[j]; +/* L110: */ + } + + phi = 0.f; + i__1 = *i__ + 2; + for (j = *n; j >= i__1; --j) { + phi += z__[j] * z__[j] / delta[j]; +/* L120: */ + } + c__ = rhoinv + psi + phi; + w = c__ + z__[*i__] * z__[*i__] / delta[*i__] + z__[ip1] * z__[ip1] / + delta[ip1]; + + if (w > 0.f) { + +/* + d(i)< the ith eigenvalue < (d(i)+d(i+1))/2 + + We choose d(i) as origin. +*/ + + orgati = TRUE_; + a = c__ * del + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; + b = z__[*i__] * z__[*i__] * del; + if (a > 0.f) { + tau = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); + } else { + tau = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / + (c__ * 2.f); + } + dltlb = 0.f; + dltub = midpt; + } else { + +/* + (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1) + + We choose d(i+1) as origin. +*/ + + orgati = FALSE_; + a = c__ * del - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; + b = z__[ip1] * z__[ip1] * del; + if (a < 0.f) { + tau = b * 2.f / (a - sqrt((r__1 = a * a + b * 4.f * c__, dabs( + r__1)))); + } else { + tau = -(a + sqrt((r__1 = a * a + b * 4.f * c__, dabs(r__1)))) + / (c__ * 2.f); + } + dltlb = -midpt; + dltub = 0.f; + } + + if (orgati) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - tau; +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[ip1] - tau; +/* L140: */ + } + } + if (orgati) { + ii = *i__; + } else { + ii = *i__ + 1; + } + iim1 = ii - 1; + iip1 = ii + 1; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L150: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.f; + phi = 0.f; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / delta[j]; + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L160: */ + } + + w = rhoinv + phi + psi; + +/* + W is the value of the secular function with + its ii-th element removed. +*/ + + swtch3 = FALSE_; + if (orgati) { + if (w < 0.f) { + swtch3 = TRUE_; + } + } else { + if (w > 0.f) { + swtch3 = TRUE_; + } + } + if (ii == 1 || ii == *n) { + swtch3 = FALSE_; + } + + temp = z__[ii] / delta[ii]; + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w += temp; + erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f + + dabs(tau) * dw; + +/* Test for convergence */ + + if (dabs(w) <= eps * erretm) { + if (orgati) { + *dlam = d__[*i__] + tau; + } else { + *dlam = d__[ip1] + tau; + } + goto L250; + } + + if (w <= 0.f) { + dltlb = dmax(dltlb,tau); + } else { + dltub = dmin(dltub,tau); + } + +/* Calculate the new step */ + + ++niter; + if (! swtch3) { + if (orgati) { +/* Computing 2nd power */ + r__1 = z__[*i__] / delta[*i__]; + c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (r__1 * + r__1); + } else { +/* Computing 2nd power */ + r__1 = z__[ip1] / delta[ip1]; + c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (r__1 * + r__1); + } + a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] * + dw; + b = delta[*i__] * delta[ip1] * w; + if (c__ == 0.f) { + if (a == 0.f) { + if (orgati) { + a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] * + (dpsi + dphi); + } else { + a = z__[ip1] * z__[ip1] + delta[*i__] * delta[*i__] * + (dpsi + dphi); + } + } + eta = b / a; + } else if (a <= 0.f) { + eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / + (c__ * 2.f); + } else { + eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + temp = rhoinv + psi + phi; + if (orgati) { + temp1 = z__[iim1] / delta[iim1]; + temp1 *= temp1; + c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - d__[ + iip1]) * temp1; + zz[0] = z__[iim1] * z__[iim1]; + zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + dphi); + } else { + temp1 = z__[iip1] / delta[iip1]; + temp1 *= temp1; + c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - d__[ + iim1]) * temp1; + zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - temp1)); + zz[2] = z__[iip1] * z__[iip1]; + } + zz[1] = z__[ii] * z__[ii]; + slaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info); + if (*info != 0) { + goto L250; + } + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta >= 0.f) { + eta = -w / dw; + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.f) { + eta = (dltub - tau) / 2.f; + } else { + eta = (dltlb - tau) / 2.f; + } + } + + prew = w; + +/* L170: */ + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L180: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L190: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.f; + phi = 0.f; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / delta[j]; + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L200: */ + } + + temp = z__[ii] / delta[ii]; + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f + + (r__1 = tau + eta, dabs(r__1)) * dw; + + swtch = FALSE_; + if (orgati) { + if (-w > dabs(prew) / 10.f) { + swtch = TRUE_; + } + } else { + if (w > dabs(prew) / 10.f) { + swtch = TRUE_; + } + } + + tau += eta; + +/* Main loop to update the values of the array DELTA */ + + iter = niter + 1; + + for (niter = iter; niter <= 30; ++niter) { + +/* Test for convergence */ + + if (dabs(w) <= eps * erretm) { + if (orgati) { + *dlam = d__[*i__] + tau; + } else { + *dlam = d__[ip1] + tau; + } + goto L250; + } + + if (w <= 0.f) { + dltlb = dmax(dltlb,tau); + } else { + dltub = dmin(dltub,tau); + } + +/* Calculate the new step */ + + if (! swtch3) { + if (! swtch) { + if (orgati) { +/* Computing 2nd power */ + r__1 = z__[*i__] / delta[*i__]; + c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * ( + r__1 * r__1); + } else { +/* Computing 2nd power */ + r__1 = z__[ip1] / delta[ip1]; + c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * + (r__1 * r__1); + } + } else { + temp = z__[ii] / delta[ii]; + if (orgati) { + dpsi += temp * temp; + } else { + dphi += temp * temp; + } + c__ = w - delta[*i__] * dpsi - delta[ip1] * dphi; + } + a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] + * dw; + b = delta[*i__] * delta[ip1] * w; + if (c__ == 0.f) { + if (a == 0.f) { + if (! swtch) { + if (orgati) { + a = z__[*i__] * z__[*i__] + delta[ip1] * + delta[ip1] * (dpsi + dphi); + } else { + a = z__[ip1] * z__[ip1] + delta[*i__] * delta[ + *i__] * (dpsi + dphi); + } + } else { + a = delta[*i__] * delta[*i__] * dpsi + delta[ip1] + * delta[ip1] * dphi; + } + } + eta = b / a; + } else if (a <= 0.f) { + eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)) + )) / (c__ * 2.f); + } else { + eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, + dabs(r__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + temp = rhoinv + psi + phi; + if (swtch) { + c__ = temp - delta[iim1] * dpsi - delta[iip1] * dphi; + zz[0] = delta[iim1] * delta[iim1] * dpsi; + zz[2] = delta[iip1] * delta[iip1] * dphi; + } else { + if (orgati) { + temp1 = z__[iim1] / delta[iim1]; + temp1 *= temp1; + c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] + - d__[iip1]) * temp1; + zz[0] = z__[iim1] * z__[iim1]; + zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + + dphi); + } else { + temp1 = z__[iip1] / delta[iip1]; + temp1 *= temp1; + c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] + - d__[iim1]) * temp1; + zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - + temp1)); + zz[2] = z__[iip1] * z__[iip1]; + } + } + slaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, + info); + if (*info != 0) { + goto L250; + } + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta >= 0.f) { + eta = -w / dw; + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.f) { + eta = (dltub - tau) / 2.f; + } else { + eta = (dltlb - tau) / 2.f; + } + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L210: */ + } + + tau += eta; + prew = w; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L220: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.f; + phi = 0.f; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / delta[j]; + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L230: */ + } + + temp = z__[ii] / delta[ii]; + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * + 3.f + dabs(tau) * dw; + if (w * prew > 0.f && dabs(w) > dabs(prew) / 10.f) { + swtch = ! swtch; + } + +/* L240: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + if (orgati) { + *dlam = d__[*i__] + tau; + } else { + *dlam = d__[ip1] + tau; + } + + } + +L250: + + return 0; + +/* End of SLAED4 */ + +} /* slaed4_ */ + +/* Subroutine */ int slaed5_(integer *i__, real *d__, real *z__, real *delta, + real *rho, real *dlam) +{ + /* System generated locals */ + real r__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static real b, c__, w, del, tau, temp; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + September 30, 1994 + + + Purpose + ======= + + This subroutine computes the I-th eigenvalue of a symmetric rank-one + modification of a 2-by-2 diagonal matrix + + diag( D ) + RHO * Z * transpose(Z) . + + The diagonal elements in the array D are assumed to satisfy + + D(i) < D(j) for i < j . + + We also assume RHO > 0 and that the Euclidean norm of the vector + Z is one. + + Arguments + ========= + + I (input) INTEGER + The index of the eigenvalue to be computed. I = 1 or I = 2. + + D (input) REAL array, dimension (2) + The original eigenvalues. We assume D(1) < D(2). + + Z (input) REAL array, dimension (2) + The components of the updating vector. + + DELTA (output) REAL array, dimension (2) + The vector DELTA contains the information necessary + to construct the eigenvectors. + + RHO (input) REAL + The scalar in the symmetric updating formula. + + DLAM (output) REAL + The computed lambda_I, the I-th updated eigenvalue. + + Further Details + =============== + + Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --delta; + --z__; + --d__; + + /* Function Body */ + del = d__[2] - d__[1]; + if (*i__ == 1) { + w = *rho * 2.f * (z__[2] * z__[2] - z__[1] * z__[1]) / del + 1.f; + if (w > 0.f) { + b = del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[1] * z__[1] * del; + +/* B > ZERO, always */ + + tau = c__ * 2.f / (b + sqrt((r__1 = b * b - c__ * 4.f, dabs(r__1)) + )); + *dlam = d__[1] + tau; + delta[1] = -z__[1] / tau; + delta[2] = z__[2] / (del - tau); + } else { + b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * del; + if (b > 0.f) { + tau = c__ * -2.f / (b + sqrt(b * b + c__ * 4.f)); + } else { + tau = (b - sqrt(b * b + c__ * 4.f)) / 2.f; + } + *dlam = d__[2] + tau; + delta[1] = -z__[1] / (del + tau); + delta[2] = -z__[2] / tau; + } + temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); + delta[1] /= temp; + delta[2] /= temp; + } else { + +/* Now I=2 */ + + b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * del; + if (b > 0.f) { + tau = (b + sqrt(b * b + c__ * 4.f)) / 2.f; + } else { + tau = c__ * 2.f / (-b + sqrt(b * b + c__ * 4.f)); + } + *dlam = d__[2] + tau; + delta[1] = -z__[1] / (del + tau); + delta[2] = -z__[2] / tau; + temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); + delta[1] /= temp; + delta[2] /= temp; + } + return 0; + +/* End OF SLAED5 */ + +} /* slaed5_ */ + +/* Subroutine */ int slaed6_(integer *kniter, logical *orgati, real *rho, + real *d__, real *z__, real *finit, real *tau, integer *info) +{ + /* Initialized data */ + + static logical first = TRUE_; + + /* System generated locals */ + integer i__1; + real r__1, r__2, r__3, r__4; + + /* Builtin functions */ + double sqrt(doublereal), log(doublereal), pow_ri(real *, integer *); + + /* Local variables */ + static real a, b, c__, f; + static integer i__; + static real fc, df, ddf, eta, eps, base; + static integer iter; + static real temp, temp1, temp2, temp3, temp4; + static logical scale; + static integer niter; + static real small1, small2, sminv1, sminv2, dscale[3], sclfac; + extern doublereal slamch_(char *); + static real zscale[3], erretm, sclinv; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + June 30, 1999 + + + Purpose + ======= + + SLAED6 computes the positive or negative root (closest to the origin) + of + z(1) z(2) z(3) + f(x) = rho + --------- + ---------- + --------- + d(1)-x d(2)-x d(3)-x + + It is assumed that + + if ORGATI = .true. the root is between d(2) and d(3); + otherwise it is between d(1) and d(2) + + This routine will be called by SLAED4 when necessary. In most cases, + the root sought is the smallest in magnitude, though it might not be + in some extremely rare situations. + + Arguments + ========= + + KNITER (input) INTEGER + Refer to SLAED4 for its significance. + + ORGATI (input) LOGICAL + If ORGATI is true, the needed root is between d(2) and + d(3); otherwise it is between d(1) and d(2). See + SLAED4 for further details. + + RHO (input) REAL + Refer to the equation f(x) above. + + D (input) REAL array, dimension (3) + D satisfies d(1) < d(2) < d(3). + + Z (input) REAL array, dimension (3) + Each of the elements in z must be positive. + + FINIT (input) REAL + The value of f at 0. It is more accurate than the one + evaluated inside this routine (if someone wants to do + so). + + TAU (output) REAL + The root of the equation f(x). + + INFO (output) INTEGER + = 0: successful exit + > 0: if INFO = 1, failure to converge + + Further Details + =============== + + Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== +*/ + + /* Parameter adjustments */ + --z__; + --d__; + + /* Function Body */ + + *info = 0; + + niter = 1; + *tau = 0.f; + if (*kniter == 2) { + if (*orgati) { + temp = (d__[3] - d__[2]) / 2.f; + c__ = *rho + z__[1] / (d__[1] - d__[2] - temp); + a = c__ * (d__[2] + d__[3]) + z__[2] + z__[3]; + b = c__ * d__[2] * d__[3] + z__[2] * d__[3] + z__[3] * d__[2]; + } else { + temp = (d__[1] - d__[2]) / 2.f; + c__ = *rho + z__[3] / (d__[3] - d__[2] - temp); + a = c__ * (d__[1] + d__[2]) + z__[1] + z__[2]; + b = c__ * d__[1] * d__[2] + z__[1] * d__[2] + z__[2] * d__[1]; + } +/* Computing MAX */ + r__1 = dabs(a), r__2 = dabs(b), r__1 = max(r__1,r__2), r__2 = dabs( + c__); + temp = dmax(r__1,r__2); + a /= temp; + b /= temp; + c__ /= temp; + if (c__ == 0.f) { + *tau = b / a; + } else if (a <= 0.f) { + *tau = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / ( + c__ * 2.f); + } else { + *tau = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); + } + temp = *rho + z__[1] / (d__[1] - *tau) + z__[2] / (d__[2] - *tau) + + z__[3] / (d__[3] - *tau); + if (dabs(*finit) <= dabs(temp)) { + *tau = 0.f; + } + } + +/* + On first call to routine, get machine parameters for + possible scaling to avoid overflow +*/ + + if (first) { + eps = slamch_("Epsilon"); + base = slamch_("Base"); + i__1 = (integer) (log(slamch_("SafMin")) / log(base) / 3.f) + ; + small1 = pow_ri(&base, &i__1); + sminv1 = 1.f / small1; + small2 = small1 * small1; + sminv2 = sminv1 * sminv1; + first = FALSE_; + } + +/* + Determine if scaling of inputs necessary to avoid overflow + when computing 1/TEMP**3 +*/ + + if (*orgati) { +/* Computing MIN */ + r__3 = (r__1 = d__[2] - *tau, dabs(r__1)), r__4 = (r__2 = d__[3] - * + tau, dabs(r__2)); + temp = dmin(r__3,r__4); + } else { +/* Computing MIN */ + r__3 = (r__1 = d__[1] - *tau, dabs(r__1)), r__4 = (r__2 = d__[2] - * + tau, dabs(r__2)); + temp = dmin(r__3,r__4); + } + scale = FALSE_; + if (temp <= small1) { + scale = TRUE_; + if (temp <= small2) { + +/* Scale up by power of radix nearest 1/SAFMIN**(2/3) */ + + sclfac = sminv2; + sclinv = small2; + } else { + +/* Scale up by power of radix nearest 1/SAFMIN**(1/3) */ + + sclfac = sminv1; + sclinv = small1; + } + +/* Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) */ + + for (i__ = 1; i__ <= 3; ++i__) { + dscale[i__ - 1] = d__[i__] * sclfac; + zscale[i__ - 1] = z__[i__] * sclfac; +/* L10: */ + } + *tau *= sclfac; + } else { + +/* Copy D and Z to DSCALE and ZSCALE */ + + for (i__ = 1; i__ <= 3; ++i__) { + dscale[i__ - 1] = d__[i__]; + zscale[i__ - 1] = z__[i__]; +/* L20: */ + } + } + + fc = 0.f; + df = 0.f; + ddf = 0.f; + for (i__ = 1; i__ <= 3; ++i__) { + temp = 1.f / (dscale[i__ - 1] - *tau); + temp1 = zscale[i__ - 1] * temp; + temp2 = temp1 * temp; + temp3 = temp2 * temp; + fc += temp1 / dscale[i__ - 1]; + df += temp2; + ddf += temp3; +/* L30: */ + } + f = *finit + *tau * fc; + + if (dabs(f) <= 0.f) { + goto L60; + } + +/* + Iteration begins + + It is not hard to see that + + 1) Iterations will go up monotonically + if FINIT < 0; + + 2) Iterations will go down monotonically + if FINIT > 0. +*/ + + iter = niter + 1; + + for (niter = iter; niter <= 20; ++niter) { + + if (*orgati) { + temp1 = dscale[1] - *tau; + temp2 = dscale[2] - *tau; + } else { + temp1 = dscale[0] - *tau; + temp2 = dscale[1] - *tau; + } + a = (temp1 + temp2) * f - temp1 * temp2 * df; + b = temp1 * temp2 * f; + c__ = f - (temp1 + temp2) * df + temp1 * temp2 * ddf; +/* Computing MAX */ + r__1 = dabs(a), r__2 = dabs(b), r__1 = max(r__1,r__2), r__2 = dabs( + c__); + temp = dmax(r__1,r__2); + a /= temp; + b /= temp; + c__ /= temp; + if (c__ == 0.f) { + eta = b / a; + } else if (a <= 0.f) { + eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / ( + c__ * 2.f); + } else { + eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); + } + if (f * eta >= 0.f) { + eta = -f / df; + } + + temp = eta + *tau; + if (*orgati) { + if (eta > 0.f && temp >= dscale[2]) { + eta = (dscale[2] - *tau) / 2.f; + } + if (eta < 0.f && temp <= dscale[1]) { + eta = (dscale[1] - *tau) / 2.f; + } + } else { + if (eta > 0.f && temp >= dscale[1]) { + eta = (dscale[1] - *tau) / 2.f; + } + if (eta < 0.f && temp <= dscale[0]) { + eta = (dscale[0] - *tau) / 2.f; + } + } + *tau += eta; + + fc = 0.f; + erretm = 0.f; + df = 0.f; + ddf = 0.f; + for (i__ = 1; i__ <= 3; ++i__) { + temp = 1.f / (dscale[i__ - 1] - *tau); + temp1 = zscale[i__ - 1] * temp; + temp2 = temp1 * temp; + temp3 = temp2 * temp; + temp4 = temp1 / dscale[i__ - 1]; + fc += temp4; + erretm += dabs(temp4); + df += temp2; + ddf += temp3; +/* L40: */ + } + f = *finit + *tau * fc; + erretm = (dabs(*finit) + dabs(*tau) * erretm) * 8.f + dabs(*tau) * df; + if (dabs(f) <= eps * erretm) { + goto L60; + } +/* L50: */ + } + *info = 1; +L60: + +/* Undo scaling */ + + if (scale) { + *tau *= sclinv; + } + return 0; + +/* End of SLAED6 */ + +} /* slaed6_ */ + +/* Subroutine */ int slaed7_(integer *icompq, integer *n, integer *qsiz, + integer *tlvls, integer *curlvl, integer *curpbm, real *d__, real *q, + integer *ldq, integer *indxq, real *rho, integer *cutpnt, real * + qstore, integer *qptr, integer *prmptr, integer *perm, integer * + givptr, integer *givcol, real *givnum, real *work, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + + /* Local variables */ + static integer i__, k, n1, n2, is, iw, iz, iq2, ptr, ldq2, indx, curr, + indxc; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static integer indxp; + extern /* Subroutine */ int slaed8_(integer *, integer *, integer *, + integer *, real *, real *, integer *, integer *, real *, integer * + , real *, real *, real *, integer *, real *, integer *, integer *, + integer *, real *, integer *, integer *, integer *), slaed9_( + integer *, integer *, integer *, integer *, real *, real *, + integer *, real *, real *, real *, real *, integer *, integer *), + slaeda_(integer *, integer *, integer *, integer *, integer *, + integer *, integer *, integer *, real *, real *, integer *, real * + , real *, integer *); + static integer idlmda; + extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_( + integer *, integer *, real *, integer *, integer *, integer *); + static integer coltyp; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + SLAED7 computes the updated eigensystem of a diagonal + matrix after modification by a rank-one symmetric matrix. This + routine is used only for the eigenproblem which requires all + eigenvalues and optionally eigenvectors of a dense symmetric matrix + that has been reduced to tridiagonal form. SLAED1 handles + the case in which all eigenvalues and eigenvectors of a symmetric + tridiagonal matrix are desired. + + T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) + + where Z = Q'u, u is a vector of length N with ones in the + CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + + The eigenvectors of the original matrix are stored in Q, and the + eigenvalues are in D. The algorithm consists of three stages: + + The first stage consists of deflating the size of the problem + when there are multiple eigenvalues or if there is a zero in + the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. This stage is + performed by the routine SLAED8. + + The second stage consists of calculating the updated + eigenvalues. This is done by finding the roots of the secular + equation via the routine SLAED4 (as called by SLAED9). + This routine also calculates the eigenvectors of the current + problem. + + The final stage consists of computing the updated eigenvectors + directly using the updated eigenvalues. The eigenvectors for + the current problem are multiplied with the eigenvectors from + the overall problem. + + Arguments + ========= + + ICOMPQ (input) INTEGER + = 0: Compute eigenvalues only. + = 1: Compute eigenvectors of original dense symmetric matrix + also. On entry, Q contains the orthogonal matrix used + to reduce the original matrix to tridiagonal form. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + QSIZ (input) INTEGER + The dimension of the orthogonal matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. + + TLVLS (input) INTEGER + The total number of merging levels in the overall divide and + conquer tree. + + CURLVL (input) INTEGER + The current level in the overall merge routine, + 0 <= CURLVL <= TLVLS. + + CURPBM (input) INTEGER + The current problem in the current level in the overall + merge routine (counting from upper left to lower right). + + D (input/output) REAL array, dimension (N) + On entry, the eigenvalues of the rank-1-perturbed matrix. + On exit, the eigenvalues of the repaired matrix. + + Q (input/output) REAL array, dimension (LDQ, N) + On entry, the eigenvectors of the rank-1-perturbed matrix. + On exit, the eigenvectors of the repaired tridiagonal matrix. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + INDXQ (output) INTEGER array, dimension (N) + The permutation which will reintegrate the subproblem just + solved back into sorted order, i.e., D( INDXQ( I = 1, N ) ) + will be in ascending order. + + RHO (input) REAL + The subdiagonal element used to create the rank-1 + modification. + + CUTPNT (input) INTEGER + Contains the location of the last eigenvalue in the leading + sub-matrix. min(1,N) <= CUTPNT <= N. + + QSTORE (input/output) REAL array, dimension (N**2+1) + Stores eigenvectors of submatrices encountered during + divide and conquer, packed together. QPTR points to + beginning of the submatrices. + + QPTR (input/output) INTEGER array, dimension (N+2) + List of indices pointing to beginning of submatrices stored + in QSTORE. The submatrices are numbered starting at the + bottom left of the divide and conquer tree, from left to + right and bottom to top. + + PRMPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in PERM a + level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) + indicates the size of the permutation and also the size of + the full, non-deflated problem. + + PERM (input) INTEGER array, dimension (N lg N) + Contains the permutations (from deflation and sorting) to be + applied to each eigenblock. + + GIVPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in GIVCOL a + level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) + indicates the number of Givens rotations. + + GIVCOL (input) INTEGER array, dimension (2, N lg N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. + + GIVNUM (input) REAL array, dimension (2, N lg N) + Each number indicates the S value to be used in the + corresponding Givens rotation. + + WORK (workspace) REAL array, dimension (3*N+QSIZ*N) + + IWORK (workspace) INTEGER array, dimension (4*N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an eigenvalue did not converge + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --qstore; + --qptr; + --prmptr; + --perm; + --givptr; + givcol -= 3; + givnum -= 3; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*icompq == 1 && *qsiz < *n) { + *info = -4; + } else if (*ldq < max(1,*n)) { + *info = -9; + } else if (min(1,*n) > *cutpnt || *n < *cutpnt) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAED7", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* + The following values are for bookkeeping purposes only. They are + integer pointers which indicate the portion of the workspace + used by a particular array in SLAED8 and SLAED9. +*/ + + if (*icompq == 1) { + ldq2 = *qsiz; + } else { + ldq2 = *n; + } + + iz = 1; + idlmda = iz + *n; + iw = idlmda + *n; + iq2 = iw + *n; + is = iq2 + *n * ldq2; + + indx = 1; + indxc = indx + *n; + coltyp = indxc + *n; + indxp = coltyp + *n; + +/* + Form the z-vector which consists of the last row of Q_1 and the + first row of Q_2. +*/ + + ptr = pow_ii(&c__2, tlvls) + 1; + i__1 = *curlvl - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *tlvls - i__; + ptr += pow_ii(&c__2, &i__2); +/* L10: */ + } + curr = ptr + *curpbm; + slaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], & + givcol[3], &givnum[3], &qstore[1], &qptr[1], &work[iz], &work[iz + + *n], info); + +/* + When solving the final problem, we no longer need the stored data, + so we will overwrite the data from this level onto the previously + used storage space. +*/ + + if (*curlvl == *tlvls) { + qptr[curr] = 1; + prmptr[curr] = 1; + givptr[curr] = 1; + } + +/* Sort and Deflate eigenvalues. */ + + slaed8_(icompq, &k, n, qsiz, &d__[1], &q[q_offset], ldq, &indxq[1], rho, + cutpnt, &work[iz], &work[idlmda], &work[iq2], &ldq2, &work[iw], & + perm[prmptr[curr]], &givptr[curr + 1], &givcol[(givptr[curr] << 1) + + 1], &givnum[(givptr[curr] << 1) + 1], &iwork[indxp], &iwork[ + indx], info); + prmptr[curr + 1] = prmptr[curr] + *n; + givptr[curr + 1] += givptr[curr]; + +/* Solve Secular Equation. */ + + if (k != 0) { + slaed9_(&k, &c__1, &k, n, &d__[1], &work[is], &k, rho, &work[idlmda], + &work[iw], &qstore[qptr[curr]], &k, info); + if (*info != 0) { + goto L30; + } + if (*icompq == 1) { + sgemm_("N", "N", qsiz, &k, &k, &c_b15, &work[iq2], &ldq2, &qstore[ + qptr[curr]], &k, &c_b29, &q[q_offset], ldq); + } +/* Computing 2nd power */ + i__1 = k; + qptr[curr + 1] = qptr[curr] + i__1 * i__1; + +/* Prepare the INDXQ sorting permutation. */ + + n1 = k; + n2 = *n - k; + slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); + } else { + qptr[curr + 1] = qptr[curr]; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indxq[i__] = i__; +/* L20: */ + } + } + +L30: + return 0; + +/* End of SLAED7 */ + +} /* slaed7_ */ + +/* Subroutine */ int slaed8_(integer *icompq, integer *k, integer *n, integer + *qsiz, real *d__, real *q, integer *ldq, integer *indxq, real *rho, + integer *cutpnt, real *z__, real *dlamda, real *q2, integer *ldq2, + real *w, integer *perm, integer *givptr, integer *givcol, real * + givnum, integer *indxp, integer *indx, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, q2_dim1, q2_offset, i__1; + real r__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static real c__; + static integer i__, j; + static real s, t; + static integer k2, n1, n2, jp, n1p1; + static real eps, tau, tol; + static integer jlam, imax, jmax; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *), sscal_(integer *, real *, real *, + integer *), scopy_(integer *, real *, integer *, real *, integer * + ); + extern doublereal slapy2_(real *, real *), slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer isamax_(integer *, real *, integer *); + extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer + *, integer *, integer *), slacpy_(char *, integer *, integer *, + real *, integer *, real *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + September 30, 1994 + + + Purpose + ======= + + SLAED8 merges the two sets of eigenvalues together into a single + sorted set. Then it tries to deflate the size of the problem. + There are two ways in which deflation can occur: when two or more + eigenvalues are close together or if there is a tiny element in the + Z vector. For each such occurrence the order of the related secular + equation problem is reduced by one. + + Arguments + ========= + + ICOMPQ (input) INTEGER + = 0: Compute eigenvalues only. + = 1: Compute eigenvectors of original dense symmetric matrix + also. On entry, Q contains the orthogonal matrix used + to reduce the original matrix to tridiagonal form. + + K (output) INTEGER + The number of non-deflated eigenvalues, and the order of the + related secular equation. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + QSIZ (input) INTEGER + The dimension of the orthogonal matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. + + D (input/output) REAL array, dimension (N) + On entry, the eigenvalues of the two submatrices to be + combined. On exit, the trailing (N-K) updated eigenvalues + (those which were deflated) sorted into increasing order. + + Q (input/output) REAL array, dimension (LDQ,N) + If ICOMPQ = 0, Q is not referenced. Otherwise, + on entry, Q contains the eigenvectors of the partially solved + system which has been previously updated in matrix + multiplies with other partially solved eigensystems. + On exit, Q contains the trailing (N-K) updated eigenvectors + (those which were deflated) in its last N-K columns. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + INDXQ (input) INTEGER array, dimension (N) + The permutation which separately sorts the two sub-problems + in D into ascending order. Note that elements in the second + half of this permutation must first have CUTPNT added to + their values in order to be accurate. + + RHO (input/output) REAL + On entry, the off-diagonal element associated with the rank-1 + cut which originally split the two submatrices which are now + being recombined. + On exit, RHO has been modified to the value required by + SLAED3. + + CUTPNT (input) INTEGER + The location of the last eigenvalue in the leading + sub-matrix. min(1,N) <= CUTPNT <= N. + + Z (input) REAL array, dimension (N) + On entry, Z contains the updating vector (the last row of + the first sub-eigenvector matrix and the first row of the + second sub-eigenvector matrix). + On exit, the contents of Z are destroyed by the updating + process. + + DLAMDA (output) REAL array, dimension (N) + A copy of the first K eigenvalues which will be used by + SLAED3 to form the secular equation. + + Q2 (output) REAL array, dimension (LDQ2,N) + If ICOMPQ = 0, Q2 is not referenced. Otherwise, + a copy of the first K eigenvectors which will be used by + SLAED7 in a matrix multiply (SGEMM) to update the new + eigenvectors. + + LDQ2 (input) INTEGER + The leading dimension of the array Q2. LDQ2 >= max(1,N). + + W (output) REAL array, dimension (N) + The first k values of the final deflation-altered z-vector and + will be passed to SLAED3. + + PERM (output) INTEGER array, dimension (N) + The permutations (from deflation and sorting) to be applied + to each eigenblock. + + GIVPTR (output) INTEGER + The number of Givens rotations which took place in this + subproblem. + + GIVCOL (output) INTEGER array, dimension (2, N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. + + GIVNUM (output) REAL array, dimension (2, N) + Each number indicates the S value to be used in the + corresponding Givens rotation. + + INDXP (workspace) INTEGER array, dimension (N) + The permutation used to place deflated values of D at the end + of the array. INDXP(1:K) points to the nondeflated D-values + and INDXP(K+1:N) points to the deflated eigenvalues. + + INDX (workspace) INTEGER array, dimension (N) + The permutation used to sort the contents of D into ascending + order. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --z__; + --dlamda; + q2_dim1 = *ldq2; + q2_offset = 1 + q2_dim1; + q2 -= q2_offset; + --w; + --perm; + givcol -= 3; + givnum -= 3; + --indxp; + --indx; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*n < 0) { + *info = -3; + } else if (*icompq == 1 && *qsiz < *n) { + *info = -4; + } else if (*ldq < max(1,*n)) { + *info = -7; + } else if (*cutpnt < min(1,*n) || *cutpnt > *n) { + *info = -10; + } else if (*ldq2 < max(1,*n)) { + *info = -14; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAED8", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + n1 = *cutpnt; + n2 = *n - n1; + n1p1 = n1 + 1; + + if (*rho < 0.f) { + sscal_(&n2, &c_b151, &z__[n1p1], &c__1); + } + +/* Normalize z so that norm(z) = 1 */ + + t = 1.f / sqrt(2.f); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + indx[j] = j; +/* L10: */ + } + sscal_(n, &t, &z__[1], &c__1); + *rho = (r__1 = *rho * 2.f, dabs(r__1)); + +/* Sort the eigenvalues into increasing order */ + + i__1 = *n; + for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) { + indxq[i__] += *cutpnt; +/* L20: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = d__[indxq[i__]]; + w[i__] = z__[indxq[i__]]; +/* L30: */ + } + i__ = 1; + j = *cutpnt + 1; + slamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = dlamda[indx[i__]]; + z__[i__] = w[indx[i__]]; +/* L40: */ + } + +/* Calculate the allowable deflation tolerence */ + + imax = isamax_(n, &z__[1], &c__1); + jmax = isamax_(n, &d__[1], &c__1); + eps = slamch_("Epsilon"); + tol = eps * 8.f * (r__1 = d__[jmax], dabs(r__1)); + +/* + If the rank-1 modifier is small enough, no more needs to be done + except to reorganize Q so that its columns correspond with the + elements in D. +*/ + + if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) { + *k = 0; + if (*icompq == 0) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + perm[j] = indxq[indx[j]]; +/* L50: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + perm[j] = indxq[indx[j]]; + scopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + + 1], &c__1); +/* L60: */ + } + slacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq); + } + return 0; + } + +/* + If there are multiple eigenvalues then the problem deflates. Here + the number of equal eigenvalues are found. As each equal + eigenvalue is found, an elementary reflector is computed to rotate + the corresponding eigensubspace so that the corresponding + components of Z are zero in this new basis. +*/ + + *k = 0; + *givptr = 0; + k2 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + indxp[k2] = j; + if (j == *n) { + goto L110; + } + } else { + jlam = j; + goto L80; + } +/* L70: */ + } +L80: + ++j; + if (j > *n) { + goto L100; + } + if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + indxp[k2] = j; + } else { + +/* Check if eigenvalues are close enough to allow deflation. */ + + s = z__[jlam]; + c__ = z__[j]; + +/* + Find sqrt(a**2+b**2) without overflow or + destructive underflow. +*/ + + tau = slapy2_(&c__, &s); + t = d__[j] - d__[jlam]; + c__ /= tau; + s = -s / tau; + if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) { + +/* Deflation is possible. */ + + z__[j] = tau; + z__[jlam] = 0.f; + +/* Record the appropriate Givens rotation */ + + ++(*givptr); + givcol[(*givptr << 1) + 1] = indxq[indx[jlam]]; + givcol[(*givptr << 1) + 2] = indxq[indx[j]]; + givnum[(*givptr << 1) + 1] = c__; + givnum[(*givptr << 1) + 2] = s; + if (*icompq == 1) { + srot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[ + indxq[indx[j]] * q_dim1 + 1], &c__1, &c__, &s); + } + t = d__[jlam] * c__ * c__ + d__[j] * s * s; + d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__; + d__[jlam] = t; + --k2; + i__ = 1; +L90: + if (k2 + i__ <= *n) { + if (d__[jlam] < d__[indxp[k2 + i__]]) { + indxp[k2 + i__ - 1] = indxp[k2 + i__]; + indxp[k2 + i__] = jlam; + ++i__; + goto L90; + } else { + indxp[k2 + i__ - 1] = jlam; + } + } else { + indxp[k2 + i__ - 1] = jlam; + } + jlam = j; + } else { + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + jlam = j; + } + } + goto L80; +L100: + +/* Record the last eigenvalue. */ + + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + +L110: + +/* + Sort the eigenvalues and corresponding eigenvectors into DLAMDA + and Q2 respectively. The eigenvalues/vectors which were not + deflated go into the first K slots of DLAMDA and Q2 respectively, + while those which were deflated go into the last N - K slots. +*/ + + if (*icompq == 0) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jp = indxp[j]; + dlamda[j] = d__[jp]; + perm[j] = indxq[indx[jp]]; +/* L120: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jp = indxp[j]; + dlamda[j] = d__[jp]; + perm[j] = indxq[indx[jp]]; + scopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1] + , &c__1); +/* L130: */ + } + } + +/* + The deflated eigenvalues and their corresponding vectors go back + into the last N - K slots of D and Q respectively. +*/ + + if (*k < *n) { + if (*icompq == 0) { + i__1 = *n - *k; + scopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); + } else { + i__1 = *n - *k; + scopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); + i__1 = *n - *k; + slacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(* + k + 1) * q_dim1 + 1], ldq); + } + } + + return 0; + +/* End of SLAED8 */ + +} /* slaed8_ */ + +/* Subroutine */ int slaed9_(integer *k, integer *kstart, integer *kstop, + integer *n, real *d__, real *q, integer *ldq, real *rho, real *dlamda, + real *w, real *s, integer *lds, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, s_dim1, s_offset, i__1, i__2; + real r__1; + + /* Builtin functions */ + double sqrt(doublereal), r_sign(real *, real *); + + /* Local variables */ + static integer i__, j; + static real temp; + extern doublereal snrm2_(integer *, real *, integer *); + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *), slaed4_(integer *, integer *, real *, real *, real *, + real *, real *, integer *); + extern doublereal slamc3_(real *, real *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + September 30, 1994 + + + Purpose + ======= + + SLAED9 finds the roots of the secular equation, as defined by the + values in D, Z, and RHO, between KSTART and KSTOP. It makes the + appropriate calls to SLAED4 and then stores the new matrix of + eigenvectors for use in calculating the next level of Z vectors. + + Arguments + ========= + + K (input) INTEGER + The number of terms in the rational function to be solved by + SLAED4. K >= 0. + + KSTART (input) INTEGER + KSTOP (input) INTEGER + The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP + are to be computed. 1 <= KSTART <= KSTOP <= K. + + N (input) INTEGER + The number of rows and columns in the Q matrix. + N >= K (delation may result in N > K). + + D (output) REAL array, dimension (N) + D(I) contains the updated eigenvalues + for KSTART <= I <= KSTOP. + + Q (workspace) REAL array, dimension (LDQ,N) + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max( 1, N ). + + RHO (input) REAL + The value of the parameter in the rank one update equation. + RHO >= 0 required. + + DLAMDA (input) REAL array, dimension (K) + The first K elements of this array contain the old roots + of the deflated updating problem. These are the poles + of the secular equation. + + W (input) REAL array, dimension (K) + The first K elements of this array contain the components + of the deflation-adjusted updating vector. + + S (output) REAL array, dimension (LDS, K) + Will contain the eigenvectors of the repaired matrix which + will be stored for subsequent Z vector calculation and + multiplied by the previously accumulated eigenvectors + to update the system. + + LDS (input) INTEGER + The leading dimension of S. LDS >= max( 1, K ). + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an eigenvalue did not converge + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --dlamda; + --w; + s_dim1 = *lds; + s_offset = 1 + s_dim1; + s -= s_offset; + + /* Function Body */ + *info = 0; + + if (*k < 0) { + *info = -1; + } else if (*kstart < 1 || *kstart > max(1,*k)) { + *info = -2; + } else if (max(1,*kstop) < *kstart || *kstop > max(1,*k)) { + *info = -3; + } else if (*n < *k) { + *info = -4; + } else if (*ldq < max(1,*k)) { + *info = -7; + } else if (*lds < max(1,*k)) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAED9", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*k == 0) { + return 0; + } + +/* + Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can + be computed with high relative accuracy (barring over/underflow). + This is a problem on machines without a guard digit in + add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). + The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), + which on any of these machines zeros out the bottommost + bit of DLAMDA(I) if it is 1; this makes the subsequent + subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation + occurs. On binary machines with a guard digit (almost all + machines) it does not change DLAMDA(I) at all. On hexadecimal + and decimal machines with a guard digit, it slightly + changes the bottommost bits of DLAMDA(I). It does not account + for hexadecimal or decimal machines without guard digits + (we know of none). We use a subroutine call to compute + 2*DLAMBDA(I) to prevent optimizing compilers from eliminating + this code. +*/ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = slamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; +/* L10: */ + } + + i__1 = *kstop; + for (j = *kstart; j <= i__1; ++j) { + slaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], + info); + +/* If the zero finder fails, the computation is terminated. */ + + if (*info != 0) { + goto L120; + } +/* L20: */ + } + + if (*k == 1 || *k == 2) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *k; + for (j = 1; j <= i__2; ++j) { + s[j + i__ * s_dim1] = q[j + i__ * q_dim1]; +/* L30: */ + } +/* L40: */ + } + goto L120; + } + +/* Compute updated W. */ + + scopy_(k, &w[1], &c__1, &s[s_offset], &c__1); + +/* Initialize W(I) = Q(I,I) */ + + i__1 = *ldq + 1; + scopy_(k, &q[q_offset], &i__1, &w[1], &c__1); + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L50: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L60: */ + } +/* L70: */ + } + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + r__1 = sqrt(-w[i__]); + w[i__] = r_sign(&r__1, &s[i__ + s_dim1]); +/* L80: */ + } + +/* Compute eigenvectors of the modified rank-1 modification. */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + q[i__ + j * q_dim1] = w[i__] / q[i__ + j * q_dim1]; +/* L90: */ + } + temp = snrm2_(k, &q[j * q_dim1 + 1], &c__1); + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + s[i__ + j * s_dim1] = q[i__ + j * q_dim1] / temp; +/* L100: */ + } +/* L110: */ + } + +L120: + return 0; + +/* End of SLAED9 */ + +} /* slaed9_ */ + +/* Subroutine */ int slaeda_(integer *n, integer *tlvls, integer *curlvl, + integer *curpbm, integer *prmptr, integer *perm, integer *givptr, + integer *givcol, real *givnum, real *q, integer *qptr, real *z__, + real *ztemp, integer *info) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + double sqrt(doublereal); + + /* Local variables */ + static integer i__, k, mid, ptr, curr; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *); + static integer bsiz1, bsiz2, psiz1, psiz2, zptr1; + extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *), + xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + SLAEDA computes the Z vector corresponding to the merge step in the + CURLVLth step of the merge process with TLVLS steps for the CURPBMth + problem. + + Arguments + ========= + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + TLVLS (input) INTEGER + The total number of merging levels in the overall divide and + conquer tree. + + CURLVL (input) INTEGER + The current level in the overall merge routine, + 0 <= curlvl <= tlvls. + + CURPBM (input) INTEGER + The current problem in the current level in the overall + merge routine (counting from upper left to lower right). + + PRMPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in PERM a + level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) + indicates the size of the permutation and incidentally the + size of the full, non-deflated problem. + + PERM (input) INTEGER array, dimension (N lg N) + Contains the permutations (from deflation and sorting) to be + applied to each eigenblock. + + GIVPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in GIVCOL a + level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) + indicates the number of Givens rotations. + + GIVCOL (input) INTEGER array, dimension (2, N lg N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. + + GIVNUM (input) REAL array, dimension (2, N lg N) + Each number indicates the S value to be used in the + corresponding Givens rotation. + + Q (input) REAL array, dimension (N**2) + Contains the square eigenblocks from previous levels, the + starting positions for blocks are given by QPTR. + + QPTR (input) INTEGER array, dimension (N+2) + Contains a list of pointers which indicate where in Q an + eigenblock is stored. SQRT( QPTR(i+1) - QPTR(i) ) indicates + the size of the block. + + Z (output) REAL array, dimension (N) + On output this vector contains the updating vector (the last + row of the first sub-eigenvector matrix and the first row of + the second sub-eigenvector matrix). + + ZTEMP (workspace) REAL array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --ztemp; + --z__; + --qptr; + --q; + givnum -= 3; + givcol -= 3; + --givptr; + --perm; + --prmptr; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -1; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAEDA", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine location of first number in second half. */ + + mid = *n / 2 + 1; + +/* Gather last/first rows of appropriate eigenblocks into center of Z */ + + ptr = 1; + +/* + Determine location of lowest level subproblem in the full storage + scheme +*/ + + i__1 = *curlvl - 1; + curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1; + +/* + Determine size of these matrices. We add HALF to the value of + the SQRT in case the machine underestimates one of these square + roots. +*/ + + bsiz1 = (integer) (sqrt((real) (qptr[curr + 1] - qptr[curr])) + .5f); + bsiz2 = (integer) (sqrt((real) (qptr[curr + 2] - qptr[curr + 1])) + .5f); + i__1 = mid - bsiz1 - 1; + for (k = 1; k <= i__1; ++k) { + z__[k] = 0.f; +/* L10: */ + } + scopy_(&bsiz1, &q[qptr[curr] + bsiz1 - 1], &bsiz1, &z__[mid - bsiz1], & + c__1); + scopy_(&bsiz2, &q[qptr[curr + 1]], &bsiz2, &z__[mid], &c__1); + i__1 = *n; + for (k = mid + bsiz2; k <= i__1; ++k) { + z__[k] = 0.f; +/* L20: */ + } + +/* + Loop thru remaining levels 1 -> CURLVL applying the Givens + rotations and permutation and then multiplying the center matrices + against the current Z. +*/ + + ptr = pow_ii(&c__2, tlvls) + 1; + i__1 = *curlvl - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = *curlvl - k; + i__3 = *curlvl - k - 1; + curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) - + 1; + psiz1 = prmptr[curr + 1] - prmptr[curr]; + psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; + zptr1 = mid - psiz1; + +/* Apply Givens at CURR and CURR+1 */ + + i__2 = givptr[curr + 1] - 1; + for (i__ = givptr[curr]; i__ <= i__2; ++i__) { + srot_(&c__1, &z__[zptr1 + givcol[(i__ << 1) + 1] - 1], &c__1, & + z__[zptr1 + givcol[(i__ << 1) + 2] - 1], &c__1, &givnum[( + i__ << 1) + 1], &givnum[(i__ << 1) + 2]); +/* L30: */ + } + i__2 = givptr[curr + 2] - 1; + for (i__ = givptr[curr + 1]; i__ <= i__2; ++i__) { + srot_(&c__1, &z__[mid - 1 + givcol[(i__ << 1) + 1]], &c__1, &z__[ + mid - 1 + givcol[(i__ << 1) + 2]], &c__1, &givnum[(i__ << + 1) + 1], &givnum[(i__ << 1) + 2]); +/* L40: */ + } + psiz1 = prmptr[curr + 1] - prmptr[curr]; + psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; + i__2 = psiz1 - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + ztemp[i__ + 1] = z__[zptr1 + perm[prmptr[curr] + i__] - 1]; +/* L50: */ + } + i__2 = psiz2 - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + ztemp[psiz1 + i__ + 1] = z__[mid + perm[prmptr[curr + 1] + i__] - + 1]; +/* L60: */ + } + +/* + Multiply Blocks at CURR and CURR+1 + + Determine size of these matrices. We add HALF to the value of + the SQRT in case the machine underestimates one of these + square roots. +*/ + + bsiz1 = (integer) (sqrt((real) (qptr[curr + 1] - qptr[curr])) + .5f); + bsiz2 = (integer) (sqrt((real) (qptr[curr + 2] - qptr[curr + 1])) + + .5f); + if (bsiz1 > 0) { + sgemv_("T", &bsiz1, &bsiz1, &c_b15, &q[qptr[curr]], &bsiz1, & + ztemp[1], &c__1, &c_b29, &z__[zptr1], &c__1); + } + i__2 = psiz1 - bsiz1; + scopy_(&i__2, &ztemp[bsiz1 + 1], &c__1, &z__[zptr1 + bsiz1], &c__1); + if (bsiz2 > 0) { + sgemv_("T", &bsiz2, &bsiz2, &c_b15, &q[qptr[curr + 1]], &bsiz2, & + ztemp[psiz1 + 1], &c__1, &c_b29, &z__[mid], &c__1); + } + i__2 = psiz2 - bsiz2; + scopy_(&i__2, &ztemp[psiz1 + bsiz2 + 1], &c__1, &z__[mid + bsiz2], & + c__1); + + i__2 = *tlvls - k; + ptr += pow_ii(&c__2, &i__2); +/* L70: */ + } + + return 0; + +/* End of SLAEDA */ + +} /* slaeda_ */ + +/* Subroutine */ int slaev2_(real *a, real *b, real *c__, real *rt1, real * + rt2, real *cs1, real *sn1) +{ + /* System generated locals */ + real r__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static real ab, df, cs, ct, tb, sm, tn, rt, adf, acs; + static integer sgn1, sgn2; + static real acmn, acmx; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix + [ A B ] + [ B C ]. + On return, RT1 is the eigenvalue of larger absolute value, RT2 is the + eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right + eigenvector for RT1, giving the decomposition + + [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] + [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. + + Arguments + ========= + + A (input) REAL + The (1,1) element of the 2-by-2 matrix. + + B (input) REAL + The (1,2) element and the conjugate of the (2,1) element of + the 2-by-2 matrix. + + C (input) REAL + The (2,2) element of the 2-by-2 matrix. + + RT1 (output) REAL + The eigenvalue of larger absolute value. + + RT2 (output) REAL + The eigenvalue of smaller absolute value. + + CS1 (output) REAL + SN1 (output) REAL + The vector (CS1, SN1) is a unit right eigenvector for RT1. + + Further Details + =============== + + RT1 is accurate to a few ulps barring over/underflow. + + RT2 may be inaccurate if there is massive cancellation in the + determinant A*C-B*B; higher precision or correctly rounded or + correctly truncated arithmetic would be needed to compute RT2 + accurately in all cases. + + CS1 and SN1 are accurate to a few ulps barring over/underflow. + + Overflow is possible only if RT1 is within a factor of 5 of overflow. + Underflow is harmless if the input data is 0 or exceeds + underflow_threshold / macheps. + + ===================================================================== + + + Compute the eigenvalues +*/ + + sm = *a + *c__; + df = *a - *c__; + adf = dabs(df); + tb = *b + *b; + ab = dabs(tb); + if (dabs(*a) > dabs(*c__)) { + acmx = *a; + acmn = *c__; + } else { + acmx = *c__; + acmn = *a; + } + if (adf > ab) { +/* Computing 2nd power */ + r__1 = ab / adf; + rt = adf * sqrt(r__1 * r__1 + 1.f); + } else if (adf < ab) { +/* Computing 2nd power */ + r__1 = adf / ab; + rt = ab * sqrt(r__1 * r__1 + 1.f); + } else { + +/* Includes case AB=ADF=0 */ + + rt = ab * sqrt(2.f); + } + if (sm < 0.f) { + *rt1 = (sm - rt) * .5f; + sgn1 = -1; + +/* + Order of execution important. + To get fully accurate smaller eigenvalue, + next line needs to be executed in higher precision. +*/ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else if (sm > 0.f) { + *rt1 = (sm + rt) * .5f; + sgn1 = 1; + +/* + Order of execution important. + To get fully accurate smaller eigenvalue, + next line needs to be executed in higher precision. +*/ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else { + +/* Includes case RT1 = RT2 = 0 */ + + *rt1 = rt * .5f; + *rt2 = rt * -.5f; + sgn1 = 1; + } + +/* Compute the eigenvector */ + + if (df >= 0.f) { + cs = df + rt; + sgn2 = 1; + } else { + cs = df - rt; + sgn2 = -1; + } + acs = dabs(cs); + if (acs > ab) { + ct = -tb / cs; + *sn1 = 1.f / sqrt(ct * ct + 1.f); + *cs1 = ct * *sn1; + } else { + if (ab == 0.f) { + *cs1 = 1.f; + *sn1 = 0.f; + } else { + tn = -cs / tb; + *cs1 = 1.f / sqrt(tn * tn + 1.f); + *sn1 = tn * *cs1; + } + } + if (sgn1 == sgn2) { + tn = *cs1; + *cs1 = -(*sn1); + *sn1 = tn; + } + return 0; + +/* End of SLAEV2 */ + +} /* slaev2_ */ + +/* Subroutine */ int slahqr_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real * + wi, integer *iloz, integer *ihiz, real *z__, integer *ldz, integer * + info) +{ + /* System generated locals */ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; + real r__1, r__2; + + /* Builtin functions */ + double sqrt(doublereal), r_sign(real *, real *); + + /* Local variables */ + static integer i__, j, k, l, m; + static real s, v[3]; + static integer i1, i2; + static real t1, t2, t3, v1, v2, v3, h00, h10, h11, h12, h21, h22, h33, + h44; + static integer nh; + static real cs; + static integer nr; + static real sn; + static integer nz; + static real ave, h33s, h44s; + static integer itn, its; + static real ulp, sum, tst1, h43h34, disc, unfl, ovfl, work[1]; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *), scopy_(integer *, real *, integer *, + real *, integer *), slanv2_(real *, real *, real *, real *, real * + , real *, real *, real *, real *, real *), slabad_(real *, real *) + ; + extern doublereal slamch_(char *); + extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, + real *); + extern doublereal slanhs_(char *, integer *, real *, integer *, real *); + static real smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SLAHQR is an auxiliary routine called by SHSEQR to update the + eigenvalues and Schur decomposition already computed by SHSEQR, by + dealing with the Hessenberg submatrix in rows and columns ILO to IHI. + + Arguments + ========= + + WANTT (input) LOGICAL + = .TRUE. : the full Schur form T is required; + = .FALSE.: only eigenvalues are required. + + WANTZ (input) LOGICAL + = .TRUE. : the matrix of Schur vectors Z is required; + = .FALSE.: Schur vectors are not required. + + N (input) INTEGER + The order of the matrix H. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper quasi-triangular in + rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless + ILO = 1). SLAHQR works primarily with the Hessenberg + submatrix in rows and columns ILO to IHI, but applies + transformations to all of H if WANTT is .TRUE.. + 1 <= ILO <= max(1,IHI); IHI <= N. + + H (input/output) REAL array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if WANTT is .TRUE., H is upper quasi-triangular in + rows and columns ILO:IHI, with any 2-by-2 diagonal blocks in + standard form. If WANTT is .FALSE., the contents of H are + unspecified on exit. + + LDH (input) INTEGER + The leading dimension of the array H. LDH >= max(1,N). + + WR (output) REAL array, dimension (N) + WI (output) REAL array, dimension (N) + The real and imaginary parts, respectively, of the computed + eigenvalues ILO to IHI are stored in the corresponding + elements of WR and WI. If two eigenvalues are computed as a + complex conjugate pair, they are stored in consecutive + elements of WR and WI, say the i-th and (i+1)th, with + WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the + eigenvalues are stored in the same order as on the diagonal + of the Schur form returned in H, with WR(i) = H(i,i), and, if + H(i:i+1,i:i+1) is a 2-by-2 diagonal block, + WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. + 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. + + Z (input/output) REAL array, dimension (LDZ,N) + If WANTZ is .TRUE., on entry Z must contain the current + matrix Z of transformations accumulated by SHSEQR, and on + exit Z has been updated; transformations are applied only to + the submatrix Z(ILOZ:IHIZ,ILO:IHI). + If WANTZ is .FALSE., Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + > 0: SLAHQR failed to compute all the eigenvalues ILO to IHI + in a total of 30*(IHI-ILO+1) iterations; if INFO = i, + elements i+1:ihi of WR and WI contain those eigenvalues + which have been successfully computed. + + Further Details + =============== + + 2-96 Based on modifications by + David Day, Sandia National Laboratory, USA + + ===================================================================== +*/ + + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*ilo == *ihi) { + wr[*ilo] = h__[*ilo + *ilo * h_dim1]; + wi[*ilo] = 0.f; + return 0; + } + + nh = *ihi - *ilo + 1; + nz = *ihiz - *iloz + 1; + +/* + Set machine-dependent constants for the stopping criterion. + If norm(H) <= sqrt(OVFL), overflow should not occur. +*/ + + unfl = slamch_("Safe minimum"); + ovfl = 1.f / unfl; + slabad_(&unfl, &ovfl); + ulp = slamch_("Precision"); + smlnum = unfl * (nh / ulp); + +/* + I1 and I2 are the indices of the first row and last column of H + to which transformations must be applied. If eigenvalues only are + being computed, I1 and I2 are set inside the main loop. +*/ + + if (*wantt) { + i1 = 1; + i2 = *n; + } + +/* ITN is the total number of QR iterations allowed. */ + + itn = nh * 30; + +/* + The main loop begins here. I is the loop index and decreases from + IHI to ILO in steps of 1 or 2. Each iteration of the loop works + with the active submatrix in rows and columns L to I. + Eigenvalues I+1 to IHI have already converged. Either L = ILO or + H(L,L-1) is negligible so that the matrix splits. +*/ + + i__ = *ihi; +L10: + l = *ilo; + if (i__ < *ilo) { + goto L150; + } + +/* + Perform QR iterations on rows and columns ILO to I until a + submatrix of order 1 or 2 splits off at the bottom because a + subdiagonal element has become negligible. +*/ + + i__1 = itn; + for (its = 0; its <= i__1; ++its) { + +/* Look for a single small subdiagonal element. */ + + i__2 = l + 1; + for (k = i__; k >= i__2; --k) { + tst1 = (r__1 = h__[k - 1 + (k - 1) * h_dim1], dabs(r__1)) + (r__2 + = h__[k + k * h_dim1], dabs(r__2)); + if (tst1 == 0.f) { + i__3 = i__ - l + 1; + tst1 = slanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, work); + } +/* Computing MAX */ + r__2 = ulp * tst1; + if ((r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)) <= dmax(r__2, + smlnum)) { + goto L30; + } +/* L20: */ + } +L30: + l = k; + if (l > *ilo) { + +/* H(L,L-1) is negligible */ + + h__[l + (l - 1) * h_dim1] = 0.f; + } + +/* Exit from loop if a submatrix of order 1 or 2 has split off. */ + + if (l >= i__ - 1) { + goto L140; + } + +/* + Now the active submatrix is in rows and columns L to I. If + eigenvalues only are being computed, only the active submatrix + need be transformed. +*/ + + if (! (*wantt)) { + i1 = l; + i2 = i__; + } + + if (its == 10 || its == 20) { + +/* Exceptional shift. */ + + s = (r__1 = h__[i__ + (i__ - 1) * h_dim1], dabs(r__1)) + (r__2 = + h__[i__ - 1 + (i__ - 2) * h_dim1], dabs(r__2)); + h44 = s * .75f + h__[i__ + i__ * h_dim1]; + h33 = h44; + h43h34 = s * -.4375f * s; + } else { + +/* + Prepare to use Francis' double shift + (i.e. 2nd degree generalized Rayleigh quotient) +*/ + + h44 = h__[i__ + i__ * h_dim1]; + h33 = h__[i__ - 1 + (i__ - 1) * h_dim1]; + h43h34 = h__[i__ + (i__ - 1) * h_dim1] * h__[i__ - 1 + i__ * + h_dim1]; + s = h__[i__ - 1 + (i__ - 2) * h_dim1] * h__[i__ - 1 + (i__ - 2) * + h_dim1]; + disc = (h33 - h44) * .5f; + disc = disc * disc + h43h34; + if (disc > 0.f) { + +/* Real roots: use Wilkinson's shift twice */ + + disc = sqrt(disc); + ave = (h33 + h44) * .5f; + if (dabs(h33) - dabs(h44) > 0.f) { + h33 = h33 * h44 - h43h34; + h44 = h33 / (r_sign(&disc, &ave) + ave); + } else { + h44 = r_sign(&disc, &ave) + ave; + } + h33 = h44; + h43h34 = 0.f; + } + } + +/* Look for two consecutive small subdiagonal elements. */ + + i__2 = l; + for (m = i__ - 2; m >= i__2; --m) { +/* + Determine the effect of starting the double-shift QR + iteration at row M, and see if this would make H(M,M-1) + negligible. +*/ + + h11 = h__[m + m * h_dim1]; + h22 = h__[m + 1 + (m + 1) * h_dim1]; + h21 = h__[m + 1 + m * h_dim1]; + h12 = h__[m + (m + 1) * h_dim1]; + h44s = h44 - h11; + h33s = h33 - h11; + v1 = (h33s * h44s - h43h34) / h21 + h12; + v2 = h22 - h11 - h33s - h44s; + v3 = h__[m + 2 + (m + 1) * h_dim1]; + s = dabs(v1) + dabs(v2) + dabs(v3); + v1 /= s; + v2 /= s; + v3 /= s; + v[0] = v1; + v[1] = v2; + v[2] = v3; + if (m == l) { + goto L50; + } + h00 = h__[m - 1 + (m - 1) * h_dim1]; + h10 = h__[m + (m - 1) * h_dim1]; + tst1 = dabs(v1) * (dabs(h00) + dabs(h11) + dabs(h22)); + if (dabs(h10) * (dabs(v2) + dabs(v3)) <= ulp * tst1) { + goto L50; + } +/* L40: */ + } +L50: + +/* Double-shift QR step */ + + i__2 = i__ - 1; + for (k = m; k <= i__2; ++k) { + +/* + The first iteration of this loop determines a reflection G + from the vector V and applies it from left and right to H, + thus creating a nonzero bulge below the subdiagonal. + + Each subsequent iteration determines a reflection G to + restore the Hessenberg form in the (K-1)th column, and thus + chases the bulge one step toward the bottom of the active + submatrix. NR is the order of G. + + Computing MIN +*/ + i__3 = 3, i__4 = i__ - k + 1; + nr = min(i__3,i__4); + if (k > m) { + scopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); + } + slarfg_(&nr, v, &v[1], &c__1, &t1); + if (k > m) { + h__[k + (k - 1) * h_dim1] = v[0]; + h__[k + 1 + (k - 1) * h_dim1] = 0.f; + if (k < i__ - 1) { + h__[k + 2 + (k - 1) * h_dim1] = 0.f; + } + } else if (m > l) { + h__[k + (k - 1) * h_dim1] = -h__[k + (k - 1) * h_dim1]; + } + v2 = v[1]; + t2 = t1 * v2; + if (nr == 3) { + v3 = v[2]; + t3 = t1 * v3; + +/* + Apply G from the left to transform the rows of the matrix + in columns K to I2. +*/ + + i__3 = i2; + for (j = k; j <= i__3; ++j) { + sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1] + + v3 * h__[k + 2 + j * h_dim1]; + h__[k + j * h_dim1] -= sum * t1; + h__[k + 1 + j * h_dim1] -= sum * t2; + h__[k + 2 + j * h_dim1] -= sum * t3; +/* L60: */ + } + +/* + Apply G from the right to transform the columns of the + matrix in rows I1 to min(K+3,I). + + Computing MIN +*/ + i__4 = k + 3; + i__3 = min(i__4,i__); + for (j = i1; j <= i__3; ++j) { + sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] + + v3 * h__[j + (k + 2) * h_dim1]; + h__[j + k * h_dim1] -= sum * t1; + h__[j + (k + 1) * h_dim1] -= sum * t2; + h__[j + (k + 2) * h_dim1] -= sum * t3; +/* L70: */ + } + + if (*wantz) { + +/* Accumulate transformations in the matrix Z */ + + i__3 = *ihiz; + for (j = *iloz; j <= i__3; ++j) { + sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * + z_dim1] + v3 * z__[j + (k + 2) * z_dim1]; + z__[j + k * z_dim1] -= sum * t1; + z__[j + (k + 1) * z_dim1] -= sum * t2; + z__[j + (k + 2) * z_dim1] -= sum * t3; +/* L80: */ + } + } + } else if (nr == 2) { + +/* + Apply G from the left to transform the rows of the matrix + in columns K to I2. +*/ + + i__3 = i2; + for (j = k; j <= i__3; ++j) { + sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1]; + h__[k + j * h_dim1] -= sum * t1; + h__[k + 1 + j * h_dim1] -= sum * t2; +/* L90: */ + } + +/* + Apply G from the right to transform the columns of the + matrix in rows I1 to min(K+3,I). +*/ + + i__3 = i__; + for (j = i1; j <= i__3; ++j) { + sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] + ; + h__[j + k * h_dim1] -= sum * t1; + h__[j + (k + 1) * h_dim1] -= sum * t2; +/* L100: */ + } + + if (*wantz) { + +/* Accumulate transformations in the matrix Z */ + + i__3 = *ihiz; + for (j = *iloz; j <= i__3; ++j) { + sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * + z_dim1]; + z__[j + k * z_dim1] -= sum * t1; + z__[j + (k + 1) * z_dim1] -= sum * t2; +/* L110: */ + } + } + } +/* L120: */ + } + +/* L130: */ + } + +/* Failure to converge in remaining number of iterations */ + + *info = i__; + return 0; + +L140: + + if (l == i__) { + +/* H(I,I-1) is negligible: one eigenvalue has converged. */ + + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.f; + } else if (l == i__ - 1) { + +/* + H(I-1,I-2) is negligible: a pair of eigenvalues have converged. + + Transform the 2-by-2 submatrix to standard Schur form, + and compute and store the eigenvalues. +*/ + + slanv2_(&h__[i__ - 1 + (i__ - 1) * h_dim1], &h__[i__ - 1 + i__ * + h_dim1], &h__[i__ + (i__ - 1) * h_dim1], &h__[i__ + i__ * + h_dim1], &wr[i__ - 1], &wi[i__ - 1], &wr[i__], &wi[i__], &cs, + &sn); + + if (*wantt) { + +/* Apply the transformation to the rest of H. */ + + if (i2 > i__) { + i__1 = i2 - i__; + srot_(&i__1, &h__[i__ - 1 + (i__ + 1) * h_dim1], ldh, &h__[ + i__ + (i__ + 1) * h_dim1], ldh, &cs, &sn); + } + i__1 = i__ - i1 - 1; + srot_(&i__1, &h__[i1 + (i__ - 1) * h_dim1], &c__1, &h__[i1 + i__ * + h_dim1], &c__1, &cs, &sn); + } + if (*wantz) { + +/* Apply the transformation to Z. */ + + srot_(&nz, &z__[*iloz + (i__ - 1) * z_dim1], &c__1, &z__[*iloz + + i__ * z_dim1], &c__1, &cs, &sn); + } + } + +/* + Decrement number of remaining iterations, and return to start of + the main loop with new value of I. +*/ + + itn -= its; + i__ = l - 1; + goto L10; + +L150: + return 0; + +/* End of SLAHQR */ + +} /* slahqr_ */ + +/* Subroutine */ int slahrd_(integer *n, integer *k, integer *nb, real *a, + integer *lda, real *tau, real *t, integer *ldt, real *y, integer *ldy) +{ + /* System generated locals */ + integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, + i__3; + real r__1; + + /* Local variables */ + static integer i__; + static real ei; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + sgemv_(char *, integer *, integer *, real *, real *, integer *, + real *, integer *, real *, real *, integer *), scopy_( + integer *, real *, integer *, real *, integer *), saxpy_(integer * + , real *, real *, integer *, real *, integer *), strmv_(char *, + char *, char *, integer *, real *, integer *, real *, integer *), slarfg_(integer *, real *, real *, + integer *, real *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SLAHRD reduces the first NB columns of a real general n-by-(n-k+1) + matrix A so that elements below the k-th subdiagonal are zero. The + reduction is performed by an orthogonal similarity transformation + Q' * A * Q. The routine returns the matrices V and T which determine + Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. + + This is an auxiliary routine called by SGEHRD. + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. + + K (input) INTEGER + The offset for the reduction. Elements below the k-th + subdiagonal in the first NB columns are reduced to zero. + + NB (input) INTEGER + The number of columns to be reduced. + + A (input/output) REAL array, dimension (LDA,N-K+1) + On entry, the n-by-(n-k+1) general matrix A. + On exit, the elements on and above the k-th subdiagonal in + the first NB columns are overwritten with the corresponding + elements of the reduced matrix; the elements below the k-th + subdiagonal, with the array TAU, represent the matrix Q as a + product of elementary reflectors. The other columns of A are + unchanged. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) REAL array, dimension (NB) + The scalar factors of the elementary reflectors. See Further + Details. + + T (output) REAL array, dimension (LDT,NB) + The upper triangular matrix T. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= NB. + + Y (output) REAL array, dimension (LDY,NB) + The n-by-nb matrix Y. + + LDY (input) INTEGER + The leading dimension of the array Y. LDY >= N. + + Further Details + =============== + + The matrix Q is represented as a product of nb elementary reflectors + + Q = H(1) H(2) . . . H(nb). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in + A(i+k+1:n,i), and tau in TAU(i). + + The elements of the vectors v together form the (n-k+1)-by-nb matrix + V which is needed, with T and Y, to apply the transformation to the + unreduced part of the matrix, using an update of the form: + A := (I - V*T*V') * (A - Y*V'). + + The contents of A on exit are illustrated by the following example + with n = 7, k = 3 and nb = 2: + + ( a h a a a ) + ( a h a a a ) + ( a h a a a ) + ( h h a a a ) + ( v1 h a a a ) + ( v1 v2 a a a ) + ( v1 v2 a a a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + --tau; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + + /* Function Body */ + if (*n <= 1) { + return 0; + } + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ > 1) { + +/* + Update A(1:n,i) + + Compute i-th column of A - Y * V' +*/ + + i__2 = i__ - 1; + sgemv_("No transpose", n, &i__2, &c_b151, &y[y_offset], ldy, &a[* + k + i__ - 1 + a_dim1], lda, &c_b15, &a[i__ * a_dim1 + 1], + &c__1); + +/* + Apply I - V * T' * V' to this column (call it b) from the + left, using the last column of T as workspace + + Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) + ( V2 ) ( b2 ) + + where V1 is unit lower triangular + + w := V1' * b1 +*/ + + i__2 = i__ - 1; + scopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + + 1], &c__1); + i__2 = i__ - 1; + strmv_("Lower", "Transpose", "Unit", &i__2, &a[*k + 1 + a_dim1], + lda, &t[*nb * t_dim1 + 1], &c__1); + +/* w := w + V2'*b2 */ + + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + sgemv_("Transpose", &i__2, &i__3, &c_b15, &a[*k + i__ + a_dim1], + lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b15, &t[*nb * + t_dim1 + 1], &c__1); + +/* w := T'*w */ + + i__2 = i__ - 1; + strmv_("Upper", "Transpose", "Non-unit", &i__2, &t[t_offset], ldt, + &t[*nb * t_dim1 + 1], &c__1); + +/* b2 := b2 - V2*w */ + + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &a[*k + i__ + + a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1, &c_b15, &a[*k + + i__ + i__ * a_dim1], &c__1); + +/* b1 := b1 - V1*w */ + + i__2 = i__ - 1; + strmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1] + , lda, &t[*nb * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + saxpy_(&i__2, &c_b151, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + + i__ * a_dim1], &c__1); + + a[*k + i__ - 1 + (i__ - 1) * a_dim1] = ei; + } + +/* + Generate the elementary reflector H(i) to annihilate + A(k+i+1:n,i) +*/ + + i__2 = *n - *k - i__ + 1; +/* Computing MIN */ + i__3 = *k + i__ + 1; + slarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3,*n) + i__ * + a_dim1], &c__1, &tau[i__]); + ei = a[*k + i__ + i__ * a_dim1]; + a[*k + i__ + i__ * a_dim1] = 1.f; + +/* Compute Y(1:n,i) */ + + i__2 = *n - *k - i__ + 1; + sgemv_("No transpose", n, &i__2, &c_b15, &a[(i__ + 1) * a_dim1 + 1], + lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b29, &y[i__ * + y_dim1 + 1], &c__1); + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + sgemv_("Transpose", &i__2, &i__3, &c_b15, &a[*k + i__ + a_dim1], lda, + &a[*k + i__ + i__ * a_dim1], &c__1, &c_b29, &t[i__ * t_dim1 + + 1], &c__1); + i__2 = i__ - 1; + sgemv_("No transpose", n, &i__2, &c_b151, &y[y_offset], ldy, &t[i__ * + t_dim1 + 1], &c__1, &c_b15, &y[i__ * y_dim1 + 1], &c__1); + sscal_(n, &tau[i__], &y[i__ * y_dim1 + 1], &c__1); + +/* Compute T(1:i,i) */ + + i__2 = i__ - 1; + r__1 = -tau[i__]; + sscal_(&i__2, &r__1, &t[i__ * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + strmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt, + &t[i__ * t_dim1 + 1], &c__1) + ; + t[i__ + i__ * t_dim1] = tau[i__]; + +/* L10: */ + } + a[*k + *nb + *nb * a_dim1] = ei; + + return 0; + +/* End of SLAHRD */ + +} /* slahrd_ */ + +/* Subroutine */ int slaln2_(logical *ltrans, integer *na, integer *nw, real * + smin, real *ca, real *a, integer *lda, real *d1, real *d2, real *b, + integer *ldb, real *wr, real *wi, real *x, integer *ldx, real *scale, + real *xnorm, integer *info) +{ + /* Initialized data */ + + static logical cswap[4] = { FALSE_,FALSE_,TRUE_,TRUE_ }; + static logical rswap[4] = { FALSE_,TRUE_,FALSE_,TRUE_ }; + static integer ipivot[16] /* was [4][4] */ = { 1,2,3,4,2,1,4,3,3,4,1,2, + 4,3,2,1 }; + + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset; + real r__1, r__2, r__3, r__4, r__5, r__6; + static real equiv_0[4], equiv_1[4]; + + /* Local variables */ + static integer j; +#define ci (equiv_0) +#define cr (equiv_1) + static real bi1, bi2, br1, br2, xi1, xi2, xr1, xr2, ci21, ci22, cr21, + cr22, li21, csi, ui11, lr21, ui12, ui22; +#define civ (equiv_0) + static real csr, ur11, ur12, ur22; +#define crv (equiv_1) + static real bbnd, cmax, ui11r, ui12s, temp, ur11r, ur12s, u22abs; + static integer icmax; + static real bnorm, cnorm, smini; + extern doublereal slamch_(char *); + static real bignum; + extern /* Subroutine */ int sladiv_(real *, real *, real *, real *, real * + , real *); + static real smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLALN2 solves a system of the form (ca A - w D ) X = s B + or (ca A' - w D) X = s B with possible scaling ("s") and + perturbation of A. (A' means A-transpose.) + + A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA + real diagonal matrix, w is a real or complex value, and X and B are + NA x 1 matrices -- real if w is real, complex if w is complex. NA + may be 1 or 2. + + If w is complex, X and B are represented as NA x 2 matrices, + the first column of each being the real part and the second + being the imaginary part. + + "s" is a scaling factor (.LE. 1), computed by SLALN2, which is + so chosen that X can be computed without overflow. X is further + scaled if necessary to assure that norm(ca A - w D)*norm(X) is less + than overflow. + + If both singular values of (ca A - w D) are less than SMIN, + SMIN*identity will be used instead of (ca A - w D). If only one + singular value is less than SMIN, one element of (ca A - w D) will be + perturbed enough to make the smallest singular value roughly SMIN. + If both singular values are at least SMIN, (ca A - w D) will not be + perturbed. In any case, the perturbation will be at most some small + multiple of max( SMIN, ulp*norm(ca A - w D) ). The singular values + are computed by infinity-norm approximations, and thus will only be + correct to a factor of 2 or so. + + Note: all input quantities are assumed to be smaller than overflow + by a reasonable factor. (See BIGNUM.) + + Arguments + ========== + + LTRANS (input) LOGICAL + =.TRUE.: A-transpose will be used. + =.FALSE.: A will be used (not transposed.) + + NA (input) INTEGER + The size of the matrix A. It may (only) be 1 or 2. + + NW (input) INTEGER + 1 if "w" is real, 2 if "w" is complex. It may only be 1 + or 2. + + SMIN (input) REAL + The desired lower bound on the singular values of A. This + should be a safe distance away from underflow or overflow, + say, between (underflow/machine precision) and (machine + precision * overflow ). (See BIGNUM and ULP.) + + CA (input) REAL + The coefficient c, which A is multiplied by. + + A (input) REAL array, dimension (LDA,NA) + The NA x NA matrix A. + + LDA (input) INTEGER + The leading dimension of A. It must be at least NA. + + D1 (input) REAL + The 1,1 element in the diagonal matrix D. + + D2 (input) REAL + The 2,2 element in the diagonal matrix D. Not used if NW=1. + + B (input) REAL array, dimension (LDB,NW) + The NA x NW matrix B (right-hand side). If NW=2 ("w" is + complex), column 1 contains the real part of B and column 2 + contains the imaginary part. + + LDB (input) INTEGER + The leading dimension of B. It must be at least NA. + + WR (input) REAL + The real part of the scalar "w". + + WI (input) REAL + The imaginary part of the scalar "w". Not used if NW=1. + + X (output) REAL array, dimension (LDX,NW) + The NA x NW matrix X (unknowns), as computed by SLALN2. + If NW=2 ("w" is complex), on exit, column 1 will contain + the real part of X and column 2 will contain the imaginary + part. + + LDX (input) INTEGER + The leading dimension of X. It must be at least NA. + + SCALE (output) REAL + The scale factor that B must be multiplied by to insure + that overflow does not occur when computing X. Thus, + (ca A - w D) X will be SCALE*B, not B (ignoring + perturbations of A.) It will be at most 1. + + XNORM (output) REAL + The infinity-norm of X, when X is regarded as an NA x NW + real matrix. + + INFO (output) INTEGER + An error flag. It will be set to zero if no error occurs, + a negative number if an argument is in error, or a positive + number if ca A - w D had to be perturbed. + The possible values are: + = 0: No error occurred, and (ca A - w D) did not have to be + perturbed. + = 1: (ca A - w D) had to be perturbed to make its smallest + (or only) singular value greater than SMIN. + NOTE: In the interests of speed, this routine does not + check the inputs for errors. + + ===================================================================== +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + + /* Function Body */ + +/* Compute BIGNUM */ + + smlnum = 2.f * slamch_("Safe minimum"); + bignum = 1.f / smlnum; + smini = dmax(*smin,smlnum); + +/* Don't check for input errors */ + + *info = 0; + +/* Standard Initializations */ + + *scale = 1.f; + + if (*na == 1) { + +/* 1 x 1 (i.e., scalar) system C X = B */ + + if (*nw == 1) { + +/* + Real 1x1 system. + + C = ca A - w D +*/ + + csr = *ca * a[a_dim1 + 1] - *wr * *d1; + cnorm = dabs(csr); + +/* If | C | < SMINI, use C = SMINI */ + + if (cnorm < smini) { + csr = smini; + cnorm = smini; + *info = 1; + } + +/* Check scaling for X = B / C */ + + bnorm = (r__1 = b[b_dim1 + 1], dabs(r__1)); + if (cnorm < 1.f && bnorm > 1.f) { + if (bnorm > bignum * cnorm) { + *scale = 1.f / bnorm; + } + } + +/* Compute X */ + + x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / csr; + *xnorm = (r__1 = x[x_dim1 + 1], dabs(r__1)); + } else { + +/* + Complex 1x1 system (w is complex) + + C = ca A - w D +*/ + + csr = *ca * a[a_dim1 + 1] - *wr * *d1; + csi = -(*wi) * *d1; + cnorm = dabs(csr) + dabs(csi); + +/* If | C | < SMINI, use C = SMINI */ + + if (cnorm < smini) { + csr = smini; + csi = 0.f; + cnorm = smini; + *info = 1; + } + +/* Check scaling for X = B / C */ + + bnorm = (r__1 = b[b_dim1 + 1], dabs(r__1)) + (r__2 = b[(b_dim1 << + 1) + 1], dabs(r__2)); + if (cnorm < 1.f && bnorm > 1.f) { + if (bnorm > bignum * cnorm) { + *scale = 1.f / bnorm; + } + } + +/* Compute X */ + + r__1 = *scale * b[b_dim1 + 1]; + r__2 = *scale * b[(b_dim1 << 1) + 1]; + sladiv_(&r__1, &r__2, &csr, &csi, &x[x_dim1 + 1], &x[(x_dim1 << 1) + + 1]); + *xnorm = (r__1 = x[x_dim1 + 1], dabs(r__1)) + (r__2 = x[(x_dim1 << + 1) + 1], dabs(r__2)); + } + + } else { + +/* + 2x2 System + + Compute the real part of C = ca A - w D (or ca A' - w D ) +*/ + + cr[0] = *ca * a[a_dim1 + 1] - *wr * *d1; + cr[3] = *ca * a[(a_dim1 << 1) + 2] - *wr * *d2; + if (*ltrans) { + cr[2] = *ca * a[a_dim1 + 2]; + cr[1] = *ca * a[(a_dim1 << 1) + 1]; + } else { + cr[1] = *ca * a[a_dim1 + 2]; + cr[2] = *ca * a[(a_dim1 << 1) + 1]; + } + + if (*nw == 1) { + +/* + Real 2x2 system (w is real) + + Find the largest element in C +*/ + + cmax = 0.f; + icmax = 0; + + for (j = 1; j <= 4; ++j) { + if ((r__1 = crv[j - 1], dabs(r__1)) > cmax) { + cmax = (r__1 = crv[j - 1], dabs(r__1)); + icmax = j; + } +/* L10: */ + } + +/* If norm(C) < SMINI, use SMINI*identity. */ + + if (cmax < smini) { +/* Computing MAX */ + r__3 = (r__1 = b[b_dim1 + 1], dabs(r__1)), r__4 = (r__2 = b[ + b_dim1 + 2], dabs(r__2)); + bnorm = dmax(r__3,r__4); + if (smini < 1.f && bnorm > 1.f) { + if (bnorm > bignum * smini) { + *scale = 1.f / bnorm; + } + } + temp = *scale / smini; + x[x_dim1 + 1] = temp * b[b_dim1 + 1]; + x[x_dim1 + 2] = temp * b[b_dim1 + 2]; + *xnorm = temp * bnorm; + *info = 1; + return 0; + } + +/* Gaussian elimination with complete pivoting. */ + + ur11 = crv[icmax - 1]; + cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; + ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; + cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; + ur11r = 1.f / ur11; + lr21 = ur11r * cr21; + ur22 = cr22 - ur12 * lr21; + +/* If smaller pivot < SMINI, use SMINI */ + + if (dabs(ur22) < smini) { + ur22 = smini; + *info = 1; + } + if (rswap[icmax - 1]) { + br1 = b[b_dim1 + 2]; + br2 = b[b_dim1 + 1]; + } else { + br1 = b[b_dim1 + 1]; + br2 = b[b_dim1 + 2]; + } + br2 -= lr21 * br1; +/* Computing MAX */ + r__2 = (r__1 = br1 * (ur22 * ur11r), dabs(r__1)), r__3 = dabs(br2) + ; + bbnd = dmax(r__2,r__3); + if (bbnd > 1.f && dabs(ur22) < 1.f) { + if (bbnd >= bignum * dabs(ur22)) { + *scale = 1.f / bbnd; + } + } + + xr2 = br2 * *scale / ur22; + xr1 = *scale * br1 * ur11r - xr2 * (ur11r * ur12); + if (cswap[icmax - 1]) { + x[x_dim1 + 1] = xr2; + x[x_dim1 + 2] = xr1; + } else { + x[x_dim1 + 1] = xr1; + x[x_dim1 + 2] = xr2; + } +/* Computing MAX */ + r__1 = dabs(xr1), r__2 = dabs(xr2); + *xnorm = dmax(r__1,r__2); + +/* Further scaling if norm(A) norm(X) > overflow */ + + if (*xnorm > 1.f && cmax > 1.f) { + if (*xnorm > bignum / cmax) { + temp = cmax / bignum; + x[x_dim1 + 1] = temp * x[x_dim1 + 1]; + x[x_dim1 + 2] = temp * x[x_dim1 + 2]; + *xnorm = temp * *xnorm; + *scale = temp * *scale; + } + } + } else { + +/* + Complex 2x2 system (w is complex) + + Find the largest element in C +*/ + + ci[0] = -(*wi) * *d1; + ci[1] = 0.f; + ci[2] = 0.f; + ci[3] = -(*wi) * *d2; + cmax = 0.f; + icmax = 0; + + for (j = 1; j <= 4; ++j) { + if ((r__1 = crv[j - 1], dabs(r__1)) + (r__2 = civ[j - 1], + dabs(r__2)) > cmax) { + cmax = (r__1 = crv[j - 1], dabs(r__1)) + (r__2 = civ[j - + 1], dabs(r__2)); + icmax = j; + } +/* L20: */ + } + +/* If norm(C) < SMINI, use SMINI*identity. */ + + if (cmax < smini) { +/* Computing MAX */ + r__5 = (r__1 = b[b_dim1 + 1], dabs(r__1)) + (r__2 = b[(b_dim1 + << 1) + 1], dabs(r__2)), r__6 = (r__3 = b[b_dim1 + 2], + dabs(r__3)) + (r__4 = b[(b_dim1 << 1) + 2], dabs( + r__4)); + bnorm = dmax(r__5,r__6); + if (smini < 1.f && bnorm > 1.f) { + if (bnorm > bignum * smini) { + *scale = 1.f / bnorm; + } + } + temp = *scale / smini; + x[x_dim1 + 1] = temp * b[b_dim1 + 1]; + x[x_dim1 + 2] = temp * b[b_dim1 + 2]; + x[(x_dim1 << 1) + 1] = temp * b[(b_dim1 << 1) + 1]; + x[(x_dim1 << 1) + 2] = temp * b[(b_dim1 << 1) + 2]; + *xnorm = temp * bnorm; + *info = 1; + return 0; + } + +/* Gaussian elimination with complete pivoting. */ + + ur11 = crv[icmax - 1]; + ui11 = civ[icmax - 1]; + cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; + ci21 = civ[ipivot[(icmax << 2) - 3] - 1]; + ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; + ui12 = civ[ipivot[(icmax << 2) - 2] - 1]; + cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; + ci22 = civ[ipivot[(icmax << 2) - 1] - 1]; + if (icmax == 1 || icmax == 4) { + +/* Code when off-diagonals of pivoted C are real */ + + if (dabs(ur11) > dabs(ui11)) { + temp = ui11 / ur11; +/* Computing 2nd power */ + r__1 = temp; + ur11r = 1.f / (ur11 * (r__1 * r__1 + 1.f)); + ui11r = -temp * ur11r; + } else { + temp = ur11 / ui11; +/* Computing 2nd power */ + r__1 = temp; + ui11r = -1.f / (ui11 * (r__1 * r__1 + 1.f)); + ur11r = -temp * ui11r; + } + lr21 = cr21 * ur11r; + li21 = cr21 * ui11r; + ur12s = ur12 * ur11r; + ui12s = ur12 * ui11r; + ur22 = cr22 - ur12 * lr21; + ui22 = ci22 - ur12 * li21; + } else { + +/* Code when diagonals of pivoted C are real */ + + ur11r = 1.f / ur11; + ui11r = 0.f; + lr21 = cr21 * ur11r; + li21 = ci21 * ur11r; + ur12s = ur12 * ur11r; + ui12s = ui12 * ur11r; + ur22 = cr22 - ur12 * lr21 + ui12 * li21; + ui22 = -ur12 * li21 - ui12 * lr21; + } + u22abs = dabs(ur22) + dabs(ui22); + +/* If smaller pivot < SMINI, use SMINI */ + + if (u22abs < smini) { + ur22 = smini; + ui22 = 0.f; + *info = 1; + } + if (rswap[icmax - 1]) { + br2 = b[b_dim1 + 1]; + br1 = b[b_dim1 + 2]; + bi2 = b[(b_dim1 << 1) + 1]; + bi1 = b[(b_dim1 << 1) + 2]; + } else { + br1 = b[b_dim1 + 1]; + br2 = b[b_dim1 + 2]; + bi1 = b[(b_dim1 << 1) + 1]; + bi2 = b[(b_dim1 << 1) + 2]; + } + br2 = br2 - lr21 * br1 + li21 * bi1; + bi2 = bi2 - li21 * br1 - lr21 * bi1; +/* Computing MAX */ + r__1 = (dabs(br1) + dabs(bi1)) * (u22abs * (dabs(ur11r) + dabs( + ui11r))), r__2 = dabs(br2) + dabs(bi2); + bbnd = dmax(r__1,r__2); + if (bbnd > 1.f && u22abs < 1.f) { + if (bbnd >= bignum * u22abs) { + *scale = 1.f / bbnd; + br1 = *scale * br1; + bi1 = *scale * bi1; + br2 = *scale * br2; + bi2 = *scale * bi2; + } + } + + sladiv_(&br2, &bi2, &ur22, &ui22, &xr2, &xi2); + xr1 = ur11r * br1 - ui11r * bi1 - ur12s * xr2 + ui12s * xi2; + xi1 = ui11r * br1 + ur11r * bi1 - ui12s * xr2 - ur12s * xi2; + if (cswap[icmax - 1]) { + x[x_dim1 + 1] = xr2; + x[x_dim1 + 2] = xr1; + x[(x_dim1 << 1) + 1] = xi2; + x[(x_dim1 << 1) + 2] = xi1; + } else { + x[x_dim1 + 1] = xr1; + x[x_dim1 + 2] = xr2; + x[(x_dim1 << 1) + 1] = xi1; + x[(x_dim1 << 1) + 2] = xi2; + } +/* Computing MAX */ + r__1 = dabs(xr1) + dabs(xi1), r__2 = dabs(xr2) + dabs(xi2); + *xnorm = dmax(r__1,r__2); + +/* Further scaling if norm(A) norm(X) > overflow */ + + if (*xnorm > 1.f && cmax > 1.f) { + if (*xnorm > bignum / cmax) { + temp = cmax / bignum; + x[x_dim1 + 1] = temp * x[x_dim1 + 1]; + x[x_dim1 + 2] = temp * x[x_dim1 + 2]; + x[(x_dim1 << 1) + 1] = temp * x[(x_dim1 << 1) + 1]; + x[(x_dim1 << 1) + 2] = temp * x[(x_dim1 << 1) + 2]; + *xnorm = temp * *xnorm; + *scale = temp * *scale; + } + } + } + } + + return 0; + +/* End of SLALN2 */ + +} /* slaln2_ */ + +#undef crv +#undef civ +#undef cr +#undef ci + + +doublereal slamch_(char *cmach) +{ + /* Initialized data */ + + static logical first = TRUE_; + + /* System generated locals */ + integer i__1; + real ret_val; + + /* Builtin functions */ + double pow_ri(real *, integer *); + + /* Local variables */ + static real t; + static integer it; + static real rnd, eps, base; + static integer beta; + static real emin, prec, emax; + static integer imin, imax; + static logical lrnd; + static real rmin, rmax, rmach; + extern logical lsame_(char *, char *); + static real small, sfmin; + extern /* Subroutine */ int slamc2_(integer *, integer *, logical *, real + *, integer *, real *, integer *, real *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLAMCH determines single precision machine parameters. + + Arguments + ========= + + CMACH (input) CHARACTER*1 + Specifies the value to be returned by SLAMCH: + = 'E' or 'e', SLAMCH := eps + = 'S' or 's , SLAMCH := sfmin + = 'B' or 'b', SLAMCH := base + = 'P' or 'p', SLAMCH := eps*base + = 'N' or 'n', SLAMCH := t + = 'R' or 'r', SLAMCH := rnd + = 'M' or 'm', SLAMCH := emin + = 'U' or 'u', SLAMCH := rmin + = 'L' or 'l', SLAMCH := emax + = 'O' or 'o', SLAMCH := rmax + + where + + eps = relative machine precision + sfmin = safe minimum, such that 1/sfmin does not overflow + base = base of the machine + prec = eps*base + t = number of (base) digits in the mantissa + rnd = 1.0 when rounding occurs in addition, 0.0 otherwise + emin = minimum exponent before (gradual) underflow + rmin = underflow threshold - base**(emin-1) + emax = largest exponent before overflow + rmax = overflow threshold - (base**emax)*(1-eps) + + ===================================================================== +*/ + + + if (first) { + first = FALSE_; + slamc2_(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax); + base = (real) beta; + t = (real) it; + if (lrnd) { + rnd = 1.f; + i__1 = 1 - it; + eps = pow_ri(&base, &i__1) / 2; + } else { + rnd = 0.f; + i__1 = 1 - it; + eps = pow_ri(&base, &i__1); + } + prec = eps * base; + emin = (real) imin; + emax = (real) imax; + sfmin = rmin; + small = 1.f / rmax; + if (small >= sfmin) { + +/* + Use SMALL plus a bit, to avoid the possibility of rounding + causing overflow when computing 1/sfmin. +*/ + + sfmin = small * (eps + 1.f); + } + } + + if (lsame_(cmach, "E")) { + rmach = eps; + } else if (lsame_(cmach, "S")) { + rmach = sfmin; + } else if (lsame_(cmach, "B")) { + rmach = base; + } else if (lsame_(cmach, "P")) { + rmach = prec; + } else if (lsame_(cmach, "N")) { + rmach = t; + } else if (lsame_(cmach, "R")) { + rmach = rnd; + } else if (lsame_(cmach, "M")) { + rmach = emin; + } else if (lsame_(cmach, "U")) { + rmach = rmin; + } else if (lsame_(cmach, "L")) { + rmach = emax; + } else if (lsame_(cmach, "O")) { + rmach = rmax; + } + + ret_val = rmach; + return ret_val; + +/* End of SLAMCH */ + +} /* slamch_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int slamc1_(integer *beta, integer *t, logical *rnd, logical + *ieee1) +{ + /* Initialized data */ + + static logical first = TRUE_; + + /* System generated locals */ + real r__1, r__2; + + /* Local variables */ + static real a, b, c__, f, t1, t2; + static integer lt; + static real one, qtr; + static logical lrnd; + static integer lbeta; + static real savec; + static logical lieee1; + extern doublereal slamc3_(real *, real *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLAMC1 determines the machine parameters given by BETA, T, RND, and + IEEE1. + + Arguments + ========= + + BETA (output) INTEGER + The base of the machine. + + T (output) INTEGER + The number of ( BETA ) digits in the mantissa. + + RND (output) LOGICAL + Specifies whether proper rounding ( RND = .TRUE. ) or + chopping ( RND = .FALSE. ) occurs in addition. This may not + be a reliable guide to the way in which the machine performs + its arithmetic. + + IEEE1 (output) LOGICAL + Specifies whether rounding appears to be done in the IEEE + 'round to nearest' style. + + Further Details + =============== + + The routine is based on the routine ENVRON by Malcolm and + incorporates suggestions by Gentleman and Marovich. See + + Malcolm M. A. (1972) Algorithms to reveal properties of + floating-point arithmetic. Comms. of the ACM, 15, 949-951. + + Gentleman W. M. and Marovich S. B. (1974) More on algorithms + that reveal properties of floating point arithmetic units. + Comms. of the ACM, 17, 276-277. + + ===================================================================== +*/ + + + if (first) { + first = FALSE_; + one = 1.f; + +/* + LBETA, LIEEE1, LT and LRND are the local values of BETA, + IEEE1, T and RND. + + Throughout this routine we use the function SLAMC3 to ensure + that relevant values are stored and not held in registers, or + are not affected by optimizers. + + Compute a = 2.0**m with the smallest positive integer m such + that + + fl( a + 1.0 ) = a. +*/ + + a = 1.f; + c__ = 1.f; + +/* + WHILE( C.EQ.ONE )LOOP */ +L10: + if (c__ == one) { + a *= 2; + c__ = slamc3_(&a, &one); + r__1 = -a; + c__ = slamc3_(&c__, &r__1); + goto L10; + } +/* + + END WHILE + + Now compute b = 2.0**m with the smallest positive integer m + such that + + fl( a + b ) .gt. a. +*/ + + b = 1.f; + c__ = slamc3_(&a, &b); + +/* + WHILE( C.EQ.A )LOOP */ +L20: + if (c__ == a) { + b *= 2; + c__ = slamc3_(&a, &b); + goto L20; + } +/* + + END WHILE + + Now compute the base. a and c are neighbouring floating point + numbers in the interval ( beta**t, beta**( t + 1 ) ) and so + their difference is beta. Adding 0.25 to c is to ensure that it + is truncated to beta and not ( beta - 1 ). +*/ + + qtr = one / 4; + savec = c__; + r__1 = -a; + c__ = slamc3_(&c__, &r__1); + lbeta = c__ + qtr; + +/* + Now determine whether rounding or chopping occurs, by adding a + bit less than beta/2 and a bit more than beta/2 to a. +*/ + + b = (real) lbeta; + r__1 = b / 2; + r__2 = -b / 100; + f = slamc3_(&r__1, &r__2); + c__ = slamc3_(&f, &a); + if (c__ == a) { + lrnd = TRUE_; + } else { + lrnd = FALSE_; + } + r__1 = b / 2; + r__2 = b / 100; + f = slamc3_(&r__1, &r__2); + c__ = slamc3_(&f, &a); + if (lrnd && c__ == a) { + lrnd = FALSE_; + } + +/* + Try and decide whether rounding is done in the IEEE 'round to + nearest' style. B/2 is half a unit in the last place of the two + numbers A and SAVEC. Furthermore, A is even, i.e. has last bit + zero, and SAVEC is odd. Thus adding B/2 to A should not change + A, but adding B/2 to SAVEC should change SAVEC. +*/ + + r__1 = b / 2; + t1 = slamc3_(&r__1, &a); + r__1 = b / 2; + t2 = slamc3_(&r__1, &savec); + lieee1 = t1 == a && t2 > savec && lrnd; + +/* + Now find the mantissa, t. It should be the integer part of + log to the base beta of a, however it is safer to determine t + by powering. So we find t as the smallest positive integer for + which + + fl( beta**t + 1.0 ) = 1.0. +*/ + + lt = 0; + a = 1.f; + c__ = 1.f; + +/* + WHILE( C.EQ.ONE )LOOP */ +L30: + if (c__ == one) { + ++lt; + a *= lbeta; + c__ = slamc3_(&a, &one); + r__1 = -a; + c__ = slamc3_(&c__, &r__1); + goto L30; + } +/* + END WHILE */ + + } + + *beta = lbeta; + *t = lt; + *rnd = lrnd; + *ieee1 = lieee1; + return 0; + +/* End of SLAMC1 */ + +} /* slamc1_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int slamc2_(integer *beta, integer *t, logical *rnd, real * + eps, integer *emin, real *rmin, integer *emax, real *rmax) +{ + /* Initialized data */ + + static logical first = TRUE_; + static logical iwarn = FALSE_; + + /* Format strings */ + static char fmt_9999[] = "(//\002 WARNING. The value EMIN may be incorre" + "ct:-\002,\002 EMIN = \002,i8,/\002 If, after inspection, the va" + "lue EMIN looks\002,\002 acceptable please comment out \002,/\002" + " the IF block as marked within the code of routine\002,\002 SLAM" + "C2,\002,/\002 otherwise supply EMIN explicitly.\002,/)"; + + /* System generated locals */ + integer i__1; + real r__1, r__2, r__3, r__4, r__5; + + /* Builtin functions */ + double pow_ri(real *, integer *); + integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); + + /* Local variables */ + static real a, b, c__; + static integer i__, lt; + static real one, two; + static logical ieee; + static real half; + static logical lrnd; + static real leps, zero; + static integer lbeta; + static real rbase; + static integer lemin, lemax, gnmin; + static real small; + static integer gpmin; + static real third, lrmin, lrmax, sixth; + static logical lieee1; + extern /* Subroutine */ int slamc1_(integer *, integer *, logical *, + logical *); + extern doublereal slamc3_(real *, real *); + extern /* Subroutine */ int slamc4_(integer *, real *, integer *), + slamc5_(integer *, integer *, integer *, logical *, integer *, + real *); + static integer ngnmin, ngpmin; + + /* Fortran I/O blocks */ + static cilist io___620 = { 0, 6, 0, fmt_9999, 0 }; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLAMC2 determines the machine parameters specified in its argument + list. + + Arguments + ========= + + BETA (output) INTEGER + The base of the machine. + + T (output) INTEGER + The number of ( BETA ) digits in the mantissa. + + RND (output) LOGICAL + Specifies whether proper rounding ( RND = .TRUE. ) or + chopping ( RND = .FALSE. ) occurs in addition. This may not + be a reliable guide to the way in which the machine performs + its arithmetic. + + EPS (output) REAL + The smallest positive number such that + + fl( 1.0 - EPS ) .LT. 1.0, + + where fl denotes the computed value. + + EMIN (output) INTEGER + The minimum exponent before (gradual) underflow occurs. + + RMIN (output) REAL + The smallest normalized number for the machine, given by + BASE**( EMIN - 1 ), where BASE is the floating point value + of BETA. + + EMAX (output) INTEGER + The maximum exponent before overflow occurs. + + RMAX (output) REAL + The largest positive number for the machine, given by + BASE**EMAX * ( 1 - EPS ), where BASE is the floating point + value of BETA. + + Further Details + =============== + + The computation of EPS is based on a routine PARANOIA by + W. Kahan of the University of California at Berkeley. + + ===================================================================== +*/ + + + if (first) { + first = FALSE_; + zero = 0.f; + one = 1.f; + two = 2.f; + +/* + LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of + BETA, T, RND, EPS, EMIN and RMIN. + + Throughout this routine we use the function SLAMC3 to ensure + that relevant values are stored and not held in registers, or + are not affected by optimizers. + + SLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. +*/ + + slamc1_(&lbeta, <, &lrnd, &lieee1); + +/* Start to find EPS. */ + + b = (real) lbeta; + i__1 = -lt; + a = pow_ri(&b, &i__1); + leps = a; + +/* Try some tricks to see whether or not this is the correct EPS. */ + + b = two / 3; + half = one / 2; + r__1 = -half; + sixth = slamc3_(&b, &r__1); + third = slamc3_(&sixth, &sixth); + r__1 = -half; + b = slamc3_(&third, &r__1); + b = slamc3_(&b, &sixth); + b = dabs(b); + if (b < leps) { + b = leps; + } + + leps = 1.f; + +/* + WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */ +L10: + if (leps > b && b > zero) { + leps = b; + r__1 = half * leps; +/* Computing 5th power */ + r__3 = two, r__4 = r__3, r__3 *= r__3; +/* Computing 2nd power */ + r__5 = leps; + r__2 = r__4 * (r__3 * r__3) * (r__5 * r__5); + c__ = slamc3_(&r__1, &r__2); + r__1 = -c__; + c__ = slamc3_(&half, &r__1); + b = slamc3_(&half, &c__); + r__1 = -b; + c__ = slamc3_(&half, &r__1); + b = slamc3_(&half, &c__); + goto L10; + } +/* + END WHILE */ + + if (a < leps) { + leps = a; + } + +/* + Computation of EPS complete. + + Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3)). + Keep dividing A by BETA until (gradual) underflow occurs. This + is detected when we cannot recover the previous A. +*/ + + rbase = one / lbeta; + small = one; + for (i__ = 1; i__ <= 3; ++i__) { + r__1 = small * rbase; + small = slamc3_(&r__1, &zero); +/* L20: */ + } + a = slamc3_(&one, &small); + slamc4_(&ngpmin, &one, &lbeta); + r__1 = -one; + slamc4_(&ngnmin, &r__1, &lbeta); + slamc4_(&gpmin, &a, &lbeta); + r__1 = -a; + slamc4_(&gnmin, &r__1, &lbeta); + ieee = FALSE_; + + if (ngpmin == ngnmin && gpmin == gnmin) { + if (ngpmin == gpmin) { + lemin = ngpmin; +/* + ( Non twos-complement machines, no gradual underflow; + e.g., VAX ) +*/ + } else if (gpmin - ngpmin == 3) { + lemin = ngpmin - 1 + lt; + ieee = TRUE_; +/* + ( Non twos-complement machines, with gradual underflow; + e.g., IEEE standard followers ) +*/ + } else { + lemin = min(ngpmin,gpmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } + + } else if (ngpmin == gpmin && ngnmin == gnmin) { + if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) { + lemin = max(ngpmin,ngnmin); +/* + ( Twos-complement machines, no gradual underflow; + e.g., CYBER 205 ) +*/ + } else { + lemin = min(ngpmin,ngnmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } + + } else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin) + { + if (gpmin - min(ngpmin,ngnmin) == 3) { + lemin = max(ngpmin,ngnmin) - 1 + lt; +/* + ( Twos-complement machines with gradual underflow; + no known machine ) +*/ + } else { + lemin = min(ngpmin,ngnmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } + + } else { +/* Computing MIN */ + i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin); + lemin = min(i__1,gnmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } +/* + ** + Comment out this if block if EMIN is ok +*/ + if (iwarn) { + first = TRUE_; + s_wsfe(&io___620); + do_fio(&c__1, (char *)&lemin, (ftnlen)sizeof(integer)); + e_wsfe(); + } +/* + ** + + Assume IEEE arithmetic if we found denormalised numbers above, + or if arithmetic seems to round in the IEEE style, determined + in routine SLAMC1. A true IEEE machine should have both things + true; however, faulty machines may have one or the other. +*/ + + ieee = ieee || lieee1; + +/* + Compute RMIN by successive division by BETA. We could compute + RMIN as BASE**( EMIN - 1 ), but some machines underflow during + this computation. +*/ + + lrmin = 1.f; + i__1 = 1 - lemin; + for (i__ = 1; i__ <= i__1; ++i__) { + r__1 = lrmin * rbase; + lrmin = slamc3_(&r__1, &zero); +/* L30: */ + } + +/* Finally, call SLAMC5 to compute EMAX and RMAX. */ + + slamc5_(&lbeta, <, &lemin, &ieee, &lemax, &lrmax); + } + + *beta = lbeta; + *t = lt; + *rnd = lrnd; + *eps = leps; + *emin = lemin; + *rmin = lrmin; + *emax = lemax; + *rmax = lrmax; + + return 0; + + +/* End of SLAMC2 */ + +} /* slamc2_ */ + + +/* *********************************************************************** */ + +doublereal slamc3_(real *a, real *b) +{ + /* System generated locals */ + real ret_val; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLAMC3 is intended to force A and B to be stored prior to doing + the addition of A and B , for use in situations where optimizers + might hold one of these in a register. + + Arguments + ========= + + A, B (input) REAL + The values A and B. + + ===================================================================== +*/ + + + ret_val = *a + *b; + + return ret_val; + +/* End of SLAMC3 */ + +} /* slamc3_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int slamc4_(integer *emin, real *start, integer *base) +{ + /* System generated locals */ + integer i__1; + real r__1; + + /* Local variables */ + static real a; + static integer i__; + static real b1, b2, c1, c2, d1, d2, one, zero, rbase; + extern doublereal slamc3_(real *, real *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLAMC4 is a service routine for SLAMC2. + + Arguments + ========= + + EMIN (output) EMIN + The minimum exponent before (gradual) underflow, computed by + setting A = START and dividing by BASE until the previous A + can not be recovered. + + START (input) REAL + The starting point for determining EMIN. + + BASE (input) INTEGER + The base of the machine. + + ===================================================================== +*/ + + + a = *start; + one = 1.f; + rbase = one / *base; + zero = 0.f; + *emin = 1; + r__1 = a * rbase; + b1 = slamc3_(&r__1, &zero); + c1 = a; + c2 = a; + d1 = a; + d2 = a; +/* + + WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. + $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP +*/ +L10: + if (c1 == a && c2 == a && d1 == a && d2 == a) { + --(*emin); + a = b1; + r__1 = a / *base; + b1 = slamc3_(&r__1, &zero); + r__1 = b1 * *base; + c1 = slamc3_(&r__1, &zero); + d1 = zero; + i__1 = *base; + for (i__ = 1; i__ <= i__1; ++i__) { + d1 += b1; +/* L20: */ + } + r__1 = a * rbase; + b2 = slamc3_(&r__1, &zero); + r__1 = b2 / rbase; + c2 = slamc3_(&r__1, &zero); + d2 = zero; + i__1 = *base; + for (i__ = 1; i__ <= i__1; ++i__) { + d2 += b2; +/* L30: */ + } + goto L10; + } +/* + END WHILE */ + + return 0; + +/* End of SLAMC4 */ + +} /* slamc4_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int slamc5_(integer *beta, integer *p, integer *emin, + logical *ieee, integer *emax, real *rmax) +{ + /* System generated locals */ + integer i__1; + real r__1; + + /* Local variables */ + static integer i__; + static real y, z__; + static integer try__, lexp; + static real oldy; + static integer uexp, nbits; + extern doublereal slamc3_(real *, real *); + static real recbas; + static integer exbits, expsum; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLAMC5 attempts to compute RMAX, the largest machine floating-point + number, without overflow. It assumes that EMAX + abs(EMIN) sum + approximately to a power of 2. It will fail on machines where this + assumption does not hold, for example, the Cyber 205 (EMIN = -28625, + EMAX = 28718). It will also fail if the value supplied for EMIN is + too large (i.e. too close to zero), probably with overflow. + + Arguments + ========= + + BETA (input) INTEGER + The base of floating-point arithmetic. + + P (input) INTEGER + The number of base BETA digits in the mantissa of a + floating-point value. + + EMIN (input) INTEGER + The minimum exponent before (gradual) underflow. + + IEEE (input) LOGICAL + A logical flag specifying whether or not the arithmetic + system is thought to comply with the IEEE standard. + + EMAX (output) INTEGER + The largest exponent before overflow + + RMAX (output) REAL + The largest machine floating-point number. + + ===================================================================== + + + First compute LEXP and UEXP, two powers of 2 that bound + abs(EMIN). We then assume that EMAX + abs(EMIN) will sum + approximately to the bound that is closest to abs(EMIN). + (EMAX is the exponent of the required number RMAX). +*/ + + lexp = 1; + exbits = 1; +L10: + try__ = lexp << 1; + if (try__ <= -(*emin)) { + lexp = try__; + ++exbits; + goto L10; + } + if (lexp == -(*emin)) { + uexp = lexp; + } else { + uexp = try__; + ++exbits; + } + +/* + Now -LEXP is less than or equal to EMIN, and -UEXP is greater + than or equal to EMIN. EXBITS is the number of bits needed to + store the exponent. +*/ + + if (uexp + *emin > -lexp - *emin) { + expsum = lexp << 1; + } else { + expsum = uexp << 1; + } + +/* + EXPSUM is the exponent range, approximately equal to + EMAX - EMIN + 1 . +*/ + + *emax = expsum + *emin - 1; + nbits = exbits + 1 + *p; + +/* + NBITS is the total number of bits needed to store a + floating-point number. +*/ + + if (nbits % 2 == 1 && *beta == 2) { + +/* + Either there are an odd number of bits used to store a + floating-point number, which is unlikely, or some bits are + not used in the representation of numbers, which is possible, + (e.g. Cray machines) or the mantissa has an implicit bit, + (e.g. IEEE machines, Dec Vax machines), which is perhaps the + most likely. We have to assume the last alternative. + If this is true, then we need to reduce EMAX by one because + there must be some way of representing zero in an implicit-bit + system. On machines like Cray, we are reducing EMAX by one + unnecessarily. +*/ + + --(*emax); + } + + if (*ieee) { + +/* + Assume we are on an IEEE machine which reserves one exponent + for infinity and NaN. +*/ + + --(*emax); + } + +/* + Now create RMAX, the largest machine number, which should + be equal to (1.0 - BETA**(-P)) * BETA**EMAX . + + First compute 1.0 - BETA**(-P), being careful that the + result is less than 1.0 . +*/ + + recbas = 1.f / *beta; + z__ = *beta - 1.f; + y = 0.f; + i__1 = *p; + for (i__ = 1; i__ <= i__1; ++i__) { + z__ *= recbas; + if (y < 1.f) { + oldy = y; + } + y = slamc3_(&y, &z__); +/* L20: */ + } + if (y >= 1.f) { + y = oldy; + } + +/* Now multiply by BETA**EMAX to get RMAX. */ + + i__1 = *emax; + for (i__ = 1; i__ <= i__1; ++i__) { + r__1 = y * *beta; + y = slamc3_(&r__1, &c_b29); +/* L30: */ + } + + *rmax = y; + return 0; + +/* End of SLAMC5 */ + +} /* slamc5_ */ + +/* Subroutine */ int slamrg_(integer *n1, integer *n2, real *a, integer * + strd1, integer *strd2, integer *index) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, ind1, ind2, n1sv, n2sv; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + SLAMRG will create a permutation list which will merge the elements + of A (which is composed of two independently sorted sets) into a + single set which is sorted in ascending order. + + Arguments + ========= + + N1 (input) INTEGER + N2 (input) INTEGER + These arguements contain the respective lengths of the two + sorted lists to be merged. + + A (input) REAL array, dimension (N1+N2) + The first N1 elements of A contain a list of numbers which + are sorted in either ascending or descending order. Likewise + for the final N2 elements. + + STRD1 (input) INTEGER + STRD2 (input) INTEGER + These are the strides to be taken through the array A. + Allowable strides are 1 and -1. They indicate whether a + subset of A is sorted in ascending (STRDx = 1) or descending + (STRDx = -1) order. + + INDEX (output) INTEGER array, dimension (N1+N2) + On exit this array will contain a permutation such that + if B( I ) = A( INDEX( I ) ) for I=1,N1+N2, then B will be + sorted in ascending order. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --index; + --a; + + /* Function Body */ + n1sv = *n1; + n2sv = *n2; + if (*strd1 > 0) { + ind1 = 1; + } else { + ind1 = *n1; + } + if (*strd2 > 0) { + ind2 = *n1 + 1; + } else { + ind2 = *n1 + *n2; + } + i__ = 1; +/* while ( (N1SV > 0) & (N2SV > 0) ) */ +L10: + if (n1sv > 0 && n2sv > 0) { + if (a[ind1] <= a[ind2]) { + index[i__] = ind1; + ++i__; + ind1 += *strd1; + --n1sv; + } else { + index[i__] = ind2; + ++i__; + ind2 += *strd2; + --n2sv; + } + goto L10; + } +/* end while */ + if (n1sv == 0) { + i__1 = n2sv; + for (n1sv = 1; n1sv <= i__1; ++n1sv) { + index[i__] = ind2; + ++i__; + ind2 += *strd2; +/* L20: */ + } + } else { +/* N2SV .EQ. 0 */ + i__1 = n1sv; + for (n2sv = 1; n2sv <= i__1; ++n2sv) { + index[i__] = ind1; + ++i__; + ind1 += *strd1; +/* L30: */ + } + } + + return 0; + +/* End of SLAMRG */ + +} /* slamrg_ */ + +doublereal slange_(char *norm, integer *m, integer *n, real *a, integer *lda, + real *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real ret_val, r__1, r__2, r__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, j; + static real sum, scale; + extern logical lsame_(char *, char *); + static real value; + extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, + real *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLANGE returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + real matrix A. + + Description + =========== + + SLANGE returns the value + + SLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in SLANGE as described + above. + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. When M = 0, + SLANGE is set to zero. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. When N = 0, + SLANGE is set to zero. + + A (input) REAL array, dimension (LDA,N) + The m by n matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(M,1). + + WORK (workspace) REAL array, dimension (LWORK), + where LWORK >= M when NORM = 'I'; otherwise, WORK is not + referenced. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (min(*m,*n) == 0) { + value = 0.f; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + value = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__2 = value, r__3 = (r__1 = a[i__ + j * a_dim1], dabs(r__1)); + value = dmax(r__2,r__3); +/* L10: */ + } +/* L20: */ + } + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1') { + +/* Find norm1(A). */ + + value = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.f; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + sum += (r__1 = a[i__ + j * a_dim1], dabs(r__1)); +/* L30: */ + } + value = dmax(value,sum); +/* L40: */ + } + } else if (lsame_(norm, "I")) { + +/* Find normI(A). */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.f; +/* L50: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] += (r__1 = a[i__ + j * a_dim1], dabs(r__1)); +/* L60: */ + } +/* L70: */ + } + value = 0.f; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__1 = value, r__2 = work[i__]; + value = dmax(r__1,r__2); +/* L80: */ + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.f; + sum = 1.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + slassq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L90: */ + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of SLANGE */ + +} /* slange_ */ + +doublereal slanhs_(char *norm, integer *n, real *a, integer *lda, real *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + real ret_val, r__1, r__2, r__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, j; + static real sum, scale; + extern logical lsame_(char *, char *); + static real value; + extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, + real *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLANHS returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + Hessenberg matrix A. + + Description + =========== + + SLANHS returns the value + + SLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in SLANHS as described + above. + + N (input) INTEGER + The order of the matrix A. N >= 0. When N = 0, SLANHS is + set to zero. + + A (input) REAL array, dimension (LDA,N) + The n by n upper Hessenberg matrix A; the part of A below the + first sub-diagonal is not referenced. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(N,1). + + WORK (workspace) REAL array, dimension (LWORK), + where LWORK >= N when NORM = 'I'; otherwise, WORK is not + referenced. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (*n == 0) { + value = 0.f; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + value = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__2 = value, r__3 = (r__1 = a[i__ + j * a_dim1], dabs(r__1)); + value = dmax(r__2,r__3); +/* L10: */ + } +/* L20: */ + } + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1') { + +/* Find norm1(A). */ + + value = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.f; +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + sum += (r__1 = a[i__ + j * a_dim1], dabs(r__1)); +/* L30: */ + } + value = dmax(value,sum); +/* L40: */ + } + } else if (lsame_(norm, "I")) { + +/* Find normI(A). */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.f; +/* L50: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] += (r__1 = a[i__ + j * a_dim1], dabs(r__1)); +/* L60: */ + } +/* L70: */ + } + value = 0.f; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__1 = value, r__2 = work[i__]; + value = dmax(r__1,r__2); +/* L80: */ + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.f; + sum = 1.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + slassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L90: */ + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of SLANHS */ + +} /* slanhs_ */ + +doublereal slanst_(char *norm, integer *n, real *d__, real *e) +{ + /* System generated locals */ + integer i__1; + real ret_val, r__1, r__2, r__3, r__4, r__5; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__; + static real sum, scale; + extern logical lsame_(char *, char *); + static real anorm; + extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, + real *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SLANST returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + real symmetric tridiagonal matrix A. + + Description + =========== + + SLANST returns the value + + SLANST = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in SLANST as described + above. + + N (input) INTEGER + The order of the matrix A. N >= 0. When N = 0, SLANST is + set to zero. + + D (input) REAL array, dimension (N) + The diagonal elements of A. + + E (input) REAL array, dimension (N-1) + The (n-1) sub-diagonal or super-diagonal elements of A. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --e; + --d__; + + /* Function Body */ + if (*n <= 0) { + anorm = 0.f; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + anorm = (r__1 = d__[*n], dabs(r__1)); + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__2 = anorm, r__3 = (r__1 = d__[i__], dabs(r__1)); + anorm = dmax(r__2,r__3); +/* Computing MAX */ + r__2 = anorm, r__3 = (r__1 = e[i__], dabs(r__1)); + anorm = dmax(r__2,r__3); +/* L10: */ + } + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1' || lsame_(norm, "I")) { + +/* Find norm1(A). */ + + if (*n == 1) { + anorm = dabs(d__[1]); + } else { +/* Computing MAX */ + r__3 = dabs(d__[1]) + dabs(e[1]), r__4 = (r__1 = e[*n - 1], dabs( + r__1)) + (r__2 = d__[*n], dabs(r__2)); + anorm = dmax(r__3,r__4); + i__1 = *n - 1; + for (i__ = 2; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__4 = anorm, r__5 = (r__1 = d__[i__], dabs(r__1)) + (r__2 = + e[i__], dabs(r__2)) + (r__3 = e[i__ - 1], dabs(r__3)); + anorm = dmax(r__4,r__5); +/* L20: */ + } + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.f; + sum = 1.f; + if (*n > 1) { + i__1 = *n - 1; + slassq_(&i__1, &e[1], &c__1, &scale, &sum); + sum *= 2; + } + slassq_(n, &d__[1], &c__1, &scale, &sum); + anorm = scale * sqrt(sum); + } + + ret_val = anorm; + return ret_val; + +/* End of SLANST */ + +} /* slanst_ */ + +doublereal slansy_(char *norm, char *uplo, integer *n, real *a, integer *lda, + real *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real ret_val, r__1, r__2, r__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, j; + static real sum, absa, scale; + extern logical lsame_(char *, char *); + static real value; + extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, + real *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLANSY returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + real symmetric matrix A. + + Description + =========== + + SLANSY returns the value + + SLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in SLANSY as described + above. + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + symmetric matrix A is to be referenced. + = 'U': Upper triangular part of A is referenced + = 'L': Lower triangular part of A is referenced + + N (input) INTEGER + The order of the matrix A. N >= 0. When N = 0, SLANSY is + set to zero. + + A (input) REAL array, dimension (LDA,N) + The symmetric matrix A. If UPLO = 'U', the leading n by n + upper triangular part of A contains the upper triangular part + of the matrix A, and the strictly lower triangular part of A + is not referenced. If UPLO = 'L', the leading n by n lower + triangular part of A contains the lower triangular part of + the matrix A, and the strictly upper triangular part of A is + not referenced. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(N,1). + + WORK (workspace) REAL array, dimension (LWORK), + where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, + WORK is not referenced. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (*n == 0) { + value = 0.f; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + value = 0.f; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__2 = value, r__3 = (r__1 = a[i__ + j * a_dim1], dabs( + r__1)); + value = dmax(r__2,r__3); +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__2 = value, r__3 = (r__1 = a[i__ + j * a_dim1], dabs( + r__1)); + value = dmax(r__2,r__3); +/* L30: */ + } +/* L40: */ + } + } + } else if (lsame_(norm, "I") || lsame_(norm, "O") || *(unsigned char *)norm == '1') { + +/* Find normI(A) ( = norm1(A), since A is symmetric). */ + + value = 0.f; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.f; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + absa = (r__1 = a[i__ + j * a_dim1], dabs(r__1)); + sum += absa; + work[i__] += absa; +/* L50: */ + } + work[j] = sum + (r__1 = a[j + j * a_dim1], dabs(r__1)); +/* L60: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__1 = value, r__2 = work[i__]; + value = dmax(r__1,r__2); +/* L70: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.f; +/* L80: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = work[j] + (r__1 = a[j + j * a_dim1], dabs(r__1)); + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + absa = (r__1 = a[i__ + j * a_dim1], dabs(r__1)); + sum += absa; + work[i__] += absa; +/* L90: */ + } + value = dmax(value,sum); +/* L100: */ + } + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.f; + sum = 1.f; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + i__2 = j - 1; + slassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L110: */ + } + } else { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j; + slassq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum); +/* L120: */ + } + } + sum *= 2; + i__1 = *lda + 1; + slassq_(n, &a[a_offset], &i__1, &scale, &sum); + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of SLANSY */ + +} /* slansy_ */ + +/* Subroutine */ int slanv2_(real *a, real *b, real *c__, real *d__, real * + rt1r, real *rt1i, real *rt2r, real *rt2i, real *cs, real *sn) +{ + /* System generated locals */ + real r__1, r__2; + + /* Builtin functions */ + double r_sign(real *, real *), sqrt(doublereal); + + /* Local variables */ + static real p, z__, aa, bb, cc, dd, cs1, sn1, sab, sac, eps, tau, temp, + scale, bcmax, bcmis, sigma; + extern doublereal slapy2_(real *, real *), slamch_(char *); + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SLANV2 computes the Schur factorization of a real 2-by-2 nonsymmetric + matrix in standard form: + + [ A B ] = [ CS -SN ] [ AA BB ] [ CS SN ] + [ C D ] [ SN CS ] [ CC DD ] [-SN CS ] + + where either + 1) CC = 0 so that AA and DD are real eigenvalues of the matrix, or + 2) AA = DD and BB*CC < 0, so that AA + or - sqrt(BB*CC) are complex + conjugate eigenvalues. + + Arguments + ========= + + A (input/output) REAL + B (input/output) REAL + C (input/output) REAL + D (input/output) REAL + On entry, the elements of the input matrix. + On exit, they are overwritten by the elements of the + standardised Schur form. + + RT1R (output) REAL + RT1I (output) REAL + RT2R (output) REAL + RT2I (output) REAL + The real and imaginary parts of the eigenvalues. If the + eigenvalues are a complex conjugate pair, RT1I > 0. + + CS (output) REAL + SN (output) REAL + Parameters of the rotation matrix. + + Further Details + =============== + + Modified by V. Sima, Research Institute for Informatics, Bucharest, + Romania, to reduce the risk of cancellation errors, + when computing real eigenvalues, and to ensure, if possible, that + abs(RT1R) >= abs(RT2R). + + ===================================================================== +*/ + + + eps = slamch_("P"); + if (*c__ == 0.f) { + *cs = 1.f; + *sn = 0.f; + goto L10; + + } else if (*b == 0.f) { + +/* Swap rows and columns */ + + *cs = 0.f; + *sn = 1.f; + temp = *d__; + *d__ = *a; + *a = temp; + *b = -(*c__); + *c__ = 0.f; + goto L10; + } else if (*a - *d__ == 0.f && r_sign(&c_b15, b) != r_sign(&c_b15, c__)) { + *cs = 1.f; + *sn = 0.f; + goto L10; + } else { + + temp = *a - *d__; + p = temp * .5f; +/* Computing MAX */ + r__1 = dabs(*b), r__2 = dabs(*c__); + bcmax = dmax(r__1,r__2); +/* Computing MIN */ + r__1 = dabs(*b), r__2 = dabs(*c__); + bcmis = dmin(r__1,r__2) * r_sign(&c_b15, b) * r_sign(&c_b15, c__); +/* Computing MAX */ + r__1 = dabs(p); + scale = dmax(r__1,bcmax); + z__ = p / scale * p + bcmax / scale * bcmis; + +/* + If Z is of the order of the machine accuracy, postpone the + decision on the nature of eigenvalues +*/ + + if (z__ >= eps * 4.f) { + +/* Real eigenvalues. Compute A and D. */ + + r__1 = sqrt(scale) * sqrt(z__); + z__ = p + r_sign(&r__1, &p); + *a = *d__ + z__; + *d__ -= bcmax / z__ * bcmis; + +/* Compute B and the rotation matrix */ + + tau = slapy2_(c__, &z__); + *cs = z__ / tau; + *sn = *c__ / tau; + *b -= *c__; + *c__ = 0.f; + } else { + +/* + Complex eigenvalues, or real (almost) equal eigenvalues. + Make diagonal elements equal. +*/ + + sigma = *b + *c__; + tau = slapy2_(&sigma, &temp); + *cs = sqrt((dabs(sigma) / tau + 1.f) * .5f); + *sn = -(p / (tau * *cs)) * r_sign(&c_b15, &sigma); + +/* + Compute [ AA BB ] = [ A B ] [ CS -SN ] + [ CC DD ] [ C D ] [ SN CS ] +*/ + + aa = *a * *cs + *b * *sn; + bb = -(*a) * *sn + *b * *cs; + cc = *c__ * *cs + *d__ * *sn; + dd = -(*c__) * *sn + *d__ * *cs; + +/* + Compute [ A B ] = [ CS SN ] [ AA BB ] + [ C D ] [-SN CS ] [ CC DD ] +*/ + + *a = aa * *cs + cc * *sn; + *b = bb * *cs + dd * *sn; + *c__ = -aa * *sn + cc * *cs; + *d__ = -bb * *sn + dd * *cs; + + temp = (*a + *d__) * .5f; + *a = temp; + *d__ = temp; + + if (*c__ != 0.f) { + if (*b != 0.f) { + if (r_sign(&c_b15, b) == r_sign(&c_b15, c__)) { + +/* Real eigenvalues: reduce to upper triangular form */ + + sab = sqrt((dabs(*b))); + sac = sqrt((dabs(*c__))); + r__1 = sab * sac; + p = r_sign(&r__1, c__); + tau = 1.f / sqrt((r__1 = *b + *c__, dabs(r__1))); + *a = temp + p; + *d__ = temp - p; + *b -= *c__; + *c__ = 0.f; + cs1 = sab * tau; + sn1 = sac * tau; + temp = *cs * cs1 - *sn * sn1; + *sn = *cs * sn1 + *sn * cs1; + *cs = temp; + } + } else { + *b = -(*c__); + *c__ = 0.f; + temp = *cs; + *cs = -(*sn); + *sn = temp; + } + } + } + + } + +L10: + +/* Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I). */ + + *rt1r = *a; + *rt2r = *d__; + if (*c__ == 0.f) { + *rt1i = 0.f; + *rt2i = 0.f; + } else { + *rt1i = sqrt((dabs(*b))) * sqrt((dabs(*c__))); + *rt2i = -(*rt1i); + } + return 0; + +/* End of SLANV2 */ + +} /* slanv2_ */ + +doublereal slapy2_(real *x, real *y) +{ + /* System generated locals */ + real ret_val, r__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static real w, z__, xabs, yabs; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary + overflow. + + Arguments + ========= + + X (input) REAL + Y (input) REAL + X and Y specify the values x and y. + + ===================================================================== +*/ + + + xabs = dabs(*x); + yabs = dabs(*y); + w = dmax(xabs,yabs); + z__ = dmin(xabs,yabs); + if (z__ == 0.f) { + ret_val = w; + } else { +/* Computing 2nd power */ + r__1 = z__ / w; + ret_val = w * sqrt(r__1 * r__1 + 1.f); + } + return ret_val; + +/* End of SLAPY2 */ + +} /* slapy2_ */ + +doublereal slapy3_(real *x, real *y, real *z__) +{ + /* System generated locals */ + real ret_val, r__1, r__2, r__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static real w, xabs, yabs, zabs; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause + unnecessary overflow. + + Arguments + ========= + + X (input) REAL + Y (input) REAL + Z (input) REAL + X, Y and Z specify the values x, y and z. + + ===================================================================== +*/ + + + xabs = dabs(*x); + yabs = dabs(*y); + zabs = dabs(*z__); +/* Computing MAX */ + r__1 = max(xabs,yabs); + w = dmax(r__1,zabs); + if (w == 0.f) { + ret_val = 0.f; + } else { +/* Computing 2nd power */ + r__1 = xabs / w; +/* Computing 2nd power */ + r__2 = yabs / w; +/* Computing 2nd power */ + r__3 = zabs / w; + ret_val = w * sqrt(r__1 * r__1 + r__2 * r__2 + r__3 * r__3); + } + return ret_val; + +/* End of SLAPY3 */ + +} /* slapy3_ */ + +/* Subroutine */ int slarf_(char *side, integer *m, integer *n, real *v, + integer *incv, real *tau, real *c__, integer *ldc, real *work) +{ + /* System generated locals */ + integer c_dim1, c_offset; + real r__1; + + /* Local variables */ + extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, + integer *, real *, integer *, real *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SLARF applies a real elementary reflector H to a real m by n matrix + C, from either the left or the right. H is represented in the form + + H = I - tau * v * v' + + where tau is a real scalar and v is a real vector. + + If tau = 0, then H is taken to be the unit matrix. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': form H * C + = 'R': form C * H + + M (input) INTEGER + The number of rows of the matrix C. + + N (input) INTEGER + The number of columns of the matrix C. + + V (input) REAL array, dimension + (1 + (M-1)*abs(INCV)) if SIDE = 'L' + or (1 + (N-1)*abs(INCV)) if SIDE = 'R' + The vector v in the representation of H. V is not used if + TAU = 0. + + INCV (input) INTEGER + The increment between elements of v. INCV <> 0. + + TAU (input) REAL + The value tau in the representation of H. + + C (input/output) REAL array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by the matrix H * C if SIDE = 'L', + or C * H if SIDE = 'R'. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) REAL array, dimension + (N) if SIDE = 'L' + or (M) if SIDE = 'R' + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + if (lsame_(side, "L")) { + +/* Form H * C */ + + if (*tau != 0.f) { + +/* w := C' * v */ + + sgemv_("Transpose", m, n, &c_b15, &c__[c_offset], ldc, &v[1], + incv, &c_b29, &work[1], &c__1); + +/* C := C - v * w' */ + + r__1 = -(*tau); + sger_(m, n, &r__1, &v[1], incv, &work[1], &c__1, &c__[c_offset], + ldc); + } + } else { + +/* Form C * H */ + + if (*tau != 0.f) { + +/* w := C * v */ + + sgemv_("No transpose", m, n, &c_b15, &c__[c_offset], ldc, &v[1], + incv, &c_b29, &work[1], &c__1); + +/* C := C - w * v' */ + + r__1 = -(*tau); + sger_(m, n, &r__1, &work[1], &c__1, &v[1], incv, &c__[c_offset], + ldc); + } + } + return 0; + +/* End of SLARF */ + +} /* slarf_ */ + +/* Subroutine */ int slarfb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, real *v, integer *ldv, + real *t, integer *ldt, real *c__, integer *ldc, real *work, integer * + ldwork) +{ + /* System generated locals */ + integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, + work_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *), scopy_(integer *, real *, + integer *, real *, integer *), strmm_(char *, char *, char *, + char *, integer *, integer *, real *, real *, integer *, real *, + integer *); + static char transt[1]; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SLARFB applies a real block reflector H or its transpose H' to a + real m by n matrix C, from either the left or the right. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply H or H' from the Left + = 'R': apply H or H' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply H (No transpose) + = 'T': apply H' (Transpose) + + DIRECT (input) CHARACTER*1 + Indicates how H is formed from a product of elementary + reflectors + = 'F': H = H(1) H(2) . . . H(k) (Forward) + = 'B': H = H(k) . . . H(2) H(1) (Backward) + + STOREV (input) CHARACTER*1 + Indicates how the vectors which define the elementary + reflectors are stored: + = 'C': Columnwise + = 'R': Rowwise + + M (input) INTEGER + The number of rows of the matrix C. + + N (input) INTEGER + The number of columns of the matrix C. + + K (input) INTEGER + The order of the matrix T (= the number of elementary + reflectors whose product defines the block reflector). + + V (input) REAL array, dimension + (LDV,K) if STOREV = 'C' + (LDV,M) if STOREV = 'R' and SIDE = 'L' + (LDV,N) if STOREV = 'R' and SIDE = 'R' + The matrix V. See further details. + + LDV (input) INTEGER + The leading dimension of the array V. + If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); + if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); + if STOREV = 'R', LDV >= K. + + T (input) REAL array, dimension (LDT,K) + The triangular k by k matrix T in the representation of the + block reflector. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= K. + + C (input/output) REAL array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by H*C or H'*C or C*H or C*H'. + + LDC (input) INTEGER + The leading dimension of the array C. LDA >= max(1,M). + + WORK (workspace) REAL array, dimension (LDWORK,K) + + LDWORK (input) INTEGER + The leading dimension of the array WORK. + If SIDE = 'L', LDWORK >= max(1,N); + if SIDE = 'R', LDWORK >= max(1,M). + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + work_dim1 = *ldwork; + work_offset = 1 + work_dim1; + work -= work_offset; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + return 0; + } + + if (lsame_(trans, "N")) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + + if (lsame_(storev, "C")) { + + if (lsame_(direct, "F")) { + +/* + Let V = ( V1 ) (first K rows) + ( V2 ) + where V1 is unit lower triangular. +*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) + + W := C1' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + scopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], + &c__1); +/* L10: */ + } + +/* W := W * V1 */ + + strmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b15, + &v[v_offset], ldv, &work[work_offset], ldwork); + if (*m > *k) { + +/* W := W + C2'*V2 */ + + i__1 = *m - *k; + sgemm_("Transpose", "No transpose", n, k, &i__1, &c_b15, & + c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + v_dim1], + ldv, &c_b15, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + strmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b15, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V * W' */ + + if (*m > *k) { + +/* C2 := C2 - V2 * W' */ + + i__1 = *m - *k; + sgemm_("No transpose", "Transpose", &i__1, n, k, &c_b151, + &v[*k + 1 + v_dim1], ldv, &work[work_offset], + ldwork, &c_b15, &c__[*k + 1 + c_dim1], ldc); + } + +/* W := W * V1' */ + + strmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b15, & + v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; +/* L20: */ + } +/* L30: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + W := C * V = (C1*V1 + C2*V2) (stored in WORK) + + W := C1 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + scopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L40: */ + } + +/* W := W * V1 */ + + strmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b15, + &v[v_offset], ldv, &work[work_offset], ldwork); + if (*n > *k) { + +/* W := W + C2 * V2 */ + + i__1 = *n - *k; + sgemm_("No transpose", "No transpose", m, k, &i__1, & + c_b15, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + + 1 + v_dim1], ldv, &c_b15, &work[work_offset], + ldwork); + } + +/* W := W * T or W * T' */ + + strmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b15, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V' */ + + if (*n > *k) { + +/* C2 := C2 - W * V2' */ + + i__1 = *n - *k; + sgemm_("No transpose", "Transpose", m, &i__1, k, &c_b151, + &work[work_offset], ldwork, &v[*k + 1 + v_dim1], + ldv, &c_b15, &c__[(*k + 1) * c_dim1 + 1], ldc); + } + +/* W := W * V1' */ + + strmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b15, & + v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; +/* L50: */ + } +/* L60: */ + } + } + + } else { + +/* + Let V = ( V1 ) + ( V2 ) (last K rows) + where V2 is unit upper triangular. +*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) + + W := C2' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + scopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * + work_dim1 + 1], &c__1); +/* L70: */ + } + +/* W := W * V2 */ + + strmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b15, + &v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], + ldwork); + if (*m > *k) { + +/* W := W + C1'*V1 */ + + i__1 = *m - *k; + sgemm_("Transpose", "No transpose", n, k, &i__1, &c_b15, & + c__[c_offset], ldc, &v[v_offset], ldv, &c_b15, & + work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + strmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b15, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V * W' */ + + if (*m > *k) { + +/* C1 := C1 - V1 * W' */ + + i__1 = *m - *k; + sgemm_("No transpose", "Transpose", &i__1, n, k, &c_b151, + &v[v_offset], ldv, &work[work_offset], ldwork, & + c_b15, &c__[c_offset], ldc) + ; + } + +/* W := W * V2' */ + + strmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b15, & + v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], + ldwork); + +/* C2 := C2 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[*m - *k + j + i__ * c_dim1] -= work[i__ + j * + work_dim1]; +/* L80: */ + } +/* L90: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + W := C * V = (C1*V1 + C2*V2) (stored in WORK) + + W := C2 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + scopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ + j * work_dim1 + 1], &c__1); +/* L100: */ + } + +/* W := W * V2 */ + + strmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b15, + &v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], + ldwork); + if (*n > *k) { + +/* W := W + C1 * V1 */ + + i__1 = *n - *k; + sgemm_("No transpose", "No transpose", m, k, &i__1, & + c_b15, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b15, &work[work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + strmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b15, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V' */ + + if (*n > *k) { + +/* C1 := C1 - W * V1' */ + + i__1 = *n - *k; + sgemm_("No transpose", "Transpose", m, &i__1, k, &c_b151, + &work[work_offset], ldwork, &v[v_offset], ldv, & + c_b15, &c__[c_offset], ldc) + ; + } + +/* W := W * V2' */ + + strmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b15, & + v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], + ldwork); + +/* C2 := C2 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + (*n - *k + j) * c_dim1] -= work[i__ + j * + work_dim1]; +/* L110: */ + } +/* L120: */ + } + } + } + + } else if (lsame_(storev, "R")) { + + if (lsame_(direct, "F")) { + +/* + Let V = ( V1 V2 ) (V1: first K columns) + where V1 is unit upper triangular. +*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) + + W := C1' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + scopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], + &c__1); +/* L130: */ + } + +/* W := W * V1' */ + + strmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b15, & + v[v_offset], ldv, &work[work_offset], ldwork); + if (*m > *k) { + +/* W := W + C2'*V2' */ + + i__1 = *m - *k; + sgemm_("Transpose", "Transpose", n, k, &i__1, &c_b15, & + c__[*k + 1 + c_dim1], ldc, &v[(*k + 1) * v_dim1 + + 1], ldv, &c_b15, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + strmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b15, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V' * W' */ + + if (*m > *k) { + +/* C2 := C2 - V2' * W' */ + + i__1 = *m - *k; + sgemm_("Transpose", "Transpose", &i__1, n, k, &c_b151, &v[ + (*k + 1) * v_dim1 + 1], ldv, &work[work_offset], + ldwork, &c_b15, &c__[*k + 1 + c_dim1], ldc); + } + +/* W := W * V1 */ + + strmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b15, + &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; +/* L140: */ + } +/* L150: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + W := C * V' = (C1*V1' + C2*V2') (stored in WORK) + + W := C1 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + scopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L160: */ + } + +/* W := W * V1' */ + + strmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b15, & + v[v_offset], ldv, &work[work_offset], ldwork); + if (*n > *k) { + +/* W := W + C2 * V2' */ + + i__1 = *n - *k; + sgemm_("No transpose", "Transpose", m, k, &i__1, &c_b15, & + c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k + 1) * + v_dim1 + 1], ldv, &c_b15, &work[work_offset], + ldwork); + } + +/* W := W * T or W * T' */ + + strmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b15, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V */ + + if (*n > *k) { + +/* C2 := C2 - W * V2 */ + + i__1 = *n - *k; + sgemm_("No transpose", "No transpose", m, &i__1, k, & + c_b151, &work[work_offset], ldwork, &v[(*k + 1) * + v_dim1 + 1], ldv, &c_b15, &c__[(*k + 1) * c_dim1 + + 1], ldc); + } + +/* W := W * V1 */ + + strmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b15, + &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; +/* L170: */ + } +/* L180: */ + } + + } + + } else { + +/* + Let V = ( V1 V2 ) (V2: last K columns) + where V2 is unit lower triangular. +*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) + + W := C2' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + scopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * + work_dim1 + 1], &c__1); +/* L190: */ + } + +/* W := W * V2' */ + + strmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b15, & + v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[work_offset] + , ldwork); + if (*m > *k) { + +/* W := W + C1'*V1' */ + + i__1 = *m - *k; + sgemm_("Transpose", "Transpose", n, k, &i__1, &c_b15, & + c__[c_offset], ldc, &v[v_offset], ldv, &c_b15, & + work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + strmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b15, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V' * W' */ + + if (*m > *k) { + +/* C1 := C1 - V1' * W' */ + + i__1 = *m - *k; + sgemm_("Transpose", "Transpose", &i__1, n, k, &c_b151, &v[ + v_offset], ldv, &work[work_offset], ldwork, & + c_b15, &c__[c_offset], ldc); + } + +/* W := W * V2 */ + + strmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b15, + &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + +/* C2 := C2 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[*m - *k + j + i__ * c_dim1] -= work[i__ + j * + work_dim1]; +/* L200: */ + } +/* L210: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + W := C * V' = (C1*V1' + C2*V2') (stored in WORK) + + W := C2 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + scopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ + j * work_dim1 + 1], &c__1); +/* L220: */ + } + +/* W := W * V2' */ + + strmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b15, & + v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[work_offset] + , ldwork); + if (*n > *k) { + +/* W := W + C1 * V1' */ + + i__1 = *n - *k; + sgemm_("No transpose", "Transpose", m, k, &i__1, &c_b15, & + c__[c_offset], ldc, &v[v_offset], ldv, &c_b15, & + work[work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + strmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b15, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V */ + + if (*n > *k) { + +/* C1 := C1 - W * V1 */ + + i__1 = *n - *k; + sgemm_("No transpose", "No transpose", m, &i__1, k, & + c_b151, &work[work_offset], ldwork, &v[v_offset], + ldv, &c_b15, &c__[c_offset], ldc); + } + +/* W := W * V2 */ + + strmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b15, + &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + (*n - *k + j) * c_dim1] -= work[i__ + j * + work_dim1]; +/* L230: */ + } +/* L240: */ + } + + } + + } + } + + return 0; + +/* End of SLARFB */ + +} /* slarfb_ */ + +/* Subroutine */ int slarfg_(integer *n, real *alpha, real *x, integer *incx, + real *tau) +{ + /* System generated locals */ + integer i__1; + real r__1; + + /* Builtin functions */ + double r_sign(real *, real *); + + /* Local variables */ + static integer j, knt; + static real beta; + extern doublereal snrm2_(integer *, real *, integer *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + static real xnorm; + extern doublereal slapy2_(real *, real *), slamch_(char *); + static real safmin, rsafmn; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + SLARFG generates a real elementary reflector H of order n, such + that + + H * ( alpha ) = ( beta ), H' * H = I. + ( x ) ( 0 ) + + where alpha and beta are scalars, and x is an (n-1)-element real + vector. H is represented in the form + + H = I - tau * ( 1 ) * ( 1 v' ) , + ( v ) + + where tau is a real scalar and v is a real (n-1)-element + vector. + + If the elements of x are all zero, then tau = 0 and H is taken to be + the unit matrix. + + Otherwise 1 <= tau <= 2. + + Arguments + ========= + + N (input) INTEGER + The order of the elementary reflector. + + ALPHA (input/output) REAL + On entry, the value alpha. + On exit, it is overwritten with the value beta. + + X (input/output) REAL array, dimension + (1+(N-2)*abs(INCX)) + On entry, the vector x. + On exit, it is overwritten with the vector v. + + INCX (input) INTEGER + The increment between elements of X. INCX > 0. + + TAU (output) REAL + The value tau. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n <= 1) { + *tau = 0.f; + return 0; + } + + i__1 = *n - 1; + xnorm = snrm2_(&i__1, &x[1], incx); + + if (xnorm == 0.f) { + +/* H = I */ + + *tau = 0.f; + } else { + +/* general case */ + + r__1 = slapy2_(alpha, &xnorm); + beta = -r_sign(&r__1, alpha); + safmin = slamch_("S") / slamch_("E"); + if (dabs(beta) < safmin) { + +/* XNORM, BETA may be inaccurate; scale X and recompute them */ + + rsafmn = 1.f / safmin; + knt = 0; +L10: + ++knt; + i__1 = *n - 1; + sscal_(&i__1, &rsafmn, &x[1], incx); + beta *= rsafmn; + *alpha *= rsafmn; + if (dabs(beta) < safmin) { + goto L10; + } + +/* New BETA is at most 1, at least SAFMIN */ + + i__1 = *n - 1; + xnorm = snrm2_(&i__1, &x[1], incx); + r__1 = slapy2_(alpha, &xnorm); + beta = -r_sign(&r__1, alpha); + *tau = (beta - *alpha) / beta; + i__1 = *n - 1; + r__1 = 1.f / (*alpha - beta); + sscal_(&i__1, &r__1, &x[1], incx); + +/* If ALPHA is subnormal, it may lose relative accuracy */ + + *alpha = beta; + i__1 = knt; + for (j = 1; j <= i__1; ++j) { + *alpha *= safmin; +/* L20: */ + } + } else { + *tau = (beta - *alpha) / beta; + i__1 = *n - 1; + r__1 = 1.f / (*alpha - beta); + sscal_(&i__1, &r__1, &x[1], incx); + *alpha = beta; + } + } + + return 0; + +/* End of SLARFG */ + +} /* slarfg_ */ + +/* Subroutine */ int slarft_(char *direct, char *storev, integer *n, integer * + k, real *v, integer *ldv, real *tau, real *t, integer *ldt) +{ + /* System generated locals */ + integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3; + real r__1; + + /* Local variables */ + static integer i__, j; + static real vii; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *), strmv_(char *, char *, char *, integer *, real *, + integer *, real *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SLARFT forms the triangular factor T of a real block reflector H + of order n, which is defined as a product of k elementary reflectors. + + If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; + + If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. + + If STOREV = 'C', the vector which defines the elementary reflector + H(i) is stored in the i-th column of the array V, and + + H = I - V * T * V' + + If STOREV = 'R', the vector which defines the elementary reflector + H(i) is stored in the i-th row of the array V, and + + H = I - V' * T * V + + Arguments + ========= + + DIRECT (input) CHARACTER*1 + Specifies the order in which the elementary reflectors are + multiplied to form the block reflector: + = 'F': H = H(1) H(2) . . . H(k) (Forward) + = 'B': H = H(k) . . . H(2) H(1) (Backward) + + STOREV (input) CHARACTER*1 + Specifies how the vectors which define the elementary + reflectors are stored (see also Further Details): + = 'C': columnwise + = 'R': rowwise + + N (input) INTEGER + The order of the block reflector H. N >= 0. + + K (input) INTEGER + The order of the triangular factor T (= the number of + elementary reflectors). K >= 1. + + V (input/output) REAL array, dimension + (LDV,K) if STOREV = 'C' + (LDV,N) if STOREV = 'R' + The matrix V. See further details. + + LDV (input) INTEGER + The leading dimension of the array V. + If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. + + TAU (input) REAL array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i). + + T (output) REAL array, dimension (LDT,K) + The k by k triangular factor T of the block reflector. + If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is + lower triangular. The rest of the array is not used. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= K. + + Further Details + =============== + + The shape of the matrix V and the storage of the vectors which define + the H(i) is best illustrated by the following example with n = 5 and + k = 3. The elements equal to 1 are not stored; the corresponding + array elements are modified but restored on exit. The rest of the + array is not used. + + DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': + + V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) + ( v1 1 ) ( 1 v2 v2 v2 ) + ( v1 v2 1 ) ( 1 v3 v3 ) + ( v1 v2 v3 ) + ( v1 v2 v3 ) + + DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': + + V = ( v1 v2 v3 ) V = ( v1 v1 1 ) + ( v1 v2 v3 ) ( v2 v2 v2 1 ) + ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) + ( 1 v3 ) + ( 1 ) + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + --tau; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + + /* Function Body */ + if (*n == 0) { + return 0; + } + + if (lsame_(direct, "F")) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + if (tau[i__] == 0.f) { + +/* H(i) = I */ + + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + t[j + i__ * t_dim1] = 0.f; +/* L10: */ + } + } else { + +/* general case */ + + vii = v[i__ + i__ * v_dim1]; + v[i__ + i__ * v_dim1] = 1.f; + if (lsame_(storev, "C")) { + +/* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */ + + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + r__1 = -tau[i__]; + sgemv_("Transpose", &i__2, &i__3, &r__1, &v[i__ + v_dim1], + ldv, &v[i__ + i__ * v_dim1], &c__1, &c_b29, &t[ + i__ * t_dim1 + 1], &c__1); + } else { + +/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */ + + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + r__1 = -tau[i__]; + sgemv_("No transpose", &i__2, &i__3, &r__1, &v[i__ * + v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, & + c_b29, &t[i__ * t_dim1 + 1], &c__1); + } + v[i__ + i__ * v_dim1] = vii; + +/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ + + i__2 = i__ - 1; + strmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ + t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); + t[i__ + i__ * t_dim1] = tau[i__]; + } +/* L20: */ + } + } else { + for (i__ = *k; i__ >= 1; --i__) { + if (tau[i__] == 0.f) { + +/* H(i) = I */ + + i__1 = *k; + for (j = i__; j <= i__1; ++j) { + t[j + i__ * t_dim1] = 0.f; +/* L30: */ + } + } else { + +/* general case */ + + if (i__ < *k) { + if (lsame_(storev, "C")) { + vii = v[*n - *k + i__ + i__ * v_dim1]; + v[*n - *k + i__ + i__ * v_dim1] = 1.f; + +/* + T(i+1:k,i) := + - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) +*/ + + i__1 = *n - *k + i__; + i__2 = *k - i__; + r__1 = -tau[i__]; + sgemv_("Transpose", &i__1, &i__2, &r__1, &v[(i__ + 1) + * v_dim1 + 1], ldv, &v[i__ * v_dim1 + 1], & + c__1, &c_b29, &t[i__ + 1 + i__ * t_dim1], & + c__1); + v[*n - *k + i__ + i__ * v_dim1] = vii; + } else { + vii = v[i__ + (*n - *k + i__) * v_dim1]; + v[i__ + (*n - *k + i__) * v_dim1] = 1.f; + +/* + T(i+1:k,i) := + - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' +*/ + + i__1 = *k - i__; + i__2 = *n - *k + i__; + r__1 = -tau[i__]; + sgemv_("No transpose", &i__1, &i__2, &r__1, &v[i__ + + 1 + v_dim1], ldv, &v[i__ + v_dim1], ldv, & + c_b29, &t[i__ + 1 + i__ * t_dim1], &c__1); + v[i__ + (*n - *k + i__) * v_dim1] = vii; + } + +/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ + + i__1 = *k - i__; + strmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ + + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * + t_dim1], &c__1) + ; + } + t[i__ + i__ * t_dim1] = tau[i__]; + } +/* L40: */ + } + } + return 0; + +/* End of SLARFT */ + +} /* slarft_ */ + +/* Subroutine */ int slarfx_(char *side, integer *m, integer *n, real *v, + real *tau, real *c__, integer *ldc, real *work) +{ + /* System generated locals */ + integer c_dim1, c_offset, i__1; + real r__1; + + /* Local variables */ + static integer j; + static real t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, v5, v6, + v7, v8, v9, t10, v10, sum; + extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, + integer *, real *, integer *, real *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SLARFX applies a real elementary reflector H to a real m by n + matrix C, from either the left or the right. H is represented in the + form + + H = I - tau * v * v' + + where tau is a real scalar and v is a real vector. + + If tau = 0, then H is taken to be the unit matrix + + This version uses inline code if H has order < 11. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': form H * C + = 'R': form C * H + + M (input) INTEGER + The number of rows of the matrix C. + + N (input) INTEGER + The number of columns of the matrix C. + + V (input) REAL array, dimension (M) if SIDE = 'L' + or (N) if SIDE = 'R' + The vector v in the representation of H. + + TAU (input) REAL + The value tau in the representation of H. + + C (input/output) REAL array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by the matrix H * C if SIDE = 'L', + or C * H if SIDE = 'R'. + + LDC (input) INTEGER + The leading dimension of the array C. LDA >= (1,M). + + WORK (workspace) REAL array, dimension + (N) if SIDE = 'L' + or (M) if SIDE = 'R' + WORK is not referenced if H has order < 11. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + if (*tau == 0.f) { + return 0; + } + if (lsame_(side, "L")) { + +/* Form H * C, where H has order m. */ + + switch (*m) { + case 1: goto L10; + case 2: goto L30; + case 3: goto L50; + case 4: goto L70; + case 5: goto L90; + case 6: goto L110; + case 7: goto L130; + case 8: goto L150; + case 9: goto L170; + case 10: goto L190; + } + +/* + Code for general M + + w := C'*v +*/ + + sgemv_("Transpose", m, n, &c_b15, &c__[c_offset], ldc, &v[1], &c__1, & + c_b29, &work[1], &c__1); + +/* C := C - tau * v * w' */ + + r__1 = -(*tau); + sger_(m, n, &r__1, &v[1], &c__1, &work[1], &c__1, &c__[c_offset], ldc) + ; + goto L410; +L10: + +/* Special code for 1 x 1 Householder */ + + t1 = 1.f - *tau * v[1] * v[1]; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + c__[j * c_dim1 + 1] = t1 * c__[j * c_dim1 + 1]; +/* L20: */ + } + goto L410; +L30: + +/* Special code for 2 x 2 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; +/* L40: */ + } + goto L410; +L50: + +/* Special code for 3 x 3 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; +/* L60: */ + } + goto L410; +L70: + +/* Special code for 4 x 4 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; +/* L80: */ + } + goto L410; +L90: + +/* Special code for 5 x 5 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; +/* L100: */ + } + goto L410; +L110: + +/* Special code for 6 x 6 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; +/* L120: */ + } + goto L410; +L130: + +/* Special code for 7 x 7 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * + c_dim1 + 7]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; +/* L140: */ + } + goto L410; +L150: + +/* Special code for 8 x 8 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * + c_dim1 + 7] + v8 * c__[j * c_dim1 + 8]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + c__[j * c_dim1 + 8] -= sum * t8; +/* L160: */ + } + goto L410; +L170: + +/* Special code for 9 x 9 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * + c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j * + c_dim1 + 9]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + c__[j * c_dim1 + 8] -= sum * t8; + c__[j * c_dim1 + 9] -= sum * t9; +/* L180: */ + } + goto L410; +L190: + +/* Special code for 10 x 10 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + v10 = v[10]; + t10 = *tau * v10; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * + c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j * + c_dim1 + 9] + v10 * c__[j * c_dim1 + 10]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + c__[j * c_dim1 + 8] -= sum * t8; + c__[j * c_dim1 + 9] -= sum * t9; + c__[j * c_dim1 + 10] -= sum * t10; +/* L200: */ + } + goto L410; + } else { + +/* Form C * H, where H has order n. */ + + switch (*n) { + case 1: goto L210; + case 2: goto L230; + case 3: goto L250; + case 4: goto L270; + case 5: goto L290; + case 6: goto L310; + case 7: goto L330; + case 8: goto L350; + case 9: goto L370; + case 10: goto L390; + } + +/* + Code for general N + + w := C * v +*/ + + sgemv_("No transpose", m, n, &c_b15, &c__[c_offset], ldc, &v[1], & + c__1, &c_b29, &work[1], &c__1); + +/* C := C - tau * w * v' */ + + r__1 = -(*tau); + sger_(m, n, &r__1, &work[1], &c__1, &v[1], &c__1, &c__[c_offset], ldc) + ; + goto L410; +L210: + +/* Special code for 1 x 1 Householder */ + + t1 = 1.f - *tau * v[1] * v[1]; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + c__[j + c_dim1] = t1 * c__[j + c_dim1]; +/* L220: */ + } + goto L410; +L230: + +/* Special code for 2 x 2 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; +/* L240: */ + } + goto L410; +L250: + +/* Special code for 3 x 3 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; +/* L260: */ + } + goto L410; +L270: + +/* Special code for 4 x 4 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; +/* L280: */ + } + goto L410; +L290: + +/* Special code for 5 x 5 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; +/* L300: */ + } + goto L410; +L310: + +/* Special code for 6 x 6 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; +/* L320: */ + } + goto L410; +L330: + +/* Special code for 7 x 7 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; +/* L340: */ + } + goto L410; +L350: + +/* Special code for 8 x 8 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + c__[j + (c_dim1 << 3)] -= sum * t8; +/* L360: */ + } + goto L410; +L370: + +/* Special code for 9 x 9 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)] + v9 * c__[ + j + c_dim1 * 9]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + c__[j + (c_dim1 << 3)] -= sum * t8; + c__[j + c_dim1 * 9] -= sum * t9; +/* L380: */ + } + goto L410; +L390: + +/* Special code for 10 x 10 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + v10 = v[10]; + t10 = *tau * v10; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)] + v9 * c__[ + j + c_dim1 * 9] + v10 * c__[j + c_dim1 * 10]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + c__[j + (c_dim1 << 3)] -= sum * t8; + c__[j + c_dim1 * 9] -= sum * t9; + c__[j + c_dim1 * 10] -= sum * t10; +/* L400: */ + } + goto L410; + } +L410: + return 0; + +/* End of SLARFX */ + +} /* slarfx_ */ + +/* Subroutine */ int slartg_(real *f, real *g, real *cs, real *sn, real *r__) +{ + /* Initialized data */ + + static logical first = TRUE_; + + /* System generated locals */ + integer i__1; + real r__1, r__2; + + /* Builtin functions */ + double log(doublereal), pow_ri(real *, integer *), sqrt(doublereal); + + /* Local variables */ + static integer i__; + static real f1, g1, eps, scale; + static integer count; + static real safmn2, safmx2; + extern doublereal slamch_(char *); + static real safmin; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + SLARTG generate a plane rotation so that + + [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1. + [ -SN CS ] [ G ] [ 0 ] + + This is a slower, more accurate version of the BLAS1 routine SROTG, + with the following other differences: + F and G are unchanged on return. + If G=0, then CS=1 and SN=0. + If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any + floating point operations (saves work in SBDSQR when + there are zeros on the diagonal). + + If F exceeds G in magnitude, CS will be positive. + + Arguments + ========= + + F (input) REAL + The first component of vector to be rotated. + + G (input) REAL + The second component of vector to be rotated. + + CS (output) REAL + The cosine of the rotation. + + SN (output) REAL + The sine of the rotation. + + R (output) REAL + The nonzero component of the rotated vector. + + ===================================================================== +*/ + + + if (first) { + first = FALSE_; + safmin = slamch_("S"); + eps = slamch_("E"); + r__1 = slamch_("B"); + i__1 = (integer) (log(safmin / eps) / log(slamch_("B")) / + 2.f); + safmn2 = pow_ri(&r__1, &i__1); + safmx2 = 1.f / safmn2; + } + if (*g == 0.f) { + *cs = 1.f; + *sn = 0.f; + *r__ = *f; + } else if (*f == 0.f) { + *cs = 0.f; + *sn = 1.f; + *r__ = *g; + } else { + f1 = *f; + g1 = *g; +/* Computing MAX */ + r__1 = dabs(f1), r__2 = dabs(g1); + scale = dmax(r__1,r__2); + if (scale >= safmx2) { + count = 0; +L10: + ++count; + f1 *= safmn2; + g1 *= safmn2; +/* Computing MAX */ + r__1 = dabs(f1), r__2 = dabs(g1); + scale = dmax(r__1,r__2); + if (scale >= safmx2) { + goto L10; + } +/* Computing 2nd power */ + r__1 = f1; +/* Computing 2nd power */ + r__2 = g1; + *r__ = sqrt(r__1 * r__1 + r__2 * r__2); + *cs = f1 / *r__; + *sn = g1 / *r__; + i__1 = count; + for (i__ = 1; i__ <= i__1; ++i__) { + *r__ *= safmx2; +/* L20: */ + } + } else if (scale <= safmn2) { + count = 0; +L30: + ++count; + f1 *= safmx2; + g1 *= safmx2; +/* Computing MAX */ + r__1 = dabs(f1), r__2 = dabs(g1); + scale = dmax(r__1,r__2); + if (scale <= safmn2) { + goto L30; + } +/* Computing 2nd power */ + r__1 = f1; +/* Computing 2nd power */ + r__2 = g1; + *r__ = sqrt(r__1 * r__1 + r__2 * r__2); + *cs = f1 / *r__; + *sn = g1 / *r__; + i__1 = count; + for (i__ = 1; i__ <= i__1; ++i__) { + *r__ *= safmn2; +/* L40: */ + } + } else { +/* Computing 2nd power */ + r__1 = f1; +/* Computing 2nd power */ + r__2 = g1; + *r__ = sqrt(r__1 * r__1 + r__2 * r__2); + *cs = f1 / *r__; + *sn = g1 / *r__; + } + if (dabs(*f) > dabs(*g) && *cs < 0.f) { + *cs = -(*cs); + *sn = -(*sn); + *r__ = -(*r__); + } + } + return 0; + +/* End of SLARTG */ + +} /* slartg_ */ + +/* Subroutine */ int slas2_(real *f, real *g, real *h__, real *ssmin, real * + ssmax) +{ + /* System generated locals */ + real r__1, r__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static real c__, fa, ga, ha, as, at, au, fhmn, fhmx; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + SLAS2 computes the singular values of the 2-by-2 matrix + [ F G ] + [ 0 H ]. + On return, SSMIN is the smaller singular value and SSMAX is the + larger singular value. + + Arguments + ========= + + F (input) REAL + The (1,1) element of the 2-by-2 matrix. + + G (input) REAL + The (1,2) element of the 2-by-2 matrix. + + H (input) REAL + The (2,2) element of the 2-by-2 matrix. + + SSMIN (output) REAL + The smaller singular value. + + SSMAX (output) REAL + The larger singular value. + + Further Details + =============== + + Barring over/underflow, all output quantities are correct to within + a few units in the last place (ulps), even in the absence of a guard + digit in addition/subtraction. + + In IEEE arithmetic, the code works correctly if one matrix element is + infinite. + + Overflow will not occur unless the largest singular value itself + overflows, or is within a few ulps of overflow. (On machines with + partial overflow, like the Cray, overflow may occur if the largest + singular value is within a factor of 2 of overflow.) + + Underflow is harmless if underflow is gradual. Otherwise, results + may correspond to a matrix modified by perturbations of size near + the underflow threshold. + + ==================================================================== +*/ + + + fa = dabs(*f); + ga = dabs(*g); + ha = dabs(*h__); + fhmn = dmin(fa,ha); + fhmx = dmax(fa,ha); + if (fhmn == 0.f) { + *ssmin = 0.f; + if (fhmx == 0.f) { + *ssmax = ga; + } else { +/* Computing 2nd power */ + r__1 = dmin(fhmx,ga) / dmax(fhmx,ga); + *ssmax = dmax(fhmx,ga) * sqrt(r__1 * r__1 + 1.f); + } + } else { + if (ga < fhmx) { + as = fhmn / fhmx + 1.f; + at = (fhmx - fhmn) / fhmx; +/* Computing 2nd power */ + r__1 = ga / fhmx; + au = r__1 * r__1; + c__ = 2.f / (sqrt(as * as + au) + sqrt(at * at + au)); + *ssmin = fhmn * c__; + *ssmax = fhmx / c__; + } else { + au = fhmx / ga; + if (au == 0.f) { + +/* + Avoid possible harmful underflow if exponent range + asymmetric (true SSMIN may not underflow even if + AU underflows) +*/ + + *ssmin = fhmn * fhmx / ga; + *ssmax = ga; + } else { + as = fhmn / fhmx + 1.f; + at = (fhmx - fhmn) / fhmx; +/* Computing 2nd power */ + r__1 = as * au; +/* Computing 2nd power */ + r__2 = at * au; + c__ = 1.f / (sqrt(r__1 * r__1 + 1.f) + sqrt(r__2 * r__2 + 1.f) + ); + *ssmin = fhmn * c__ * au; + *ssmin += *ssmin; + *ssmax = ga / (c__ + c__); + } + } + } + return 0; + +/* End of SLAS2 */ + +} /* slas2_ */ + +/* Subroutine */ int slascl_(char *type__, integer *kl, integer *ku, real * + cfrom, real *cto, integer *m, integer *n, real *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + + /* Local variables */ + static integer i__, j, k1, k2, k3, k4; + static real mul, cto1; + static logical done; + static real ctoc; + extern logical lsame_(char *, char *); + static integer itype; + static real cfrom1; + extern doublereal slamch_(char *); + static real cfromc; + extern /* Subroutine */ int xerbla_(char *, integer *); + static real bignum, smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SLASCL multiplies the M by N real matrix A by the real scalar + CTO/CFROM. This is done without over/underflow as long as the final + result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that + A may be full, upper triangular, lower triangular, upper Hessenberg, + or banded. + + Arguments + ========= + + TYPE (input) CHARACTER*1 + TYPE indices the storage type of the input matrix. + = 'G': A is a full matrix. + = 'L': A is a lower triangular matrix. + = 'U': A is an upper triangular matrix. + = 'H': A is an upper Hessenberg matrix. + = 'B': A is a symmetric band matrix with lower bandwidth KL + and upper bandwidth KU and with the only the lower + half stored. + = 'Q': A is a symmetric band matrix with lower bandwidth KL + and upper bandwidth KU and with the only the upper + half stored. + = 'Z': A is a band matrix with lower bandwidth KL and upper + bandwidth KU. + + KL (input) INTEGER + The lower bandwidth of A. Referenced only if TYPE = 'B', + 'Q' or 'Z'. + + KU (input) INTEGER + The upper bandwidth of A. Referenced only if TYPE = 'B', + 'Q' or 'Z'. + + CFROM (input) REAL + CTO (input) REAL + The matrix A is multiplied by CTO/CFROM. A(I,J) is computed + without over/underflow if the final result CTO*A(I,J)/CFROM + can be represented without over/underflow. CFROM must be + nonzero. + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,M) + The matrix to be multiplied by CTO/CFROM. See TYPE for the + storage type. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + INFO (output) INTEGER + 0 - successful exit + <0 - if INFO = -i, the i-th argument had an illegal value. + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + + if (lsame_(type__, "G")) { + itype = 0; + } else if (lsame_(type__, "L")) { + itype = 1; + } else if (lsame_(type__, "U")) { + itype = 2; + } else if (lsame_(type__, "H")) { + itype = 3; + } else if (lsame_(type__, "B")) { + itype = 4; + } else if (lsame_(type__, "Q")) { + itype = 5; + } else if (lsame_(type__, "Z")) { + itype = 6; + } else { + itype = -1; + } + + if (itype == -1) { + *info = -1; + } else if (*cfrom == 0.f) { + *info = -4; + } else if (*m < 0) { + *info = -6; + } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { + *info = -7; + } else if (itype <= 3 && *lda < max(1,*m)) { + *info = -9; + } else if (itype >= 4) { +/* Computing MAX */ + i__1 = *m - 1; + if (*kl < 0 || *kl > max(i__1,0)) { + *info = -2; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *n - 1; + if (*ku < 0 || *ku > max(i__1,0) || (itype == 4 || itype == 5) && + *kl != *ku) { + *info = -3; + } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < * + ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) { + *info = -9; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLASCL", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *m == 0) { + return 0; + } + +/* Get machine parameters */ + + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + + cfromc = *cfrom; + ctoc = *cto; + +L10: + cfrom1 = cfromc * smlnum; + cto1 = ctoc / bignum; + if (dabs(cfrom1) > dabs(ctoc) && ctoc != 0.f) { + mul = smlnum; + done = FALSE_; + cfromc = cfrom1; + } else if (dabs(cto1) > dabs(cfromc)) { + mul = bignum; + done = FALSE_; + ctoc = cto1; + } else { + mul = ctoc / cfromc; + done = TRUE_; + } + + if (itype == 0) { + +/* Full matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L20: */ + } +/* L30: */ + } + + } else if (itype == 1) { + +/* Lower triangular matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L40: */ + } +/* L50: */ + } + + } else if (itype == 2) { + +/* Upper triangular matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L60: */ + } +/* L70: */ + } + + } else if (itype == 3) { + +/* Upper Hessenberg matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j + 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L80: */ + } +/* L90: */ + } + + } else if (itype == 4) { + +/* Lower half of a symmetric band matrix */ + + k3 = *kl + 1; + k4 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = k3, i__4 = k4 - j; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L100: */ + } +/* L110: */ + } + + } else if (itype == 5) { + +/* Upper half of a symmetric band matrix */ + + k1 = *ku + 2; + k3 = *ku + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = k1 - j; + i__3 = k3; + for (i__ = max(i__2,1); i__ <= i__3; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L120: */ + } +/* L130: */ + } + + } else if (itype == 6) { + +/* Band matrix */ + + k1 = *kl + *ku + 2; + k2 = *kl + 1; + k3 = (*kl << 1) + *ku + 1; + k4 = *kl + *ku + 1 + *m; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__3 = k1 - j; +/* Computing MIN */ + i__4 = k3, i__5 = k4 - j; + i__2 = min(i__4,i__5); + for (i__ = max(i__3,k2); i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L140: */ + } +/* L150: */ + } + + } + + if (! done) { + goto L10; + } + + return 0; + +/* End of SLASCL */ + +} /* slascl_ */ + +/* Subroutine */ int slasd0_(integer *n, integer *sqre, real *d__, real *e, + real *u, integer *ldu, real *vt, integer *ldvt, integer *smlsiz, + integer *iwork, real *work, integer *info) +{ + /* System generated locals */ + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + + /* Local variables */ + static integer i__, j, m, i1, ic, lf, nd, ll, nl, nr, im1, ncc, nlf, nrf, + iwk, lvl, ndb1, nlp1, nrp1; + static real beta; + static integer idxq, nlvl; + static real alpha; + static integer inode, ndiml, idxqc, ndimr, itemp, sqrei; + extern /* Subroutine */ int slasd1_(integer *, integer *, integer *, real + *, real *, real *, real *, integer *, real *, integer *, integer * + , integer *, real *, integer *), xerbla_(char *, integer *), slasdq_(char *, integer *, integer *, integer *, integer + *, integer *, real *, real *, real *, integer *, real *, integer * + , real *, integer *, real *, integer *), slasdt_(integer * + , integer *, integer *, integer *, integer *, integer *, integer * + ); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + Using a divide and conquer approach, SLASD0 computes the singular + value decomposition (SVD) of a real upper bidiagonal N-by-M + matrix B with diagonal D and offdiagonal E, where M = N + SQRE. + The algorithm computes orthogonal matrices U and VT such that + B = U * S * VT. The singular values S are overwritten on D. + + A related subroutine, SLASDA, computes only the singular values, + and optionally, the singular vectors in compact form. + + Arguments + ========= + + N (input) INTEGER + On entry, the row dimension of the upper bidiagonal matrix. + This is also the dimension of the main diagonal array D. + + SQRE (input) INTEGER + Specifies the column dimension of the bidiagonal matrix. + = 0: The bidiagonal matrix has column dimension M = N; + = 1: The bidiagonal matrix has column dimension M = N+1; + + D (input/output) REAL array, dimension (N) + On entry D contains the main diagonal of the bidiagonal + matrix. + On exit D, if INFO = 0, contains its singular values. + + E (input) REAL array, dimension (M-1) + Contains the subdiagonal entries of the bidiagonal matrix. + On exit, E has been destroyed. + + U (output) REAL array, dimension at least (LDQ, N) + On exit, U contains the left singular vectors. + + LDU (input) INTEGER + On entry, leading dimension of U. + + VT (output) REAL array, dimension at least (LDVT, M) + On exit, VT' contains the right singular vectors. + + LDVT (input) INTEGER + On entry, leading dimension of VT. + + SMLSIZ (input) INTEGER + On entry, maximum size of the subproblems at the + bottom of the computation tree. + + IWORK INTEGER work array. + Dimension must be at least (8 * N) + + WORK REAL work array. + Dimension must be at least (3 * M**2 + 2 * M) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --iwork; + --work; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -1; + } else if (*sqre < 0 || *sqre > 1) { + *info = -2; + } + + m = *n + *sqre; + + if (*ldu < *n) { + *info = -6; + } else if (*ldvt < m) { + *info = -8; + } else if (*smlsiz < 3) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLASD0", &i__1); + return 0; + } + +/* If the input matrix is too small, call SLASDQ to find the SVD. */ + + if (*n <= *smlsiz) { + slasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset], + ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], info); + return 0; + } + +/* Set up the computation tree. */ + + inode = 1; + ndiml = inode + *n; + ndimr = ndiml + *n; + idxq = ndimr + *n; + iwk = idxq + *n; + slasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], + smlsiz); + +/* + For the nodes on bottom level of the tree, solve + their subproblems by SLASDQ. +*/ + + ndb1 = (nd + 1) / 2; + ncc = 0; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + +/* + IC : center row of each node + NL : number of rows of left subproblem + NR : number of rows of right subproblem + NLF: starting row of the left subproblem + NRF: starting row of the right subproblem +*/ + + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nlp1 = nl + 1; + nr = iwork[ndimr + i1]; + nrp1 = nr + 1; + nlf = ic - nl; + nrf = ic + 1; + sqrei = 1; + slasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], &vt[ + nlf + nlf * vt_dim1], ldvt, &u[nlf + nlf * u_dim1], ldu, &u[ + nlf + nlf * u_dim1], ldu, &work[1], info); + if (*info != 0) { + return 0; + } + itemp = idxq + nlf - 2; + i__2 = nl; + for (j = 1; j <= i__2; ++j) { + iwork[itemp + j] = j; +/* L10: */ + } + if (i__ == nd) { + sqrei = *sqre; + } else { + sqrei = 1; + } + nrp1 = nr + sqrei; + slasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], &vt[ + nrf + nrf * vt_dim1], ldvt, &u[nrf + nrf * u_dim1], ldu, &u[ + nrf + nrf * u_dim1], ldu, &work[1], info); + if (*info != 0) { + return 0; + } + itemp = idxq + ic; + i__2 = nr; + for (j = 1; j <= i__2; ++j) { + iwork[itemp + j - 1] = j; +/* L20: */ + } +/* L30: */ + } + +/* Now conquer each subproblem bottom-up. */ + + for (lvl = nlvl; lvl >= 1; --lvl) { + +/* + Find the first node LF and last node LL on the + current level LVL. +*/ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__1 = lvl - 1; + lf = pow_ii(&c__2, &i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; + for (i__ = lf; i__ <= i__1; ++i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + if (*sqre == 0 && i__ == ll) { + sqrei = *sqre; + } else { + sqrei = 1; + } + idxqc = idxq + nlf - 1; + alpha = d__[ic]; + beta = e[ic]; + slasd1_(&nl, &nr, &sqrei, &d__[nlf], &alpha, &beta, &u[nlf + nlf * + u_dim1], ldu, &vt[nlf + nlf * vt_dim1], ldvt, &iwork[ + idxqc], &iwork[iwk], &work[1], info); + if (*info != 0) { + return 0; + } +/* L40: */ + } +/* L50: */ + } + + return 0; + +/* End of SLASD0 */ + +} /* slasd0_ */ + +/* Subroutine */ int slasd1_(integer *nl, integer *nr, integer *sqre, real * + d__, real *alpha, real *beta, real *u, integer *ldu, real *vt, + integer *ldvt, integer *idxq, integer *iwork, real *work, integer * + info) +{ + /* System generated locals */ + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1; + real r__1, r__2; + + /* Local variables */ + static integer i__, k, m, n, n1, n2, iq, iz, iu2, ldq, idx, ldu2, ivt2, + idxc, idxp, ldvt2; + extern /* Subroutine */ int slasd2_(integer *, integer *, integer *, + integer *, real *, real *, real *, real *, real *, integer *, + real *, integer *, real *, real *, integer *, real *, integer *, + integer *, integer *, integer *, integer *, integer *, integer *), + slasd3_(integer *, integer *, integer *, integer *, real *, real + *, integer *, real *, real *, integer *, real *, integer *, real * + , integer *, real *, integer *, integer *, integer *, real *, + integer *); + static integer isigma; + extern /* Subroutine */ int xerbla_(char *, integer *), slascl_( + char *, integer *, integer *, real *, real *, integer *, integer * + , real *, integer *, integer *), slamrg_(integer *, + integer *, real *, integer *, integer *, integer *); + static real orgnrm; + static integer coltyp; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B, + where N = NL + NR + 1 and M = N + SQRE. SLASD1 is called from SLASD0. + + A related subroutine SLASD7 handles the case in which the singular + values (and the singular vectors in factored form) are desired. + + SLASD1 computes the SVD as follows: + + ( D1(in) 0 0 0 ) + B = U(in) * ( Z1' a Z2' b ) * VT(in) + ( 0 0 D2(in) 0 ) + + = U(out) * ( D(out) 0) * VT(out) + + where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M + with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros + elsewhere; and the entry b is empty if SQRE = 0. + + The left singular vectors of the original matrix are stored in U, and + the transpose of the right singular vectors are stored in VT, and the + singular values are in D. The algorithm consists of three stages: + + The first stage consists of deflating the size of the problem + when there are multiple singular values or when there are zeros in + the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. This stage is + performed by the routine SLASD2. + + The second stage consists of calculating the updated + singular values. This is done by finding the square roots of the + roots of the secular equation via the routine SLASD4 (as called + by SLASD3). This routine also calculates the singular vectors of + the current problem. + + The final stage consists of computing the updated singular vectors + directly using the updated singular values. The singular vectors + for the current problem are multiplied with the singular vectors + from the overall problem. + + Arguments + ========= + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has row dimension N = NL + NR + 1, + and column dimension M = N + SQRE. + + D (input/output) REAL array, + dimension (N = NL+NR+1). + On entry D(1:NL,1:NL) contains the singular values of the + upper block; and D(NL+2:N) contains the singular values of + the lower block. On exit D(1:N) contains the singular values + of the modified matrix. + + ALPHA (input) REAL + Contains the diagonal element associated with the added row. + + BETA (input) REAL + Contains the off-diagonal element associated with the added + row. + + U (input/output) REAL array, dimension(LDU,N) + On entry U(1:NL, 1:NL) contains the left singular vectors of + the upper block; U(NL+2:N, NL+2:N) contains the left singular + vectors of the lower block. On exit U contains the left + singular vectors of the bidiagonal matrix. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= max( 1, N ). + + VT (input/output) REAL array, dimension(LDVT,M) + where M = N + SQRE. + On entry VT(1:NL+1, 1:NL+1)' contains the right singular + vectors of the upper block; VT(NL+2:M, NL+2:M)' contains + the right singular vectors of the lower block. On exit + VT' contains the right singular vectors of the + bidiagonal matrix. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= max( 1, M ). + + IDXQ (output) INTEGER array, dimension(N) + This contains the permutation which will reintegrate the + subproblem just solved back into sorted order, i.e. + D( IDXQ( I = 1, N ) ) will be in ascending order. + + IWORK (workspace) INTEGER array, dimension( 4 * N ) + + WORK (workspace) REAL array, dimension( 3*M**2 + 2*M ) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --idxq; + --iwork; + --work; + + /* Function Body */ + *info = 0; + + if (*nl < 1) { + *info = -1; + } else if (*nr < 1) { + *info = -2; + } else if (*sqre < 0 || *sqre > 1) { + *info = -3; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLASD1", &i__1); + return 0; + } + + n = *nl + *nr + 1; + m = n + *sqre; + +/* + The following values are for bookkeeping purposes only. They are + integer pointers which indicate the portion of the workspace + used by a particular array in SLASD2 and SLASD3. +*/ + + ldu2 = n; + ldvt2 = m; + + iz = 1; + isigma = iz + m; + iu2 = isigma + n; + ivt2 = iu2 + ldu2 * n; + iq = ivt2 + ldvt2 * m; + + idx = 1; + idxc = idx + n; + coltyp = idxc + n; + idxp = coltyp + n; + +/* + Scale. + + Computing MAX +*/ + r__1 = dabs(*alpha), r__2 = dabs(*beta); + orgnrm = dmax(r__1,r__2); + d__[*nl + 1] = 0.f; + i__1 = n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((r__1 = d__[i__], dabs(r__1)) > orgnrm) { + orgnrm = (r__1 = d__[i__], dabs(r__1)); + } +/* L10: */ + } + slascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &n, &c__1, &d__[1], &n, info); + *alpha /= orgnrm; + *beta /= orgnrm; + +/* Deflate singular values. */ + + slasd2_(nl, nr, sqre, &k, &d__[1], &work[iz], alpha, beta, &u[u_offset], + ldu, &vt[vt_offset], ldvt, &work[isigma], &work[iu2], &ldu2, & + work[ivt2], &ldvt2, &iwork[idxp], &iwork[idx], &iwork[idxc], & + idxq[1], &iwork[coltyp], info); + +/* Solve Secular Equation and update singular vectors. */ + + ldq = k; + slasd3_(nl, nr, sqre, &k, &d__[1], &work[iq], &ldq, &work[isigma], &u[ + u_offset], ldu, &work[iu2], &ldu2, &vt[vt_offset], ldvt, &work[ + ivt2], &ldvt2, &iwork[idxc], &iwork[coltyp], &work[iz], info); + if (*info != 0) { + return 0; + } + +/* Unscale. */ + + slascl_("G", &c__0, &c__0, &c_b15, &orgnrm, &n, &c__1, &d__[1], &n, info); + +/* Prepare the IDXQ sorting permutation. */ + + n1 = k; + n2 = n - k; + slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); + + return 0; + +/* End of SLASD1 */ + +} /* slasd1_ */ + +/* Subroutine */ int slasd2_(integer *nl, integer *nr, integer *sqre, integer + *k, real *d__, real *z__, real *alpha, real *beta, real *u, integer * + ldu, real *vt, integer *ldvt, real *dsigma, real *u2, integer *ldu2, + real *vt2, integer *ldvt2, integer *idxp, integer *idx, integer *idxc, + integer *idxq, integer *coltyp, integer *info) +{ + /* System generated locals */ + integer u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, + vt2_dim1, vt2_offset, i__1; + real r__1, r__2; + + /* Local variables */ + static real c__; + static integer i__, j, m, n; + static real s; + static integer k2; + static real z1; + static integer ct, jp; + static real eps, tau, tol; + static integer psm[4], nlp1, nlp2, idxi, idxj, ctot[4]; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *); + static integer idxjp, jprev; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + extern doublereal slapy2_(real *, real *), slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_( + integer *, integer *, real *, integer *, integer *, integer *); + static real hlftol; + extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, + integer *, real *, integer *), slaset_(char *, integer *, + integer *, real *, real *, real *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + October 31, 1999 + + + Purpose + ======= + + SLASD2 merges the two sets of singular values together into a single + sorted set. Then it tries to deflate the size of the problem. + There are two ways in which deflation can occur: when two or more + singular values are close together or if there is a tiny entry in the + Z vector. For each such occurrence the order of the related secular + equation problem is reduced by one. + + SLASD2 is called from SLASD1. + + Arguments + ========= + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has N = NL + NR + 1 rows and + M = N + SQRE >= N columns. + + K (output) INTEGER + Contains the dimension of the non-deflated matrix, + This is the order of the related secular equation. 1 <= K <=N. + + D (input/output) REAL array, dimension(N) + On entry D contains the singular values of the two submatrices + to be combined. On exit D contains the trailing (N-K) updated + singular values (those which were deflated) sorted into + increasing order. + + ALPHA (input) REAL + Contains the diagonal element associated with the added row. + + BETA (input) REAL + Contains the off-diagonal element associated with the added + row. + + U (input/output) REAL array, dimension(LDU,N) + On entry U contains the left singular vectors of two + submatrices in the two square blocks with corners at (1,1), + (NL, NL), and (NL+2, NL+2), (N,N). + On exit U contains the trailing (N-K) updated left singular + vectors (those which were deflated) in its last N-K columns. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= N. + + Z (output) REAL array, dimension(N) + On exit Z contains the updating row vector in the secular + equation. + + DSIGMA (output) REAL array, dimension (N) + Contains a copy of the diagonal elements (K-1 singular values + and one zero) in the secular equation. + + U2 (output) REAL array, dimension(LDU2,N) + Contains a copy of the first K-1 left singular vectors which + will be used by SLASD3 in a matrix multiply (SGEMM) to solve + for the new left singular vectors. U2 is arranged into four + blocks. The first block contains a column with 1 at NL+1 and + zero everywhere else; the second block contains non-zero + entries only at and above NL; the third contains non-zero + entries only below NL+1; and the fourth is dense. + + LDU2 (input) INTEGER + The leading dimension of the array U2. LDU2 >= N. + + VT (input/output) REAL array, dimension(LDVT,M) + On entry VT' contains the right singular vectors of two + submatrices in the two square blocks with corners at (1,1), + (NL+1, NL+1), and (NL+2, NL+2), (M,M). + On exit VT' contains the trailing (N-K) updated right singular + vectors (those which were deflated) in its last N-K columns. + In case SQRE =1, the last row of VT spans the right null + space. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= M. + + VT2 (output) REAL array, dimension(LDVT2,N) + VT2' contains a copy of the first K right singular vectors + which will be used by SLASD3 in a matrix multiply (SGEMM) to + solve for the new right singular vectors. VT2 is arranged into + three blocks. The first block contains a row that corresponds + to the special 0 diagonal element in SIGMA; the second block + contains non-zeros only at and before NL +1; the third block + contains non-zeros only at and after NL +2. + + LDVT2 (input) INTEGER + The leading dimension of the array VT2. LDVT2 >= M. + + IDXP (workspace) INTEGER array, dimension(N) + This will contain the permutation used to place deflated + values of D at the end of the array. On output IDXP(2:K) + points to the nondeflated D-values and IDXP(K+1:N) + points to the deflated singular values. + + IDX (workspace) INTEGER array, dimension(N) + This will contain the permutation used to sort the contents of + D into ascending order. + + IDXC (output) INTEGER array, dimension(N) + This will contain the permutation used to arrange the columns + of the deflated U matrix into three groups: the first group + contains non-zero entries only at and above NL, the second + contains non-zero entries only below NL+2, and the third is + dense. + + COLTYP (workspace/output) INTEGER array, dimension(N) + As workspace, this will contain a label which will indicate + which of the following types a column in the U2 matrix or a + row in the VT2 matrix is: + 1 : non-zero in the upper half only + 2 : non-zero in the lower half only + 3 : dense + 4 : deflated + + On exit, it is an array of dimension 4, with COLTYP(I) being + the dimension of the I-th type columns. + + IDXQ (input) INTEGER array, dimension(N) + This contains the permutation which separately sorts the two + sub-problems in D into ascending order. Note that entries in + the first hlaf of this permutation must first be moved one + position backward; and entries in the second half + must first have NL+1 added to their values. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --z__; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --dsigma; + u2_dim1 = *ldu2; + u2_offset = 1 + u2_dim1; + u2 -= u2_offset; + vt2_dim1 = *ldvt2; + vt2_offset = 1 + vt2_dim1; + vt2 -= vt2_offset; + --idxp; + --idx; + --idxc; + --idxq; + --coltyp; + + /* Function Body */ + *info = 0; + + if (*nl < 1) { + *info = -1; + } else if (*nr < 1) { + *info = -2; + } else if (*sqre != 1 && *sqre != 0) { + *info = -3; + } + + n = *nl + *nr + 1; + m = n + *sqre; + + if (*ldu < n) { + *info = -10; + } else if (*ldvt < m) { + *info = -12; + } else if (*ldu2 < n) { + *info = -15; + } else if (*ldvt2 < m) { + *info = -17; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLASD2", &i__1); + return 0; + } + + nlp1 = *nl + 1; + nlp2 = *nl + 2; + +/* + Generate the first part of the vector Z; and move the singular + values in the first part of D one position backward. +*/ + + z1 = *alpha * vt[nlp1 + nlp1 * vt_dim1]; + z__[1] = z1; + for (i__ = *nl; i__ >= 1; --i__) { + z__[i__ + 1] = *alpha * vt[i__ + nlp1 * vt_dim1]; + d__[i__ + 1] = d__[i__]; + idxq[i__ + 1] = idxq[i__] + 1; +/* L10: */ + } + +/* Generate the second part of the vector Z. */ + + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + z__[i__] = *beta * vt[i__ + nlp2 * vt_dim1]; +/* L20: */ + } + +/* Initialize some reference arrays. */ + + i__1 = nlp1; + for (i__ = 2; i__ <= i__1; ++i__) { + coltyp[i__] = 1; +/* L30: */ + } + i__1 = n; + for (i__ = nlp2; i__ <= i__1; ++i__) { + coltyp[i__] = 2; +/* L40: */ + } + +/* Sort the singular values into increasing order */ + + i__1 = n; + for (i__ = nlp2; i__ <= i__1; ++i__) { + idxq[i__] += nlp1; +/* L50: */ + } + +/* + DSIGMA, IDXC, IDXC, and the first column of U2 + are used as storage space. +*/ + + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + dsigma[i__] = d__[idxq[i__]]; + u2[i__ + u2_dim1] = z__[idxq[i__]]; + idxc[i__] = coltyp[idxq[i__]]; +/* L60: */ + } + + slamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); + + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + idxi = idx[i__] + 1; + d__[i__] = dsigma[idxi]; + z__[i__] = u2[idxi + u2_dim1]; + coltyp[i__] = idxc[idxi]; +/* L70: */ + } + +/* Calculate the allowable deflation tolerance */ + + eps = slamch_("Epsilon"); +/* Computing MAX */ + r__1 = dabs(*alpha), r__2 = dabs(*beta); + tol = dmax(r__1,r__2); +/* Computing MAX */ + r__2 = (r__1 = d__[n], dabs(r__1)); + tol = eps * 8.f * dmax(r__2,tol); + +/* + There are 2 kinds of deflation -- first a value in the z-vector + is small, second two (or more) singular values are very close + together (their difference is small). + + If the value in the z-vector is small, we simply permute the + array so that the corresponding singular value is moved to the + end. + + If two values in the D-vector are close, we perform a two-sided + rotation designed to make one of the corresponding z-vector + entries zero, and then permute the array so that the deflated + singular value is moved to the end. + + If there are multiple singular values then the problem deflates. + Here the number of equal singular values are found. As each equal + singular value is found, an elementary reflector is computed to + rotate the corresponding singular subspace so that the + corresponding components of Z are zero in this new basis. +*/ + + *k = 1; + k2 = n + 1; + i__1 = n; + for (j = 2; j <= i__1; ++j) { + if ((r__1 = z__[j], dabs(r__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + idxp[k2] = j; + coltyp[j] = 4; + if (j == n) { + goto L120; + } + } else { + jprev = j; + goto L90; + } +/* L80: */ + } +L90: + j = jprev; +L100: + ++j; + if (j > n) { + goto L110; + } + if ((r__1 = z__[j], dabs(r__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + idxp[k2] = j; + coltyp[j] = 4; + } else { + +/* Check if singular values are close enough to allow deflation. */ + + if ((r__1 = d__[j] - d__[jprev], dabs(r__1)) <= tol) { + +/* Deflation is possible. */ + + s = z__[jprev]; + c__ = z__[j]; + +/* + Find sqrt(a**2+b**2) without overflow or + destructive underflow. +*/ + + tau = slapy2_(&c__, &s); + c__ /= tau; + s = -s / tau; + z__[j] = tau; + z__[jprev] = 0.f; + +/* + Apply back the Givens rotation to the left and right + singular vector matrices. +*/ + + idxjp = idxq[idx[jprev] + 1]; + idxj = idxq[idx[j] + 1]; + if (idxjp <= nlp1) { + --idxjp; + } + if (idxj <= nlp1) { + --idxj; + } + srot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], & + c__1, &c__, &s); + srot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, & + c__, &s); + if (coltyp[j] != coltyp[jprev]) { + coltyp[j] = 3; + } + coltyp[jprev] = 4; + --k2; + idxp[k2] = jprev; + jprev = j; + } else { + ++(*k); + u2[*k + u2_dim1] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + jprev = j; + } + } + goto L100; +L110: + +/* Record the last singular value. */ + + ++(*k); + u2[*k + u2_dim1] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + +L120: + +/* + Count up the total number of the various types of columns, then + form a permutation which positions the four column types into + four groups of uniform structure (although one or more of these + groups may be empty). +*/ + + for (j = 1; j <= 4; ++j) { + ctot[j - 1] = 0; +/* L130: */ + } + i__1 = n; + for (j = 2; j <= i__1; ++j) { + ct = coltyp[j]; + ++ctot[ct - 1]; +/* L140: */ + } + +/* PSM(*) = Position in SubMatrix (of types 1 through 4) */ + + psm[0] = 2; + psm[1] = ctot[0] + 2; + psm[2] = psm[1] + ctot[1]; + psm[3] = psm[2] + ctot[2]; + +/* + Fill out the IDXC array so that the permutation which it induces + will place all type-1 columns first, all type-2 columns next, + then all type-3's, and finally all type-4's, starting from the + second column. This applies similarly to the rows of VT. +*/ + + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + ct = coltyp[jp]; + idxc[psm[ct - 1]] = j; + ++psm[ct - 1]; +/* L150: */ + } + +/* + Sort the singular values and corresponding singular vectors into + DSIGMA, U2, and VT2 respectively. The singular values/vectors + which were not deflated go into the first K slots of DSIGMA, U2, + and VT2 respectively, while those which were deflated go into the + last N - K slots, except that the first column/row will be treated + separately. +*/ + + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + dsigma[j] = d__[jp]; + idxj = idxq[idx[idxp[idxc[j]]] + 1]; + if (idxj <= nlp1) { + --idxj; + } + scopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1); + scopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2); +/* L160: */ + } + +/* Determine DSIGMA(1), DSIGMA(2) and Z(1) */ + + dsigma[1] = 0.f; + hlftol = tol / 2.f; + if (dabs(dsigma[2]) <= hlftol) { + dsigma[2] = hlftol; + } + if (m > n) { + z__[1] = slapy2_(&z1, &z__[m]); + if (z__[1] <= tol) { + c__ = 1.f; + s = 0.f; + z__[1] = tol; + } else { + c__ = z1 / z__[1]; + s = z__[m] / z__[1]; + } + } else { + if (dabs(z1) <= tol) { + z__[1] = tol; + } else { + z__[1] = z1; + } + } + +/* Move the rest of the updating row to Z. */ + + i__1 = *k - 1; + scopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1); + +/* + Determine the first column of U2, the first row of VT2 and the + last row of VT. +*/ + + slaset_("A", &n, &c__1, &c_b29, &c_b29, &u2[u2_offset], ldu2); + u2[nlp1 + u2_dim1] = 1.f; + if (m > n) { + i__1 = nlp1; + for (i__ = 1; i__ <= i__1; ++i__) { + vt[m + i__ * vt_dim1] = -s * vt[nlp1 + i__ * vt_dim1]; + vt2[i__ * vt2_dim1 + 1] = c__ * vt[nlp1 + i__ * vt_dim1]; +/* L170: */ + } + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + vt2[i__ * vt2_dim1 + 1] = s * vt[m + i__ * vt_dim1]; + vt[m + i__ * vt_dim1] = c__ * vt[m + i__ * vt_dim1]; +/* L180: */ + } + } else { + scopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2); + } + if (m > n) { + scopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2); + } + +/* + The deflated singular values and their corresponding vectors go + into the back of D, U, and V respectively. +*/ + + if (n > *k) { + i__1 = n - *k; + scopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); + i__1 = n - *k; + slacpy_("A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1) + * u_dim1 + 1], ldu); + i__1 = n - *k; + slacpy_("A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 + + vt_dim1], ldvt); + } + +/* Copy CTOT into COLTYP for referencing in SLASD3. */ + + for (j = 1; j <= 4; ++j) { + coltyp[j] = ctot[j - 1]; +/* L190: */ + } + + return 0; + +/* End of SLASD2 */ + +} /* slasd2_ */ + +/* Subroutine */ int slasd3_(integer *nl, integer *nr, integer *sqre, integer + *k, real *d__, real *q, integer *ldq, real *dsigma, real *u, integer * + ldu, real *u2, integer *ldu2, real *vt, integer *ldvt, real *vt2, + integer *ldvt2, integer *idxc, integer *ctot, real *z__, integer * + info) +{ + /* System generated locals */ + integer q_dim1, q_offset, u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, + vt_offset, vt2_dim1, vt2_offset, i__1, i__2; + real r__1, r__2; + + /* Builtin functions */ + double sqrt(doublereal), r_sign(real *, real *); + + /* Local variables */ + static integer i__, j, m, n, jc; + static real rho; + static integer nlp1, nlp2, nrp1; + static real temp; + extern doublereal snrm2_(integer *, real *, integer *); + static integer ctemp; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static integer ktemp; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + extern doublereal slamc3_(real *, real *); + extern /* Subroutine */ int slasd4_(integer *, integer *, real *, real *, + real *, real *, real *, real *, integer *), xerbla_(char *, + integer *), slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, + real *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + October 31, 1999 + + + Purpose + ======= + + SLASD3 finds all the square roots of the roots of the secular + equation, as defined by the values in D and Z. It makes the + appropriate calls to SLASD4 and then updates the singular + vectors by matrix multiplication. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + SLASD3 is called from SLASD1. + + Arguments + ========= + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has N = NL + NR + 1 rows and + M = N + SQRE >= N columns. + + K (input) INTEGER + The size of the secular equation, 1 =< K = < N. + + D (output) REAL array, dimension(K) + On exit the square roots of the roots of the secular equation, + in ascending order. + + Q (workspace) REAL array, + dimension at least (LDQ,K). + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= K. + + DSIGMA (input) REAL array, dimension(K) + The first K elements of this array contain the old roots + of the deflated updating problem. These are the poles + of the secular equation. + + U (input) REAL array, dimension (LDU, N) + The last N - K columns of this matrix contain the deflated + left singular vectors. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= N. + + U2 (input) REAL array, dimension (LDU2, N) + The first K columns of this matrix contain the non-deflated + left singular vectors for the split problem. + + LDU2 (input) INTEGER + The leading dimension of the array U2. LDU2 >= N. + + VT (input) REAL array, dimension (LDVT, M) + The last M - K columns of VT' contain the deflated + right singular vectors. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= N. + + VT2 (input) REAL array, dimension (LDVT2, N) + The first K columns of VT2' contain the non-deflated + right singular vectors for the split problem. + + LDVT2 (input) INTEGER + The leading dimension of the array VT2. LDVT2 >= N. + + IDXC (input) INTEGER array, dimension ( N ) + The permutation used to arrange the columns of U (and rows of + VT) into three groups: the first group contains non-zero + entries only at and above (or before) NL +1; the second + contains non-zero entries only at and below (or after) NL+2; + and the third is dense. The first column of U and the row of + VT are treated separately, however. + + The rows of the singular vectors found by SLASD4 + must be likewise permuted before the matrix multiplies can + take place. + + CTOT (input) INTEGER array, dimension ( 4 ) + A count of the total number of the various types of columns + in U (or rows in VT), as described in IDXC. The fourth column + type is any column which has been deflated. + + Z (input) REAL array, dimension (K) + The first K elements of this array contain the components + of the deflation-adjusted updating row vector. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --dsigma; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + u2_dim1 = *ldu2; + u2_offset = 1 + u2_dim1; + u2 -= u2_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + vt2_dim1 = *ldvt2; + vt2_offset = 1 + vt2_dim1; + vt2 -= vt2_offset; + --idxc; + --ctot; + --z__; + + /* Function Body */ + *info = 0; + + if (*nl < 1) { + *info = -1; + } else if (*nr < 1) { + *info = -2; + } else if (*sqre != 1 && *sqre != 0) { + *info = -3; + } + + n = *nl + *nr + 1; + m = n + *sqre; + nlp1 = *nl + 1; + nlp2 = *nl + 2; + + if (*k < 1 || *k > n) { + *info = -4; + } else if (*ldq < *k) { + *info = -7; + } else if (*ldu < n) { + *info = -10; + } else if (*ldu2 < n) { + *info = -12; + } else if (*ldvt < m) { + *info = -14; + } else if (*ldvt2 < m) { + *info = -16; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLASD3", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*k == 1) { + d__[1] = dabs(z__[1]); + scopy_(&m, &vt2[vt2_dim1 + 1], ldvt2, &vt[vt_dim1 + 1], ldvt); + if (z__[1] > 0.f) { + scopy_(&n, &u2[u2_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1); + } else { + i__1 = n; + for (i__ = 1; i__ <= i__1; ++i__) { + u[i__ + u_dim1] = -u2[i__ + u2_dim1]; +/* L10: */ + } + } + return 0; + } + +/* + Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can + be computed with high relative accuracy (barring over/underflow). + This is a problem on machines without a guard digit in + add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). + The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), + which on any of these machines zeros out the bottommost + bit of DSIGMA(I) if it is 1; this makes the subsequent + subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation + occurs. On binary machines with a guard digit (almost all + machines) it does not change DSIGMA(I) at all. On hexadecimal + and decimal machines with a guard digit, it slightly + changes the bottommost bits of DSIGMA(I). It does not account + for hexadecimal or decimal machines without guard digits + (we know of none). We use a subroutine call to compute + 2*DLAMBDA(I) to prevent optimizing compilers from eliminating + this code. +*/ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + dsigma[i__] = slamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; +/* L20: */ + } + +/* Keep a copy of Z. */ + + scopy_(k, &z__[1], &c__1, &q[q_offset], &c__1); + +/* Normalize Z. */ + + rho = snrm2_(k, &z__[1], &c__1); + slascl_("G", &c__0, &c__0, &rho, &c_b15, k, &c__1, &z__[1], k, info); + rho *= rho; + +/* Find the new singular values. */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + slasd4_(k, &j, &dsigma[1], &z__[1], &u[j * u_dim1 + 1], &rho, &d__[j], + &vt[j * vt_dim1 + 1], info); + +/* If the zero finder fails, the computation is terminated. */ + + if (*info != 0) { + return 0; + } +/* L30: */ + } + +/* Compute updated Z. */ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + z__[i__] = u[i__ + *k * u_dim1] * vt[i__ + *k * vt_dim1]; + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[ + i__] - dsigma[j]) / (dsigma[i__] + dsigma[j]); +/* L40: */ + } + i__2 = *k - 1; + for (j = i__; j <= i__2; ++j) { + z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[ + i__] - dsigma[j + 1]) / (dsigma[i__] + dsigma[j + 1]); +/* L50: */ + } + r__2 = sqrt((r__1 = z__[i__], dabs(r__1))); + z__[i__] = r_sign(&r__2, &q[i__ + q_dim1]); +/* L60: */ + } + +/* + Compute left singular vectors of the modified diagonal matrix, + and store related information for the right singular vectors. +*/ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + vt[i__ * vt_dim1 + 1] = z__[1] / u[i__ * u_dim1 + 1] / vt[i__ * + vt_dim1 + 1]; + u[i__ * u_dim1 + 1] = -1.f; + i__2 = *k; + for (j = 2; j <= i__2; ++j) { + vt[j + i__ * vt_dim1] = z__[j] / u[j + i__ * u_dim1] / vt[j + i__ + * vt_dim1]; + u[j + i__ * u_dim1] = dsigma[j] * vt[j + i__ * vt_dim1]; +/* L70: */ + } + temp = snrm2_(k, &u[i__ * u_dim1 + 1], &c__1); + q[i__ * q_dim1 + 1] = u[i__ * u_dim1 + 1] / temp; + i__2 = *k; + for (j = 2; j <= i__2; ++j) { + jc = idxc[j]; + q[j + i__ * q_dim1] = u[jc + i__ * u_dim1] / temp; +/* L80: */ + } +/* L90: */ + } + +/* Update the left singular vector matrix. */ + + if (*k == 2) { + sgemm_("N", "N", &n, k, k, &c_b15, &u2[u2_offset], ldu2, &q[q_offset], + ldq, &c_b29, &u[u_offset], ldu); + goto L100; + } + if (ctot[1] > 0) { + sgemm_("N", "N", nl, k, &ctot[1], &c_b15, &u2[(u2_dim1 << 1) + 1], + ldu2, &q[q_dim1 + 2], ldq, &c_b29, &u[u_dim1 + 1], ldu); + if (ctot[3] > 0) { + ktemp = ctot[1] + 2 + ctot[2]; + sgemm_("N", "N", nl, k, &ctot[3], &c_b15, &u2[ktemp * u2_dim1 + 1] + , ldu2, &q[ktemp + q_dim1], ldq, &c_b15, &u[u_dim1 + 1], + ldu); + } + } else if (ctot[3] > 0) { + ktemp = ctot[1] + 2 + ctot[2]; + sgemm_("N", "N", nl, k, &ctot[3], &c_b15, &u2[ktemp * u2_dim1 + 1], + ldu2, &q[ktemp + q_dim1], ldq, &c_b29, &u[u_dim1 + 1], ldu); + } else { + slacpy_("F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu); + } + scopy_(k, &q[q_dim1 + 1], ldq, &u[nlp1 + u_dim1], ldu); + ktemp = ctot[1] + 2; + ctemp = ctot[2] + ctot[3]; + sgemm_("N", "N", nr, k, &ctemp, &c_b15, &u2[nlp2 + ktemp * u2_dim1], ldu2, + &q[ktemp + q_dim1], ldq, &c_b29, &u[nlp2 + u_dim1], ldu); + +/* Generate the right singular vectors. */ + +L100: + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = snrm2_(k, &vt[i__ * vt_dim1 + 1], &c__1); + q[i__ + q_dim1] = vt[i__ * vt_dim1 + 1] / temp; + i__2 = *k; + for (j = 2; j <= i__2; ++j) { + jc = idxc[j]; + q[i__ + j * q_dim1] = vt[jc + i__ * vt_dim1] / temp; +/* L110: */ + } +/* L120: */ + } + +/* Update the right singular vector matrix. */ + + if (*k == 2) { + sgemm_("N", "N", k, &m, k, &c_b15, &q[q_offset], ldq, &vt2[vt2_offset] + , ldvt2, &c_b29, &vt[vt_offset], ldvt); + return 0; + } + ktemp = ctot[1] + 1; + sgemm_("N", "N", k, &nlp1, &ktemp, &c_b15, &q[q_dim1 + 1], ldq, &vt2[ + vt2_dim1 + 1], ldvt2, &c_b29, &vt[vt_dim1 + 1], ldvt); + ktemp = ctot[1] + 2 + ctot[2]; + if (ktemp <= *ldvt2) { + sgemm_("N", "N", k, &nlp1, &ctot[3], &c_b15, &q[ktemp * q_dim1 + 1], + ldq, &vt2[ktemp + vt2_dim1], ldvt2, &c_b15, &vt[vt_dim1 + 1], + ldvt); + } + + ktemp = ctot[1] + 1; + nrp1 = *nr + *sqre; + if (ktemp > 1) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + q[i__ + ktemp * q_dim1] = q[i__ + q_dim1]; +/* L130: */ + } + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + vt2[ktemp + i__ * vt2_dim1] = vt2[i__ * vt2_dim1 + 1]; +/* L140: */ + } + } + ctemp = ctot[2] + 1 + ctot[3]; + sgemm_("N", "N", k, &nrp1, &ctemp, &c_b15, &q[ktemp * q_dim1 + 1], ldq, & + vt2[ktemp + nlp2 * vt2_dim1], ldvt2, &c_b29, &vt[nlp2 * vt_dim1 + + 1], ldvt); + + return 0; + +/* End of SLASD3 */ + +} /* slasd3_ */ + +/* Subroutine */ int slasd4_(integer *n, integer *i__, real *d__, real *z__, + real *delta, real *rho, real *sigma, real *work, integer *info) +{ + /* System generated locals */ + integer i__1; + real r__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static real a, b, c__; + static integer j; + static real w, dd[3]; + static integer ii; + static real dw, zz[3]; + static integer ip1; + static real eta, phi, eps, tau, psi; + static integer iim1, iip1; + static real dphi, dpsi; + static integer iter; + static real temp, prew, sg2lb, sg2ub, temp1, temp2, dtiim, delsq, dtiip; + static integer niter; + static real dtisq; + static logical swtch; + static real dtnsq; + extern /* Subroutine */ int slaed6_(integer *, logical *, real *, real *, + real *, real *, real *, integer *); + static real delsq2; + extern /* Subroutine */ int slasd5_(integer *, real *, real *, real *, + real *, real *, real *); + static real dtnsq1; + static logical swtch3; + extern doublereal slamch_(char *); + static logical orgati; + static real erretm, dtipsq, rhoinv; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + October 31, 1999 + + + Purpose + ======= + + This subroutine computes the square root of the I-th updated + eigenvalue of a positive symmetric rank-one modification to + a positive diagonal matrix whose entries are given as the squares + of the corresponding entries in the array d, and that + + 0 <= D(i) < D(j) for i < j + + and that RHO > 0. This is arranged by the calling routine, and is + no loss in generality. The rank-one modified system is thus + + diag( D ) * diag( D ) + RHO * Z * Z_transpose. + + where we assume the Euclidean norm of Z is 1. + + The method consists of approximating the rational functions in the + secular equation by simpler interpolating rational functions. + + Arguments + ========= + + N (input) INTEGER + The length of all arrays. + + I (input) INTEGER + The index of the eigenvalue to be computed. 1 <= I <= N. + + D (input) REAL array, dimension ( N ) + The original eigenvalues. It is assumed that they are in + order, 0 <= D(I) < D(J) for I < J. + + Z (input) REAL array, dimension ( N ) + The components of the updating vector. + + DELTA (output) REAL array, dimension ( N ) + If N .ne. 1, DELTA contains (D(j) - sigma_I) in its j-th + component. If N = 1, then DELTA(1) = 1. The vector DELTA + contains the information necessary to construct the + (singular) eigenvectors. + + RHO (input) REAL + The scalar in the symmetric updating formula. + + SIGMA (output) REAL + The computed lambda_I, the I-th updated eigenvalue. + + WORK (workspace) REAL array, dimension ( N ) + If N .ne. 1, WORK contains (D(j) + sigma_I) in its j-th + component. If N = 1, then WORK( 1 ) = 1. + + INFO (output) INTEGER + = 0: successful exit + > 0: if INFO = 1, the updating process failed. + + Internal Parameters + =================== + + Logical variable ORGATI (origin-at-i?) is used for distinguishing + whether D(i) or D(i+1) is treated as the origin. + + ORGATI = .true. origin at i + ORGATI = .false. origin at i+1 + + Logical variable SWTCH3 (switch-for-3-poles?) is for noting + if we are working with THREE poles! + + MAXIT is the maximum number of iterations allowed for each + eigenvalue. + + Further Details + =============== + + Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Since this routine is called in an inner loop, we do no argument + checking. + + Quick return for N=1 and 2. +*/ + + /* Parameter adjustments */ + --work; + --delta; + --z__; + --d__; + + /* Function Body */ + *info = 0; + if (*n == 1) { + +/* Presumably, I=1 upon entry */ + + *sigma = sqrt(d__[1] * d__[1] + *rho * z__[1] * z__[1]); + delta[1] = 1.f; + work[1] = 1.f; + return 0; + } + if (*n == 2) { + slasd5_(i__, &d__[1], &z__[1], &delta[1], rho, sigma, &work[1]); + return 0; + } + +/* Compute machine epsilon */ + + eps = slamch_("Epsilon"); + rhoinv = 1.f / *rho; + +/* The case I = N */ + + if (*i__ == *n) { + +/* Initialize some basic variables */ + + ii = *n - 1; + niter = 1; + +/* Calculate initial guess */ + + temp = *rho / 2.f; + +/* + If ||Z||_2 is not one, then TEMP should be set to + RHO * ||Z||_2^2 / TWO +*/ + + temp1 = temp / (d__[*n] + sqrt(d__[*n] * d__[*n] + temp)); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] = d__[j] + d__[*n] + temp1; + delta[j] = d__[j] - d__[*n] - temp1; +/* L10: */ + } + + psi = 0.f; + i__1 = *n - 2; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / (delta[j] * work[j]); +/* L20: */ + } + + c__ = rhoinv + psi; + w = c__ + z__[ii] * z__[ii] / (delta[ii] * work[ii]) + z__[*n] * z__[* + n] / (delta[*n] * work[*n]); + + if (w <= 0.f) { + temp1 = sqrt(d__[*n] * d__[*n] + *rho); + temp = z__[*n - 1] * z__[*n - 1] / ((d__[*n - 1] + temp1) * (d__[* + n] - d__[*n - 1] + *rho / (d__[*n] + temp1))) + z__[*n] * + z__[*n] / *rho; + +/* + The following TAU is to approximate + SIGMA_n^2 - D( N )*D( N ) +*/ + + if (c__ <= temp) { + tau = *rho; + } else { + delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]); + a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[* + n]; + b = z__[*n] * z__[*n] * delsq; + if (a < 0.f) { + tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f); + } + } + +/* + It can be proved that + D(N)^2+RHO/2 <= SIGMA_n^2 < D(N)^2+TAU <= D(N)^2+RHO +*/ + + } else { + delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]); + a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]; + b = z__[*n] * z__[*n] * delsq; + +/* + The following TAU is to approximate + SIGMA_n^2 - D( N )*D( N ) +*/ + + if (a < 0.f) { + tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f); + } + +/* + It can be proved that + D(N)^2 < D(N)^2+TAU < SIGMA(N)^2 < D(N)^2+RHO/2 +*/ + + } + +/* The following ETA is to approximate SIGMA_n - D( N ) */ + + eta = tau / (d__[*n] + sqrt(d__[*n] * d__[*n] + tau)); + + *sigma = d__[*n] + eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - eta; + work[j] = d__[j] + d__[*i__] + eta; +/* L30: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (delta[j] * work[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L40: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / (delta[*n] * work[*n]); + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * ( + dpsi + dphi); + + w = rhoinv + phi + psi; + +/* Test for convergence */ + + if (dabs(w) <= eps * erretm) { + goto L240; + } + +/* Calculate the new step */ + + ++niter; + dtnsq1 = work[*n - 1] * delta[*n - 1]; + dtnsq = work[*n] * delta[*n]; + c__ = w - dtnsq1 * dpsi - dtnsq * dphi; + a = (dtnsq + dtnsq1) * w - dtnsq * dtnsq1 * (dpsi + dphi); + b = dtnsq * dtnsq1 * w; + if (c__ < 0.f) { + c__ = dabs(c__); + } + if (c__ == 0.f) { + eta = *rho - *sigma * *sigma; + } else if (a >= 0.f) { + eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / ( + c__ * 2.f); + } else { + eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta > 0.f) { + eta = -w / (dpsi + dphi); + } + temp = eta - dtnsq; + if (temp > *rho) { + eta = *rho + dtnsq; + } + + tau += eta; + eta /= *sigma + sqrt(eta + *sigma * *sigma); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; + work[j] += eta; +/* L50: */ + } + + *sigma += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L60: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / (work[*n] * delta[*n]); + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * ( + dpsi + dphi); + + w = rhoinv + phi + psi; + +/* Main loop to update the values of the array DELTA */ + + iter = niter + 1; + + for (niter = iter; niter <= 20; ++niter) { + +/* Test for convergence */ + + if (dabs(w) <= eps * erretm) { + goto L240; + } + +/* Calculate the new step */ + + dtnsq1 = work[*n - 1] * delta[*n - 1]; + dtnsq = work[*n] * delta[*n]; + c__ = w - dtnsq1 * dpsi - dtnsq * dphi; + a = (dtnsq + dtnsq1) * w - dtnsq1 * dtnsq * (dpsi + dphi); + b = dtnsq1 * dtnsq * w; + if (a >= 0.f) { + eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / + (c__ * 2.f); + } else { + eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta > 0.f) { + eta = -w / (dpsi + dphi); + } + temp = eta - dtnsq; + if (temp <= 0.f) { + eta /= 2.f; + } + + tau += eta; + eta /= *sigma + sqrt(eta + *sigma * *sigma); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; + work[j] += eta; +/* L70: */ + } + + *sigma += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L80: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / (work[*n] * delta[*n]); + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * + (dpsi + dphi); + + w = rhoinv + phi + psi; +/* L90: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + goto L240; + +/* End for the case I = N */ + + } else { + +/* The case for I < N */ + + niter = 1; + ip1 = *i__ + 1; + +/* Calculate initial guess */ + + delsq = (d__[ip1] - d__[*i__]) * (d__[ip1] + d__[*i__]); + delsq2 = delsq / 2.f; + temp = delsq2 / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + delsq2)); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] = d__[j] + d__[*i__] + temp; + delta[j] = d__[j] - d__[*i__] - temp; +/* L100: */ + } + + psi = 0.f; + i__1 = *i__ - 1; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / (work[j] * delta[j]); +/* L110: */ + } + + phi = 0.f; + i__1 = *i__ + 2; + for (j = *n; j >= i__1; --j) { + phi += z__[j] * z__[j] / (work[j] * delta[j]); +/* L120: */ + } + c__ = rhoinv + psi + phi; + w = c__ + z__[*i__] * z__[*i__] / (work[*i__] * delta[*i__]) + z__[ + ip1] * z__[ip1] / (work[ip1] * delta[ip1]); + + if (w > 0.f) { + +/* + d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2 + + We choose d(i) as origin. +*/ + + orgati = TRUE_; + sg2lb = 0.f; + sg2ub = delsq2; + a = c__ * delsq + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; + b = z__[*i__] * z__[*i__] * delsq; + if (a > 0.f) { + tau = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); + } else { + tau = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / + (c__ * 2.f); + } + +/* + TAU now is an estimation of SIGMA^2 - D( I )^2. The + following, however, is the corresponding estimation of + SIGMA - D( I ). +*/ + + eta = tau / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + tau)); + } else { + +/* + (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2 + + We choose d(i+1) as origin. +*/ + + orgati = FALSE_; + sg2lb = -delsq2; + sg2ub = 0.f; + a = c__ * delsq - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; + b = z__[ip1] * z__[ip1] * delsq; + if (a < 0.f) { + tau = b * 2.f / (a - sqrt((r__1 = a * a + b * 4.f * c__, dabs( + r__1)))); + } else { + tau = -(a + sqrt((r__1 = a * a + b * 4.f * c__, dabs(r__1)))) + / (c__ * 2.f); + } + +/* + TAU now is an estimation of SIGMA^2 - D( IP1 )^2. The + following, however, is the corresponding estimation of + SIGMA - D( IP1 ). +*/ + + eta = tau / (d__[ip1] + sqrt((r__1 = d__[ip1] * d__[ip1] + tau, + dabs(r__1)))); + } + + if (orgati) { + ii = *i__; + *sigma = d__[*i__] + eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] = d__[j] + d__[*i__] + eta; + delta[j] = d__[j] - d__[*i__] - eta; +/* L130: */ + } + } else { + ii = *i__ + 1; + *sigma = d__[ip1] + eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] = d__[j] + d__[ip1] + eta; + delta[j] = d__[j] - d__[ip1] - eta; +/* L140: */ + } + } + iim1 = ii - 1; + iip1 = ii + 1; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L150: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.f; + phi = 0.f; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / (work[j] * delta[j]); + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L160: */ + } + + w = rhoinv + phi + psi; + +/* + W is the value of the secular function with + its ii-th element removed. +*/ + + swtch3 = FALSE_; + if (orgati) { + if (w < 0.f) { + swtch3 = TRUE_; + } + } else { + if (w > 0.f) { + swtch3 = TRUE_; + } + } + if (ii == 1 || ii == *n) { + swtch3 = FALSE_; + } + + temp = z__[ii] / (work[ii] * delta[ii]); + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w += temp; + erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f + + dabs(tau) * dw; + +/* Test for convergence */ + + if (dabs(w) <= eps * erretm) { + goto L240; + } + + if (w <= 0.f) { + sg2lb = dmax(sg2lb,tau); + } else { + sg2ub = dmin(sg2ub,tau); + } + +/* Calculate the new step */ + + ++niter; + if (! swtch3) { + dtipsq = work[ip1] * delta[ip1]; + dtisq = work[*i__] * delta[*i__]; + if (orgati) { +/* Computing 2nd power */ + r__1 = z__[*i__] / dtisq; + c__ = w - dtipsq * dw + delsq * (r__1 * r__1); + } else { +/* Computing 2nd power */ + r__1 = z__[ip1] / dtipsq; + c__ = w - dtisq * dw - delsq * (r__1 * r__1); + } + a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; + b = dtipsq * dtisq * w; + if (c__ == 0.f) { + if (a == 0.f) { + if (orgati) { + a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * (dpsi + + dphi); + } else { + a = z__[ip1] * z__[ip1] + dtisq * dtisq * (dpsi + + dphi); + } + } + eta = b / a; + } else if (a <= 0.f) { + eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / + (c__ * 2.f); + } else { + eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + dtiim = work[iim1] * delta[iim1]; + dtiip = work[iip1] * delta[iip1]; + temp = rhoinv + psi + phi; + if (orgati) { + temp1 = z__[iim1] / dtiim; + temp1 *= temp1; + c__ = temp - dtiip * (dpsi + dphi) - (d__[iim1] - d__[iip1]) * + (d__[iim1] + d__[iip1]) * temp1; + zz[0] = z__[iim1] * z__[iim1]; + if (dpsi < temp1) { + zz[2] = dtiip * dtiip * dphi; + } else { + zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi); + } + } else { + temp1 = z__[iip1] / dtiip; + temp1 *= temp1; + c__ = temp - dtiim * (dpsi + dphi) - (d__[iip1] - d__[iim1]) * + (d__[iim1] + d__[iip1]) * temp1; + if (dphi < temp1) { + zz[0] = dtiim * dtiim * dpsi; + } else { + zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1)); + } + zz[2] = z__[iip1] * z__[iip1]; + } + zz[1] = z__[ii] * z__[ii]; + dd[0] = dtiim; + dd[1] = delta[ii] * work[ii]; + dd[2] = dtiip; + slaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); + if (*info != 0) { + goto L240; + } + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta >= 0.f) { + eta = -w / dw; + } + if (orgati) { + temp1 = work[*i__] * delta[*i__]; + temp = eta - temp1; + } else { + temp1 = work[ip1] * delta[ip1]; + temp = eta - temp1; + } + if (temp > sg2ub || temp < sg2lb) { + if (w < 0.f) { + eta = (sg2ub - tau) / 2.f; + } else { + eta = (sg2lb - tau) / 2.f; + } + } + + tau += eta; + eta /= *sigma + sqrt(*sigma * *sigma + eta); + + prew = w; + + *sigma += eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] += eta; + delta[j] -= eta; +/* L170: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L180: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.f; + phi = 0.f; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / (work[j] * delta[j]); + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L190: */ + } + + temp = z__[ii] / (work[ii] * delta[ii]); + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f + + dabs(tau) * dw; + + if (w <= 0.f) { + sg2lb = dmax(sg2lb,tau); + } else { + sg2ub = dmin(sg2ub,tau); + } + + swtch = FALSE_; + if (orgati) { + if (-w > dabs(prew) / 10.f) { + swtch = TRUE_; + } + } else { + if (w > dabs(prew) / 10.f) { + swtch = TRUE_; + } + } + +/* Main loop to update the values of the array DELTA and WORK */ + + iter = niter + 1; + + for (niter = iter; niter <= 20; ++niter) { + +/* Test for convergence */ + + if (dabs(w) <= eps * erretm) { + goto L240; + } + +/* Calculate the new step */ + + if (! swtch3) { + dtipsq = work[ip1] * delta[ip1]; + dtisq = work[*i__] * delta[*i__]; + if (! swtch) { + if (orgati) { +/* Computing 2nd power */ + r__1 = z__[*i__] / dtisq; + c__ = w - dtipsq * dw + delsq * (r__1 * r__1); + } else { +/* Computing 2nd power */ + r__1 = z__[ip1] / dtipsq; + c__ = w - dtisq * dw - delsq * (r__1 * r__1); + } + } else { + temp = z__[ii] / (work[ii] * delta[ii]); + if (orgati) { + dpsi += temp * temp; + } else { + dphi += temp * temp; + } + c__ = w - dtisq * dpsi - dtipsq * dphi; + } + a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; + b = dtipsq * dtisq * w; + if (c__ == 0.f) { + if (a == 0.f) { + if (! swtch) { + if (orgati) { + a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * + (dpsi + dphi); + } else { + a = z__[ip1] * z__[ip1] + dtisq * dtisq * ( + dpsi + dphi); + } + } else { + a = dtisq * dtisq * dpsi + dtipsq * dtipsq * dphi; + } + } + eta = b / a; + } else if (a <= 0.f) { + eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)) + )) / (c__ * 2.f); + } else { + eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, + dabs(r__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + dtiim = work[iim1] * delta[iim1]; + dtiip = work[iip1] * delta[iip1]; + temp = rhoinv + psi + phi; + if (swtch) { + c__ = temp - dtiim * dpsi - dtiip * dphi; + zz[0] = dtiim * dtiim * dpsi; + zz[2] = dtiip * dtiip * dphi; + } else { + if (orgati) { + temp1 = z__[iim1] / dtiim; + temp1 *= temp1; + temp2 = (d__[iim1] - d__[iip1]) * (d__[iim1] + d__[ + iip1]) * temp1; + c__ = temp - dtiip * (dpsi + dphi) - temp2; + zz[0] = z__[iim1] * z__[iim1]; + if (dpsi < temp1) { + zz[2] = dtiip * dtiip * dphi; + } else { + zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi); + } + } else { + temp1 = z__[iip1] / dtiip; + temp1 *= temp1; + temp2 = (d__[iip1] - d__[iim1]) * (d__[iim1] + d__[ + iip1]) * temp1; + c__ = temp - dtiim * (dpsi + dphi) - temp2; + if (dphi < temp1) { + zz[0] = dtiim * dtiim * dpsi; + } else { + zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1)); + } + zz[2] = z__[iip1] * z__[iip1]; + } + } + dd[0] = dtiim; + dd[1] = delta[ii] * work[ii]; + dd[2] = dtiip; + slaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); + if (*info != 0) { + goto L240; + } + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta >= 0.f) { + eta = -w / dw; + } + if (orgati) { + temp1 = work[*i__] * delta[*i__]; + temp = eta - temp1; + } else { + temp1 = work[ip1] * delta[ip1]; + temp = eta - temp1; + } + if (temp > sg2ub || temp < sg2lb) { + if (w < 0.f) { + eta = (sg2ub - tau) / 2.f; + } else { + eta = (sg2lb - tau) / 2.f; + } + } + + tau += eta; + eta /= *sigma + sqrt(*sigma * *sigma + eta); + + *sigma += eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] += eta; + delta[j] -= eta; +/* L200: */ + } + + prew = w; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L210: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.f; + phi = 0.f; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / (work[j] * delta[j]); + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L220: */ + } + + temp = z__[ii] / (work[ii] * delta[ii]); + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * + 3.f + dabs(tau) * dw; + if (w * prew > 0.f && dabs(w) > dabs(prew) / 10.f) { + swtch = ! swtch; + } + + if (w <= 0.f) { + sg2lb = dmax(sg2lb,tau); + } else { + sg2ub = dmin(sg2ub,tau); + } + +/* L230: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + + } + +L240: + return 0; + +/* End of SLASD4 */ + +} /* slasd4_ */ + +/* Subroutine */ int slasd5_(integer *i__, real *d__, real *z__, real *delta, + real *rho, real *dsigma, real *work) +{ + /* System generated locals */ + real r__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static real b, c__, w, del, tau, delsq; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + June 30, 1999 + + + Purpose + ======= + + This subroutine computes the square root of the I-th eigenvalue + of a positive symmetric rank-one modification of a 2-by-2 diagonal + matrix + + diag( D ) * diag( D ) + RHO * Z * transpose(Z) . + + The diagonal entries in the array D are assumed to satisfy + + 0 <= D(i) < D(j) for i < j . + + We also assume RHO > 0 and that the Euclidean norm of the vector + Z is one. + + Arguments + ========= + + I (input) INTEGER + The index of the eigenvalue to be computed. I = 1 or I = 2. + + D (input) REAL array, dimension ( 2 ) + The original eigenvalues. We assume 0 <= D(1) < D(2). + + Z (input) REAL array, dimension ( 2 ) + The components of the updating vector. + + DELTA (output) REAL array, dimension ( 2 ) + Contains (D(j) - lambda_I) in its j-th component. + The vector DELTA contains the information necessary + to construct the eigenvectors. + + RHO (input) REAL + The scalar in the symmetric updating formula. + + DSIGMA (output) REAL + The computed lambda_I, the I-th updated eigenvalue. + + WORK (workspace) REAL array, dimension ( 2 ) + WORK contains (D(j) + sigma_I) in its j-th component. + + Further Details + =============== + + Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --work; + --delta; + --z__; + --d__; + + /* Function Body */ + del = d__[2] - d__[1]; + delsq = del * (d__[2] + d__[1]); + if (*i__ == 1) { + w = *rho * 4.f * (z__[2] * z__[2] / (d__[1] + d__[2] * 3.f) - z__[1] * + z__[1] / (d__[1] * 3.f + d__[2])) / del + 1.f; + if (w > 0.f) { + b = delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[1] * z__[1] * delsq; + +/* + B > ZERO, always + + The following TAU is DSIGMA * DSIGMA - D( 1 ) * D( 1 ) +*/ + + tau = c__ * 2.f / (b + sqrt((r__1 = b * b - c__ * 4.f, dabs(r__1)) + )); + +/* The following TAU is DSIGMA - D( 1 ) */ + + tau /= d__[1] + sqrt(d__[1] * d__[1] + tau); + *dsigma = d__[1] + tau; + delta[1] = -tau; + delta[2] = del - tau; + work[1] = d__[1] * 2.f + tau; + work[2] = d__[1] + tau + d__[2]; +/* + DELTA( 1 ) = -Z( 1 ) / TAU + DELTA( 2 ) = Z( 2 ) / ( DEL-TAU ) +*/ + } else { + b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * delsq; + +/* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */ + + if (b > 0.f) { + tau = c__ * -2.f / (b + sqrt(b * b + c__ * 4.f)); + } else { + tau = (b - sqrt(b * b + c__ * 4.f)) / 2.f; + } + +/* The following TAU is DSIGMA - D( 2 ) */ + + tau /= d__[2] + sqrt((r__1 = d__[2] * d__[2] + tau, dabs(r__1))); + *dsigma = d__[2] + tau; + delta[1] = -(del + tau); + delta[2] = -tau; + work[1] = d__[1] + tau + d__[2]; + work[2] = d__[2] * 2.f + tau; +/* + DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) + DELTA( 2 ) = -Z( 2 ) / TAU +*/ + } +/* + TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) + DELTA( 1 ) = DELTA( 1 ) / TEMP + DELTA( 2 ) = DELTA( 2 ) / TEMP +*/ + } else { + +/* Now I=2 */ + + b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * delsq; + +/* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */ + + if (b > 0.f) { + tau = (b + sqrt(b * b + c__ * 4.f)) / 2.f; + } else { + tau = c__ * 2.f / (-b + sqrt(b * b + c__ * 4.f)); + } + +/* The following TAU is DSIGMA - D( 2 ) */ + + tau /= d__[2] + sqrt(d__[2] * d__[2] + tau); + *dsigma = d__[2] + tau; + delta[1] = -(del + tau); + delta[2] = -tau; + work[1] = d__[1] + tau + d__[2]; + work[2] = d__[2] * 2.f + tau; +/* + DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) + DELTA( 2 ) = -Z( 2 ) / TAU + TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) + DELTA( 1 ) = DELTA( 1 ) / TEMP + DELTA( 2 ) = DELTA( 2 ) / TEMP +*/ + } + return 0; + +/* End of SLASD5 */ + +} /* slasd5_ */ + +/* Subroutine */ int slasd6_(integer *icompq, integer *nl, integer *nr, + integer *sqre, real *d__, real *vf, real *vl, real *alpha, real *beta, + integer *idxq, integer *perm, integer *givptr, integer *givcol, + integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real * + difl, real *difr, real *z__, integer *k, real *c__, real *s, real * + work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, + poles_dim1, poles_offset, i__1; + real r__1, r__2; + + /* Local variables */ + static integer i__, m, n, n1, n2, iw, idx, idxc, idxp, ivfw, ivlw; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *), slasd7_(integer *, integer *, integer *, integer *, + integer *, real *, real *, real *, real *, real *, real *, real *, + real *, real *, real *, integer *, integer *, integer *, integer + *, integer *, integer *, integer *, real *, integer *, real *, + real *, integer *), slasd8_(integer *, integer *, real *, real *, + real *, real *, real *, real *, integer *, real *, real *, + integer *); + static integer isigma; + extern /* Subroutine */ int xerbla_(char *, integer *), slascl_( + char *, integer *, integer *, real *, real *, integer *, integer * + , real *, integer *, integer *), slamrg_(integer *, + integer *, real *, integer *, integer *, integer *); + static real orgnrm; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SLASD6 computes the SVD of an updated upper bidiagonal matrix B + obtained by merging two smaller ones by appending a row. This + routine is used only for the problem which requires all singular + values and optionally singular vector matrices in factored form. + B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE. + A related subroutine, SLASD1, handles the case in which all singular + values and singular vectors of the bidiagonal matrix are desired. + + SLASD6 computes the SVD as follows: + + ( D1(in) 0 0 0 ) + B = U(in) * ( Z1' a Z2' b ) * VT(in) + ( 0 0 D2(in) 0 ) + + = U(out) * ( D(out) 0) * VT(out) + + where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M + with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros + elsewhere; and the entry b is empty if SQRE = 0. + + The singular values of B can be computed using D1, D2, the first + components of all the right singular vectors of the lower block, and + the last components of all the right singular vectors of the upper + block. These components are stored and updated in VF and VL, + respectively, in SLASD6. Hence U and VT are not explicitly + referenced. + + The singular values are stored in D. The algorithm consists of two + stages: + + The first stage consists of deflating the size of the problem + when there are multiple singular values or if there is a zero + in the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. This stage is + performed by the routine SLASD7. + + The second stage consists of calculating the updated + singular values. This is done by finding the roots of the + secular equation via the routine SLASD4 (as called by SLASD8). + This routine also updates VF and VL and computes the distances + between the updated singular values and the old singular + values. + + SLASD6 is called from SLASDA. + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed in + factored form: + = 0: Compute singular values only. + = 1: Compute singular vectors in factored form as well. + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has row dimension N = NL + NR + 1, + and column dimension M = N + SQRE. + + D (input/output) REAL array, dimension ( NL+NR+1 ). + On entry D(1:NL,1:NL) contains the singular values of the + upper block, and D(NL+2:N) contains the singular values + of the lower block. On exit D(1:N) contains the singular + values of the modified matrix. + + VF (input/output) REAL array, dimension ( M ) + On entry, VF(1:NL+1) contains the first components of all + right singular vectors of the upper block; and VF(NL+2:M) + contains the first components of all right singular vectors + of the lower block. On exit, VF contains the first components + of all right singular vectors of the bidiagonal matrix. + + VL (input/output) REAL array, dimension ( M ) + On entry, VL(1:NL+1) contains the last components of all + right singular vectors of the upper block; and VL(NL+2:M) + contains the last components of all right singular vectors of + the lower block. On exit, VL contains the last components of + all right singular vectors of the bidiagonal matrix. + + ALPHA (input) REAL + Contains the diagonal element associated with the added row. + + BETA (input) REAL + Contains the off-diagonal element associated with the added + row. + + IDXQ (output) INTEGER array, dimension ( N ) + This contains the permutation which will reintegrate the + subproblem just solved back into sorted order, i.e. + D( IDXQ( I = 1, N ) ) will be in ascending order. + + PERM (output) INTEGER array, dimension ( N ) + The permutations (from deflation and sorting) to be applied + to each block. Not referenced if ICOMPQ = 0. + + GIVPTR (output) INTEGER + The number of Givens rotations which took place in this + subproblem. Not referenced if ICOMPQ = 0. + + GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. Not referenced if ICOMPQ = 0. + + LDGCOL (input) INTEGER + leading dimension of GIVCOL, must be at least N. + + GIVNUM (output) REAL array, dimension ( LDGNUM, 2 ) + Each number indicates the C or S value to be used in the + corresponding Givens rotation. Not referenced if ICOMPQ = 0. + + LDGNUM (input) INTEGER + The leading dimension of GIVNUM and POLES, must be at least N. + + POLES (output) REAL array, dimension ( LDGNUM, 2 ) + On exit, POLES(1,*) is an array containing the new singular + values obtained from solving the secular equation, and + POLES(2,*) is an array containing the poles in the secular + equation. Not referenced if ICOMPQ = 0. + + DIFL (output) REAL array, dimension ( N ) + On exit, DIFL(I) is the distance between I-th updated + (undeflated) singular value and the I-th (undeflated) old + singular value. + + DIFR (output) REAL array, + dimension ( LDGNUM, 2 ) if ICOMPQ = 1 and + dimension ( N ) if ICOMPQ = 0. + On exit, DIFR(I, 1) is the distance between I-th updated + (undeflated) singular value and the I+1-th (undeflated) old + singular value. + + If ICOMPQ = 1, DIFR(1:K,2) is an array containing the + normalizing factors for the right singular vector matrix. + + See SLASD8 for details on DIFL and DIFR. + + Z (output) REAL array, dimension ( M ) + The first elements of this array contain the components + of the deflation-adjusted updating row vector. + + K (output) INTEGER + Contains the dimension of the non-deflated matrix, + This is the order of the related secular equation. 1 <= K <=N. + + C (output) REAL + C contains garbage if SQRE =0 and the C-value of a Givens + rotation related to the right null space if SQRE = 1. + + S (output) REAL + S contains garbage if SQRE =0 and the S-value of a Givens + rotation related to the right null space if SQRE = 1. + + WORK (workspace) REAL array, dimension ( 4 * M ) + + IWORK (workspace) INTEGER array, dimension ( 3 * N ) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --vf; + --vl; + --idxq; + --perm; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + poles_dim1 = *ldgnum; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + givnum_dim1 = *ldgnum; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + --difl; + --difr; + --z__; + --work; + --iwork; + + /* Function Body */ + *info = 0; + n = *nl + *nr + 1; + m = n + *sqre; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*nl < 1) { + *info = -2; + } else if (*nr < 1) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } else if (*ldgcol < n) { + *info = -14; + } else if (*ldgnum < n) { + *info = -16; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLASD6", &i__1); + return 0; + } + +/* + The following values are for bookkeeping purposes only. They are + integer pointers which indicate the portion of the workspace + used by a particular array in SLASD7 and SLASD8. +*/ + + isigma = 1; + iw = isigma + n; + ivfw = iw + m; + ivlw = ivfw + m; + + idx = 1; + idxc = idx + n; + idxp = idxc + n; + +/* + Scale. + + Computing MAX +*/ + r__1 = dabs(*alpha), r__2 = dabs(*beta); + orgnrm = dmax(r__1,r__2); + d__[*nl + 1] = 0.f; + i__1 = n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((r__1 = d__[i__], dabs(r__1)) > orgnrm) { + orgnrm = (r__1 = d__[i__], dabs(r__1)); + } +/* L10: */ + } + slascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &n, &c__1, &d__[1], &n, info); + *alpha /= orgnrm; + *beta /= orgnrm; + +/* Sort and Deflate singular values. */ + + slasd7_(icompq, nl, nr, sqre, k, &d__[1], &z__[1], &work[iw], &vf[1], & + work[ivfw], &vl[1], &work[ivlw], alpha, beta, &work[isigma], & + iwork[idx], &iwork[idxp], &idxq[1], &perm[1], givptr, &givcol[ + givcol_offset], ldgcol, &givnum[givnum_offset], ldgnum, c__, s, + info); + +/* Solve Secular Equation, compute DIFL, DIFR, and update VF, VL. */ + + slasd8_(icompq, k, &d__[1], &z__[1], &vf[1], &vl[1], &difl[1], &difr[1], + ldgnum, &work[isigma], &work[iw], info); + +/* Save the poles if ICOMPQ = 1. */ + + if (*icompq == 1) { + scopy_(k, &d__[1], &c__1, &poles[poles_dim1 + 1], &c__1); + scopy_(k, &work[isigma], &c__1, &poles[(poles_dim1 << 1) + 1], &c__1); + } + +/* Unscale. */ + + slascl_("G", &c__0, &c__0, &c_b15, &orgnrm, &n, &c__1, &d__[1], &n, info); + +/* Prepare the IDXQ sorting permutation. */ + + n1 = *k; + n2 = n - *k; + slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); + + return 0; + +/* End of SLASD6 */ + +} /* slasd6_ */ + +/* Subroutine */ int slasd7_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *k, real *d__, real *z__, real *zw, real *vf, + real *vfw, real *vl, real *vlw, real *alpha, real *beta, real *dsigma, + integer *idx, integer *idxp, integer *idxq, integer *perm, integer * + givptr, integer *givcol, integer *ldgcol, real *givnum, integer * + ldgnum, real *c__, real *s, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, i__1; + real r__1, r__2; + + /* Local variables */ + static integer i__, j, m, n, k2; + static real z1; + static integer jp; + static real eps, tau, tol; + static integer nlp1, nlp2, idxi, idxj; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *); + static integer idxjp, jprev; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + extern doublereal slapy2_(real *, real *), slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_( + integer *, integer *, real *, integer *, integer *, integer *); + static real hlftol; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + June 30, 1999 + + + Purpose + ======= + + SLASD7 merges the two sets of singular values together into a single + sorted set. Then it tries to deflate the size of the problem. There + are two ways in which deflation can occur: when two or more singular + values are close together or if there is a tiny entry in the Z + vector. For each such occurrence the order of the related + secular equation problem is reduced by one. + + SLASD7 is called from SLASD6. + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed + in compact form, as follows: + = 0: Compute singular values only. + = 1: Compute singular vectors of upper + bidiagonal matrix in compact form. + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has + N = NL + NR + 1 rows and + M = N + SQRE >= N columns. + + K (output) INTEGER + Contains the dimension of the non-deflated matrix, this is + the order of the related secular equation. 1 <= K <=N. + + D (input/output) REAL array, dimension ( N ) + On entry D contains the singular values of the two submatrices + to be combined. On exit D contains the trailing (N-K) updated + singular values (those which were deflated) sorted into + increasing order. + + Z (output) REAL array, dimension ( M ) + On exit Z contains the updating row vector in the secular + equation. + + ZW (workspace) REAL array, dimension ( M ) + Workspace for Z. + + VF (input/output) REAL array, dimension ( M ) + On entry, VF(1:NL+1) contains the first components of all + right singular vectors of the upper block; and VF(NL+2:M) + contains the first components of all right singular vectors + of the lower block. On exit, VF contains the first components + of all right singular vectors of the bidiagonal matrix. + + VFW (workspace) REAL array, dimension ( M ) + Workspace for VF. + + VL (input/output) REAL array, dimension ( M ) + On entry, VL(1:NL+1) contains the last components of all + right singular vectors of the upper block; and VL(NL+2:M) + contains the last components of all right singular vectors + of the lower block. On exit, VL contains the last components + of all right singular vectors of the bidiagonal matrix. + + VLW (workspace) REAL array, dimension ( M ) + Workspace for VL. + + ALPHA (input) REAL + Contains the diagonal element associated with the added row. + + BETA (input) REAL + Contains the off-diagonal element associated with the added + row. + + DSIGMA (output) REAL array, dimension ( N ) + Contains a copy of the diagonal elements (K-1 singular values + and one zero) in the secular equation. + + IDX (workspace) INTEGER array, dimension ( N ) + This will contain the permutation used to sort the contents of + D into ascending order. + + IDXP (workspace) INTEGER array, dimension ( N ) + This will contain the permutation used to place deflated + values of D at the end of the array. On output IDXP(2:K) + points to the nondeflated D-values and IDXP(K+1:N) + points to the deflated singular values. + + IDXQ (input) INTEGER array, dimension ( N ) + This contains the permutation which separately sorts the two + sub-problems in D into ascending order. Note that entries in + the first half of this permutation must first be moved one + position backward; and entries in the second half + must first have NL+1 added to their values. + + PERM (output) INTEGER array, dimension ( N ) + The permutations (from deflation and sorting) to be applied + to each singular block. Not referenced if ICOMPQ = 0. + + GIVPTR (output) INTEGER + The number of Givens rotations which took place in this + subproblem. Not referenced if ICOMPQ = 0. + + GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. Not referenced if ICOMPQ = 0. + + LDGCOL (input) INTEGER + The leading dimension of GIVCOL, must be at least N. + + GIVNUM (output) REAL array, dimension ( LDGNUM, 2 ) + Each number indicates the C or S value to be used in the + corresponding Givens rotation. Not referenced if ICOMPQ = 0. + + LDGNUM (input) INTEGER + The leading dimension of GIVNUM, must be at least N. + + C (output) REAL + C contains garbage if SQRE =0 and the C-value of a Givens + rotation related to the right null space if SQRE = 1. + + S (output) REAL + S contains garbage if SQRE =0 and the S-value of a Givens + rotation related to the right null space if SQRE = 1. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --z__; + --zw; + --vf; + --vfw; + --vl; + --vlw; + --dsigma; + --idx; + --idxp; + --idxq; + --perm; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + givnum_dim1 = *ldgnum; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + + /* Function Body */ + *info = 0; + n = *nl + *nr + 1; + m = n + *sqre; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*nl < 1) { + *info = -2; + } else if (*nr < 1) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } else if (*ldgcol < n) { + *info = -22; + } else if (*ldgnum < n) { + *info = -24; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLASD7", &i__1); + return 0; + } + + nlp1 = *nl + 1; + nlp2 = *nl + 2; + if (*icompq == 1) { + *givptr = 0; + } + +/* + Generate the first part of the vector Z and move the singular + values in the first part of D one position backward. +*/ + + z1 = *alpha * vl[nlp1]; + vl[nlp1] = 0.f; + tau = vf[nlp1]; + for (i__ = *nl; i__ >= 1; --i__) { + z__[i__ + 1] = *alpha * vl[i__]; + vl[i__] = 0.f; + vf[i__ + 1] = vf[i__]; + d__[i__ + 1] = d__[i__]; + idxq[i__ + 1] = idxq[i__] + 1; +/* L10: */ + } + vf[1] = tau; + +/* Generate the second part of the vector Z. */ + + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + z__[i__] = *beta * vf[i__]; + vf[i__] = 0.f; +/* L20: */ + } + +/* Sort the singular values into increasing order */ + + i__1 = n; + for (i__ = nlp2; i__ <= i__1; ++i__) { + idxq[i__] += nlp1; +/* L30: */ + } + +/* DSIGMA, IDXC, IDXC, and ZW are used as storage space. */ + + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + dsigma[i__] = d__[idxq[i__]]; + zw[i__] = z__[idxq[i__]]; + vfw[i__] = vf[idxq[i__]]; + vlw[i__] = vl[idxq[i__]]; +/* L40: */ + } + + slamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); + + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + idxi = idx[i__] + 1; + d__[i__] = dsigma[idxi]; + z__[i__] = zw[idxi]; + vf[i__] = vfw[idxi]; + vl[i__] = vlw[idxi]; +/* L50: */ + } + +/* Calculate the allowable deflation tolerence */ + + eps = slamch_("Epsilon"); +/* Computing MAX */ + r__1 = dabs(*alpha), r__2 = dabs(*beta); + tol = dmax(r__1,r__2); +/* Computing MAX */ + r__2 = (r__1 = d__[n], dabs(r__1)); + tol = eps * 64.f * dmax(r__2,tol); + +/* + There are 2 kinds of deflation -- first a value in the z-vector + is small, second two (or more) singular values are very close + together (their difference is small). + + If the value in the z-vector is small, we simply permute the + array so that the corresponding singular value is moved to the + end. + + If two values in the D-vector are close, we perform a two-sided + rotation designed to make one of the corresponding z-vector + entries zero, and then permute the array so that the deflated + singular value is moved to the end. + + If there are multiple singular values then the problem deflates. + Here the number of equal singular values are found. As each equal + singular value is found, an elementary reflector is computed to + rotate the corresponding singular subspace so that the + corresponding components of Z are zero in this new basis. +*/ + + *k = 1; + k2 = n + 1; + i__1 = n; + for (j = 2; j <= i__1; ++j) { + if ((r__1 = z__[j], dabs(r__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + idxp[k2] = j; + if (j == n) { + goto L100; + } + } else { + jprev = j; + goto L70; + } +/* L60: */ + } +L70: + j = jprev; +L80: + ++j; + if (j > n) { + goto L90; + } + if ((r__1 = z__[j], dabs(r__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + idxp[k2] = j; + } else { + +/* Check if singular values are close enough to allow deflation. */ + + if ((r__1 = d__[j] - d__[jprev], dabs(r__1)) <= tol) { + +/* Deflation is possible. */ + + *s = z__[jprev]; + *c__ = z__[j]; + +/* + Find sqrt(a**2+b**2) without overflow or + destructive underflow. +*/ + + tau = slapy2_(c__, s); + z__[j] = tau; + z__[jprev] = 0.f; + *c__ /= tau; + *s = -(*s) / tau; + +/* Record the appropriate Givens rotation */ + + if (*icompq == 1) { + ++(*givptr); + idxjp = idxq[idx[jprev] + 1]; + idxj = idxq[idx[j] + 1]; + if (idxjp <= nlp1) { + --idxjp; + } + if (idxj <= nlp1) { + --idxj; + } + givcol[*givptr + (givcol_dim1 << 1)] = idxjp; + givcol[*givptr + givcol_dim1] = idxj; + givnum[*givptr + (givnum_dim1 << 1)] = *c__; + givnum[*givptr + givnum_dim1] = *s; + } + srot_(&c__1, &vf[jprev], &c__1, &vf[j], &c__1, c__, s); + srot_(&c__1, &vl[jprev], &c__1, &vl[j], &c__1, c__, s); + --k2; + idxp[k2] = jprev; + jprev = j; + } else { + ++(*k); + zw[*k] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + jprev = j; + } + } + goto L80; +L90: + +/* Record the last singular value. */ + + ++(*k); + zw[*k] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + +L100: + +/* + Sort the singular values into DSIGMA. The singular values which + were not deflated go into the first K slots of DSIGMA, except + that DSIGMA(1) is treated separately. +*/ + + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + dsigma[j] = d__[jp]; + vfw[j] = vf[jp]; + vlw[j] = vl[jp]; +/* L110: */ + } + if (*icompq == 1) { + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + perm[j] = idxq[idx[jp] + 1]; + if (perm[j] <= nlp1) { + --perm[j]; + } +/* L120: */ + } + } + +/* + The deflated singular values go back into the last N - K slots of + D. +*/ + + i__1 = n - *k; + scopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); + +/* + Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and + VL(M). +*/ + + dsigma[1] = 0.f; + hlftol = tol / 2.f; + if (dabs(dsigma[2]) <= hlftol) { + dsigma[2] = hlftol; + } + if (m > n) { + z__[1] = slapy2_(&z1, &z__[m]); + if (z__[1] <= tol) { + *c__ = 1.f; + *s = 0.f; + z__[1] = tol; + } else { + *c__ = z1 / z__[1]; + *s = -z__[m] / z__[1]; + } + srot_(&c__1, &vf[m], &c__1, &vf[1], &c__1, c__, s); + srot_(&c__1, &vl[m], &c__1, &vl[1], &c__1, c__, s); + } else { + if (dabs(z1) <= tol) { + z__[1] = tol; + } else { + z__[1] = z1; + } + } + +/* Restore Z, VF, and VL. */ + + i__1 = *k - 1; + scopy_(&i__1, &zw[2], &c__1, &z__[2], &c__1); + i__1 = n - 1; + scopy_(&i__1, &vfw[2], &c__1, &vf[2], &c__1); + i__1 = n - 1; + scopy_(&i__1, &vlw[2], &c__1, &vl[2], &c__1); + + return 0; + +/* End of SLASD7 */ + +} /* slasd7_ */ + +/* Subroutine */ int slasd8_(integer *icompq, integer *k, real *d__, real * + z__, real *vf, real *vl, real *difl, real *difr, integer *lddifr, + real *dsigma, real *work, integer *info) +{ + /* System generated locals */ + integer difr_dim1, difr_offset, i__1, i__2; + real r__1, r__2; + + /* Builtin functions */ + double sqrt(doublereal), r_sign(real *, real *); + + /* Local variables */ + static integer i__, j; + static real dj, rho; + static integer iwk1, iwk2, iwk3; + static real temp; + extern doublereal sdot_(integer *, real *, integer *, real *, integer *); + static integer iwk2i, iwk3i; + extern doublereal snrm2_(integer *, real *, integer *); + static real diflj, difrj, dsigj; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + extern doublereal slamc3_(real *, real *); + extern /* Subroutine */ int slasd4_(integer *, integer *, real *, real *, + real *, real *, real *, real *, integer *), xerbla_(char *, + integer *); + static real dsigjp; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *), slaset_(char *, integer *, integer *, real *, real *, + real *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + June 30, 1999 + + + Purpose + ======= + + SLASD8 finds the square roots of the roots of the secular equation, + as defined by the values in DSIGMA and Z. It makes the appropriate + calls to SLASD4, and stores, for each element in D, the distance + to its two nearest poles (elements in DSIGMA). It also updates + the arrays VF and VL, the first and last components of all the + right singular vectors of the original bidiagonal matrix. + + SLASD8 is called from SLASD6. + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed in + factored form in the calling routine: + = 0: Compute singular values only. + = 1: Compute singular vectors in factored form as well. + + K (input) INTEGER + The number of terms in the rational function to be solved + by SLASD4. K >= 1. + + D (output) REAL array, dimension ( K ) + On output, D contains the updated singular values. + + Z (input) REAL array, dimension ( K ) + The first K elements of this array contain the components + of the deflation-adjusted updating row vector. + + VF (input/output) REAL array, dimension ( K ) + On entry, VF contains information passed through DBEDE8. + On exit, VF contains the first K components of the first + components of all right singular vectors of the bidiagonal + matrix. + + VL (input/output) REAL array, dimension ( K ) + On entry, VL contains information passed through DBEDE8. + On exit, VL contains the first K components of the last + components of all right singular vectors of the bidiagonal + matrix. + + DIFL (output) REAL array, dimension ( K ) + On exit, DIFL(I) = D(I) - DSIGMA(I). + + DIFR (output) REAL array, + dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and + dimension ( K ) if ICOMPQ = 0. + On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not + defined and will not be referenced. + + If ICOMPQ = 1, DIFR(1:K,2) is an array containing the + normalizing factors for the right singular vector matrix. + + LDDIFR (input) INTEGER + The leading dimension of DIFR, must be at least K. + + DSIGMA (input) REAL array, dimension ( K ) + The first K elements of this array contain the old roots + of the deflated updating problem. These are the poles + of the secular equation. + + WORK (workspace) REAL array, dimension at least 3 * K + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --z__; + --vf; + --vl; + --difl; + difr_dim1 = *lddifr; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + --dsigma; + --work; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*k < 1) { + *info = -2; + } else if (*lddifr < *k) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLASD8", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*k == 1) { + d__[1] = dabs(z__[1]); + difl[1] = d__[1]; + if (*icompq == 1) { + difl[2] = 1.f; + difr[(difr_dim1 << 1) + 1] = 1.f; + } + return 0; + } + +/* + Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can + be computed with high relative accuracy (barring over/underflow). + This is a problem on machines without a guard digit in + add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). + The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), + which on any of these machines zeros out the bottommost + bit of DSIGMA(I) if it is 1; this makes the subsequent + subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation + occurs. On binary machines with a guard digit (almost all + machines) it does not change DSIGMA(I) at all. On hexadecimal + and decimal machines with a guard digit, it slightly + changes the bottommost bits of DSIGMA(I). It does not account + for hexadecimal or decimal machines without guard digits + (we know of none). We use a subroutine call to compute + 2*DLAMBDA(I) to prevent optimizing compilers from eliminating + this code. +*/ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + dsigma[i__] = slamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; +/* L10: */ + } + +/* Book keeping. */ + + iwk1 = 1; + iwk2 = iwk1 + *k; + iwk3 = iwk2 + *k; + iwk2i = iwk2 - 1; + iwk3i = iwk3 - 1; + +/* Normalize Z. */ + + rho = snrm2_(k, &z__[1], &c__1); + slascl_("G", &c__0, &c__0, &rho, &c_b15, k, &c__1, &z__[1], k, info); + rho *= rho; + +/* Initialize WORK(IWK3). */ + + slaset_("A", k, &c__1, &c_b15, &c_b15, &work[iwk3], k); + +/* + Compute the updated singular values, the arrays DIFL, DIFR, + and the updated Z. +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + slasd4_(k, &j, &dsigma[1], &z__[1], &work[iwk1], &rho, &d__[j], &work[ + iwk2], info); + +/* If the root finder fails, the computation is terminated. */ + + if (*info != 0) { + return 0; + } + work[iwk3i + j] = work[iwk3i + j] * work[j] * work[iwk2i + j]; + difl[j] = -work[j]; + difr[j + difr_dim1] = -work[j + 1]; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + + i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ + j]); +/* L20: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + + i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ + j]); +/* L30: */ + } +/* L40: */ + } + +/* Compute updated Z. */ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + r__2 = sqrt((r__1 = work[iwk3i + i__], dabs(r__1))); + z__[i__] = r_sign(&r__2, &z__[i__]); +/* L50: */ + } + +/* Update VF and VL. */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + diflj = difl[j]; + dj = d__[j]; + dsigj = -dsigma[j]; + if (j < *k) { + difrj = -difr[j + difr_dim1]; + dsigjp = -dsigma[j + 1]; + } + work[j] = -z__[j] / diflj / (dsigma[j] + dj); + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] = z__[i__] / (slamc3_(&dsigma[i__], &dsigj) - diflj) / ( + dsigma[i__] + dj); +/* L60: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + work[i__] = z__[i__] / (slamc3_(&dsigma[i__], &dsigjp) + difrj) / + (dsigma[i__] + dj); +/* L70: */ + } + temp = snrm2_(k, &work[1], &c__1); + work[iwk2i + j] = sdot_(k, &work[1], &c__1, &vf[1], &c__1) / temp; + work[iwk3i + j] = sdot_(k, &work[1], &c__1, &vl[1], &c__1) / temp; + if (*icompq == 1) { + difr[j + (difr_dim1 << 1)] = temp; + } +/* L80: */ + } + + scopy_(k, &work[iwk2], &c__1, &vf[1], &c__1); + scopy_(k, &work[iwk3], &c__1, &vl[1], &c__1); + + return 0; + +/* End of SLASD8 */ + +} /* slasd8_ */ + +/* Subroutine */ int slasda_(integer *icompq, integer *smlsiz, integer *n, + integer *sqre, real *d__, real *e, real *u, integer *ldu, real *vt, + integer *k, real *difl, real *difr, real *z__, real *poles, integer * + givptr, integer *givcol, integer *ldgcol, integer *perm, real *givnum, + real *c__, real *s, real *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1, + difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, + poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, + z_dim1, z_offset, i__1, i__2; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + + /* Local variables */ + static integer i__, j, m, i1, ic, lf, nd, ll, nl, vf, nr, vl, im1, ncc, + nlf, nrf, vfi, iwk, vli, lvl, nru, ndb1, nlp1, lvl2, nrp1; + static real beta; + static integer idxq, nlvl; + static real alpha; + static integer inode, ndiml, ndimr, idxqi, itemp, sqrei; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *), slasd6_(integer *, integer *, integer *, integer *, + real *, real *, real *, real *, real *, integer *, integer *, + integer *, integer *, integer *, real *, integer *, real *, real * + , real *, real *, integer *, real *, real *, real *, integer *, + integer *); + static integer nwork1, nwork2; + extern /* Subroutine */ int xerbla_(char *, integer *), slasdq_( + char *, integer *, integer *, integer *, integer *, integer *, + real *, real *, real *, integer *, real *, integer *, real *, + integer *, real *, integer *), slasdt_(integer *, integer + *, integer *, integer *, integer *, integer *, integer *), + slaset_(char *, integer *, integer *, real *, real *, real *, + integer *); + static integer smlszp; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + Using a divide and conquer approach, SLASDA computes the singular + value decomposition (SVD) of a real upper bidiagonal N-by-M matrix + B with diagonal D and offdiagonal E, where M = N + SQRE. The + algorithm computes the singular values in the SVD B = U * S * VT. + The orthogonal matrices U and VT are optionally computed in + compact form. + + A related subroutine, SLASD0, computes the singular values and + the singular vectors in explicit form. + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed + in compact form, as follows + = 0: Compute singular values only. + = 1: Compute singular vectors of upper bidiagonal + matrix in compact form. + + SMLSIZ (input) INTEGER + The maximum size of the subproblems at the bottom of the + computation tree. + + N (input) INTEGER + The row dimension of the upper bidiagonal matrix. This is + also the dimension of the main diagonal array D. + + SQRE (input) INTEGER + Specifies the column dimension of the bidiagonal matrix. + = 0: The bidiagonal matrix has column dimension M = N; + = 1: The bidiagonal matrix has column dimension M = N + 1. + + D (input/output) REAL array, dimension ( N ) + On entry D contains the main diagonal of the bidiagonal + matrix. On exit D, if INFO = 0, contains its singular values. + + E (input) REAL array, dimension ( M-1 ) + Contains the subdiagonal entries of the bidiagonal matrix. + On exit, E has been destroyed. + + U (output) REAL array, + dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced + if ICOMPQ = 0. If ICOMPQ = 1, on exit, U contains the left + singular vector matrices of all subproblems at the bottom + level. + + LDU (input) INTEGER, LDU = > N. + The leading dimension of arrays U, VT, DIFL, DIFR, POLES, + GIVNUM, and Z. + + VT (output) REAL array, + dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced + if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT' contains the right + singular vector matrices of all subproblems at the bottom + level. + + K (output) INTEGER array, + dimension ( N ) if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0. + If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th + secular equation on the computation tree. + + DIFL (output) REAL array, dimension ( LDU, NLVL ), + where NLVL = floor(log_2 (N/SMLSIZ))). + + DIFR (output) REAL array, + dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and + dimension ( N ) if ICOMPQ = 0. + If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1) + record distances between singular values on the I-th + level and singular values on the (I -1)-th level, and + DIFR(1:N, 2 * I ) contains the normalizing factors for + the right singular vector matrix. See SLASD8 for details. + + Z (output) REAL array, + dimension ( LDU, NLVL ) if ICOMPQ = 1 and + dimension ( N ) if ICOMPQ = 0. + The first K elements of Z(1, I) contain the components of + the deflation-adjusted updating row vector for subproblems + on the I-th level. + + POLES (output) REAL array, + dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced + if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and + POLES(1, 2*I) contain the new and old singular values + involved in the secular equations on the I-th level. + + GIVPTR (output) INTEGER array, + dimension ( N ) if ICOMPQ = 1, and not referenced if + ICOMPQ = 0. If ICOMPQ = 1, on exit, GIVPTR( I ) records + the number of Givens rotations performed on the I-th + problem on the computation tree. + + GIVCOL (output) INTEGER array, + dimension ( LDGCOL, 2 * NLVL ) if ICOMPQ = 1, and not + referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, + GIVCOL(1, 2 *I - 1) and GIVCOL(1, 2 *I) record the locations + of Givens rotations performed on the I-th level on the + computation tree. + + LDGCOL (input) INTEGER, LDGCOL = > N. + The leading dimension of arrays GIVCOL and PERM. + + PERM (output) INTEGER array, + dimension ( LDGCOL, NLVL ) if ICOMPQ = 1, and not referenced + if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records + permutations done on the I-th level of the computation tree. + + GIVNUM (output) REAL array, + dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not + referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, + GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S- + values of Givens rotations performed on the I-th level on + the computation tree. + + C (output) REAL array, + dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. + If ICOMPQ = 1 and the I-th subproblem is not square, on exit, + C( I ) contains the C-value of a Givens rotation related to + the right null space of the I-th subproblem. + + S (output) REAL array, dimension ( N ) if + ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1 + and the I-th subproblem is not square, on exit, S( I ) + contains the S-value of a Givens rotation related to + the right null space of the I-th subproblem. + + WORK (workspace) REAL array, dimension + (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)). + + IWORK (workspace) INTEGER array. + Dimension must be at least (7 * N). + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + givnum_dim1 = *ldu; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + poles_dim1 = *ldu; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + z_dim1 = *ldu; + z_offset = 1 + z_dim1; + z__ -= z_offset; + difr_dim1 = *ldu; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + difl_dim1 = *ldu; + difl_offset = 1 + difl_dim1; + difl -= difl_offset; + vt_dim1 = *ldu; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + --k; + --givptr; + perm_dim1 = *ldgcol; + perm_offset = 1 + perm_dim1; + perm -= perm_offset; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + --c__; + --s; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*smlsiz < 3) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } else if (*ldu < *n + *sqre) { + *info = -8; + } else if (*ldgcol < *n) { + *info = -17; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLASDA", &i__1); + return 0; + } + + m = *n + *sqre; + +/* If the input matrix is too small, call SLASDQ to find the SVD. */ + + if (*n <= *smlsiz) { + if (*icompq == 0) { + slasdq_("U", sqre, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ + vt_offset], ldu, &u[u_offset], ldu, &u[u_offset], ldu, & + work[1], info); + } else { + slasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset] + , ldu, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], + info); + } + return 0; + } + +/* Book-keeping and set up the computation tree. */ + + inode = 1; + ndiml = inode + *n; + ndimr = ndiml + *n; + idxq = ndimr + *n; + iwk = idxq + *n; + + ncc = 0; + nru = 0; + + smlszp = *smlsiz + 1; + vf = 1; + vl = vf + m; + nwork1 = vl + m; + nwork2 = nwork1 + smlszp * smlszp; + + slasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], + smlsiz); + +/* + for the nodes on bottom level of the tree, solve + their subproblems by SLASDQ. +*/ + + ndb1 = (nd + 1) / 2; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + +/* + IC : center row of each node + NL : number of rows of left subproblem + NR : number of rows of right subproblem + NLF: starting row of the left subproblem + NRF: starting row of the right subproblem +*/ + + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nlp1 = nl + 1; + nr = iwork[ndimr + i1]; + nlf = ic - nl; + nrf = ic + 1; + idxqi = idxq + nlf - 2; + vfi = vf + nlf - 1; + vli = vl + nlf - 1; + sqrei = 1; + if (*icompq == 0) { + slaset_("A", &nlp1, &nlp1, &c_b29, &c_b15, &work[nwork1], &smlszp); + slasdq_("U", &sqrei, &nl, &nlp1, &nru, &ncc, &d__[nlf], &e[nlf], & + work[nwork1], &smlszp, &work[nwork2], &nl, &work[nwork2], + &nl, &work[nwork2], info); + itemp = nwork1 + nl * smlszp; + scopy_(&nlp1, &work[nwork1], &c__1, &work[vfi], &c__1); + scopy_(&nlp1, &work[itemp], &c__1, &work[vli], &c__1); + } else { + slaset_("A", &nl, &nl, &c_b29, &c_b15, &u[nlf + u_dim1], ldu); + slaset_("A", &nlp1, &nlp1, &c_b29, &c_b15, &vt[nlf + vt_dim1], + ldu); + slasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], & + vt[nlf + vt_dim1], ldu, &u[nlf + u_dim1], ldu, &u[nlf + + u_dim1], ldu, &work[nwork1], info); + scopy_(&nlp1, &vt[nlf + vt_dim1], &c__1, &work[vfi], &c__1); + scopy_(&nlp1, &vt[nlf + nlp1 * vt_dim1], &c__1, &work[vli], &c__1) + ; + } + if (*info != 0) { + return 0; + } + i__2 = nl; + for (j = 1; j <= i__2; ++j) { + iwork[idxqi + j] = j; +/* L10: */ + } + if (i__ == nd && *sqre == 0) { + sqrei = 0; + } else { + sqrei = 1; + } + idxqi += nlp1; + vfi += nlp1; + vli += nlp1; + nrp1 = nr + sqrei; + if (*icompq == 0) { + slaset_("A", &nrp1, &nrp1, &c_b29, &c_b15, &work[nwork1], &smlszp); + slasdq_("U", &sqrei, &nr, &nrp1, &nru, &ncc, &d__[nrf], &e[nrf], & + work[nwork1], &smlszp, &work[nwork2], &nr, &work[nwork2], + &nr, &work[nwork2], info); + itemp = nwork1 + (nrp1 - 1) * smlszp; + scopy_(&nrp1, &work[nwork1], &c__1, &work[vfi], &c__1); + scopy_(&nrp1, &work[itemp], &c__1, &work[vli], &c__1); + } else { + slaset_("A", &nr, &nr, &c_b29, &c_b15, &u[nrf + u_dim1], ldu); + slaset_("A", &nrp1, &nrp1, &c_b29, &c_b15, &vt[nrf + vt_dim1], + ldu); + slasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], & + vt[nrf + vt_dim1], ldu, &u[nrf + u_dim1], ldu, &u[nrf + + u_dim1], ldu, &work[nwork1], info); + scopy_(&nrp1, &vt[nrf + vt_dim1], &c__1, &work[vfi], &c__1); + scopy_(&nrp1, &vt[nrf + nrp1 * vt_dim1], &c__1, &work[vli], &c__1) + ; + } + if (*info != 0) { + return 0; + } + i__2 = nr; + for (j = 1; j <= i__2; ++j) { + iwork[idxqi + j] = j; +/* L20: */ + } +/* L30: */ + } + +/* Now conquer each subproblem bottom-up. */ + + j = pow_ii(&c__2, &nlvl); + for (lvl = nlvl; lvl >= 1; --lvl) { + lvl2 = (lvl << 1) - 1; + +/* + Find the first node LF and last node LL on + the current level LVL. +*/ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__1 = lvl - 1; + lf = pow_ii(&c__2, &i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; + for (i__ = lf; i__ <= i__1; ++i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + nrf = ic + 1; + if (i__ == ll) { + sqrei = *sqre; + } else { + sqrei = 1; + } + vfi = vf + nlf - 1; + vli = vl + nlf - 1; + idxqi = idxq + nlf - 1; + alpha = d__[ic]; + beta = e[ic]; + if (*icompq == 0) { + slasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & + work[vli], &alpha, &beta, &iwork[idxqi], &perm[ + perm_offset], &givptr[1], &givcol[givcol_offset], + ldgcol, &givnum[givnum_offset], ldu, &poles[ + poles_offset], &difl[difl_offset], &difr[difr_offset], + &z__[z_offset], &k[1], &c__[1], &s[1], &work[nwork1], + &iwork[iwk], info); + } else { + --j; + slasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & + work[vli], &alpha, &beta, &iwork[idxqi], &perm[nlf + + lvl * perm_dim1], &givptr[j], &givcol[nlf + lvl2 * + givcol_dim1], ldgcol, &givnum[nlf + lvl2 * + givnum_dim1], ldu, &poles[nlf + lvl2 * poles_dim1], & + difl[nlf + lvl * difl_dim1], &difr[nlf + lvl2 * + difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[j], + &s[j], &work[nwork1], &iwork[iwk], info); + } + if (*info != 0) { + return 0; + } +/* L40: */ + } +/* L50: */ + } + + return 0; + +/* End of SLASDA */ + +} /* slasda_ */ + +/* Subroutine */ int slasdq_(char *uplo, integer *sqre, integer *n, integer * + ncvt, integer *nru, integer *ncc, real *d__, real *e, real *vt, + integer *ldvt, real *u, integer *ldu, real *c__, integer *ldc, real * + work, integer *info) +{ + /* System generated locals */ + integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2; + + /* Local variables */ + static integer i__, j; + static real r__, cs, sn; + static integer np1, isub; + static real smin; + static integer sqre1; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int slasr_(char *, char *, char *, integer *, + integer *, real *, real *, real *, integer *); + static integer iuplo; + extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, + integer *), xerbla_(char *, integer *), slartg_(real *, + real *, real *, real *, real *); + static logical rotate; + extern /* Subroutine */ int sbdsqr_(char *, integer *, integer *, integer + *, integer *, real *, real *, real *, integer *, real *, integer * + , real *, integer *, real *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + SLASDQ computes the singular value decomposition (SVD) of a real + (upper or lower) bidiagonal matrix with diagonal D and offdiagonal + E, accumulating the transformations if desired. Letting B denote + the input bidiagonal matrix, the algorithm computes orthogonal + matrices Q and P such that B = Q * S * P' (P' denotes the transpose + of P). The singular values S are overwritten on D. + + The input matrix U is changed to U * Q if desired. + The input matrix VT is changed to P' * VT if desired. + The input matrix C is changed to Q' * C if desired. + + See "Computing Small Singular Values of Bidiagonal Matrices With + Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, + LAPACK Working Note #3, for a detailed description of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + On entry, UPLO specifies whether the input bidiagonal matrix + is upper or lower bidiagonal, and wether it is square are + not. + UPLO = 'U' or 'u' B is upper bidiagonal. + UPLO = 'L' or 'l' B is lower bidiagonal. + + SQRE (input) INTEGER + = 0: then the input matrix is N-by-N. + = 1: then the input matrix is N-by-(N+1) if UPLU = 'U' and + (N+1)-by-N if UPLU = 'L'. + + The bidiagonal matrix has + N = NL + NR + 1 rows and + M = N + SQRE >= N columns. + + N (input) INTEGER + On entry, N specifies the number of rows and columns + in the matrix. N must be at least 0. + + NCVT (input) INTEGER + On entry, NCVT specifies the number of columns of + the matrix VT. NCVT must be at least 0. + + NRU (input) INTEGER + On entry, NRU specifies the number of rows of + the matrix U. NRU must be at least 0. + + NCC (input) INTEGER + On entry, NCC specifies the number of columns of + the matrix C. NCC must be at least 0. + + D (input/output) REAL array, dimension (N) + On entry, D contains the diagonal entries of the + bidiagonal matrix whose SVD is desired. On normal exit, + D contains the singular values in ascending order. + + E (input/output) REAL array. + dimension is (N-1) if SQRE = 0 and N if SQRE = 1. + On entry, the entries of E contain the offdiagonal entries + of the bidiagonal matrix whose SVD is desired. On normal + exit, E will contain 0. If the algorithm does not converge, + D and E will contain the diagonal and superdiagonal entries + of a bidiagonal matrix orthogonally equivalent to the one + given as input. + + VT (input/output) REAL array, dimension (LDVT, NCVT) + On entry, contains a matrix which on exit has been + premultiplied by P', dimension N-by-NCVT if SQRE = 0 + and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0). + + LDVT (input) INTEGER + On entry, LDVT specifies the leading dimension of VT as + declared in the calling (sub) program. LDVT must be at + least 1. If NCVT is nonzero LDVT must also be at least N. + + U (input/output) REAL array, dimension (LDU, N) + On entry, contains a matrix which on exit has been + postmultiplied by Q, dimension NRU-by-N if SQRE = 0 + and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0). + + LDU (input) INTEGER + On entry, LDU specifies the leading dimension of U as + declared in the calling (sub) program. LDU must be at + least max( 1, NRU ) . + + C (input/output) REAL array, dimension (LDC, NCC) + On entry, contains an N-by-NCC matrix which on exit + has been premultiplied by Q' dimension N-by-NCC if SQRE = 0 + and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0). + + LDC (input) INTEGER + On entry, LDC specifies the leading dimension of C as + declared in the calling (sub) program. LDC must be at + least 1. If NCC is nonzero, LDC must also be at least N. + + WORK (workspace) REAL array, dimension (4*N) + Workspace. Only referenced if one of NCVT, NRU, or NCC is + nonzero, and if N is at least 2. + + INFO (output) INTEGER + On exit, a value of 0 indicates a successful exit. + If INFO < 0, argument number -INFO is illegal. + If INFO > 0, the algorithm did not converge, and INFO + specifies how many superdiagonals did not converge. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + iuplo = 0; + if (lsame_(uplo, "U")) { + iuplo = 1; + } + if (lsame_(uplo, "L")) { + iuplo = 2; + } + if (iuplo == 0) { + *info = -1; + } else if (*sqre < 0 || *sqre > 1) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ncvt < 0) { + *info = -4; + } else if (*nru < 0) { + *info = -5; + } else if (*ncc < 0) { + *info = -6; + } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { + *info = -10; + } else if (*ldu < max(1,*nru)) { + *info = -12; + } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { + *info = -14; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLASDQ", &i__1); + return 0; + } + if (*n == 0) { + return 0; + } + +/* ROTATE is true if any singular vectors desired, false otherwise */ + + rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; + np1 = *n + 1; + sqre1 = *sqre; + +/* + If matrix non-square upper bidiagonal, rotate to be lower + bidiagonal. The rotations are on the right. +*/ + + if (iuplo == 1 && sqre1 == 1) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (rotate) { + work[i__] = cs; + work[*n + i__] = sn; + } +/* L10: */ + } + slartg_(&d__[*n], &e[*n], &cs, &sn, &r__); + d__[*n] = r__; + e[*n] = 0.f; + if (rotate) { + work[*n] = cs; + work[*n + *n] = sn; + } + iuplo = 2; + sqre1 = 0; + +/* Update singular vectors if desired. */ + + if (*ncvt > 0) { + slasr_("L", "V", "F", &np1, ncvt, &work[1], &work[np1], &vt[ + vt_offset], ldvt); + } + } + +/* + If matrix lower bidiagonal, rotate to be upper bidiagonal + by applying Givens rotations on the left. +*/ + + if (iuplo == 2) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (rotate) { + work[i__] = cs; + work[*n + i__] = sn; + } +/* L20: */ + } + +/* + If matrix (N+1)-by-N lower bidiagonal, one additional + rotation is needed. +*/ + + if (sqre1 == 1) { + slartg_(&d__[*n], &e[*n], &cs, &sn, &r__); + d__[*n] = r__; + if (rotate) { + work[*n] = cs; + work[*n + *n] = sn; + } + } + +/* Update singular vectors if desired. */ + + if (*nru > 0) { + if (sqre1 == 0) { + slasr_("R", "V", "F", nru, n, &work[1], &work[np1], &u[ + u_offset], ldu); + } else { + slasr_("R", "V", "F", nru, &np1, &work[1], &work[np1], &u[ + u_offset], ldu); + } + } + if (*ncc > 0) { + if (sqre1 == 0) { + slasr_("L", "V", "F", n, ncc, &work[1], &work[np1], &c__[ + c_offset], ldc); + } else { + slasr_("L", "V", "F", &np1, ncc, &work[1], &work[np1], &c__[ + c_offset], ldc); + } + } + } + +/* + Call SBDSQR to compute the SVD of the reduced real + N-by-N upper bidiagonal matrix. +*/ + + sbdsqr_("U", n, ncvt, nru, ncc, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[ + u_offset], ldu, &c__[c_offset], ldc, &work[1], info); + +/* + Sort the singular values into ascending order (insertion sort on + singular values, but only one transposition per singular vector) +*/ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Scan for smallest D(I). */ + + isub = i__; + smin = d__[i__]; + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + if (d__[j] < smin) { + isub = j; + smin = d__[j]; + } +/* L30: */ + } + if (isub != i__) { + +/* Swap singular values and vectors. */ + + d__[isub] = d__[i__]; + d__[i__] = smin; + if (*ncvt > 0) { + sswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[i__ + vt_dim1], + ldvt); + } + if (*nru > 0) { + sswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[i__ * u_dim1 + 1] + , &c__1); + } + if (*ncc > 0) { + sswap_(ncc, &c__[isub + c_dim1], ldc, &c__[i__ + c_dim1], ldc) + ; + } + } +/* L40: */ + } + + return 0; + +/* End of SLASDQ */ + +} /* slasdq_ */ + +/* Subroutine */ int slasdt_(integer *n, integer *lvl, integer *nd, integer * + inode, integer *ndiml, integer *ndimr, integer *msub) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Builtin functions */ + double log(doublereal); + + /* Local variables */ + static integer i__, il, ir, maxn; + static real temp; + static integer nlvl, llst, ncrnt; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + SLASDT creates a tree of subproblems for bidiagonal divide and + conquer. + + Arguments + ========= + + N (input) INTEGER + On entry, the number of diagonal elements of the + bidiagonal matrix. + + LVL (output) INTEGER + On exit, the number of levels on the computation tree. + + ND (output) INTEGER + On exit, the number of nodes on the tree. + + INODE (output) INTEGER array, dimension ( N ) + On exit, centers of subproblems. + + NDIML (output) INTEGER array, dimension ( N ) + On exit, row dimensions of left children. + + NDIMR (output) INTEGER array, dimension ( N ) + On exit, row dimensions of right children. + + MSUB (input) INTEGER. + On entry, the maximum row dimension each subproblem at the + bottom of the tree can be of. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Find the number of levels on the tree. +*/ + + /* Parameter adjustments */ + --ndimr; + --ndiml; + --inode; + + /* Function Body */ + maxn = max(1,*n); + temp = log((real) maxn / (real) (*msub + 1)) / log(2.f); + *lvl = (integer) temp + 1; + + i__ = *n / 2; + inode[1] = i__ + 1; + ndiml[1] = i__; + ndimr[1] = *n - i__ - 1; + il = 0; + ir = 1; + llst = 1; + i__1 = *lvl - 1; + for (nlvl = 1; nlvl <= i__1; ++nlvl) { + +/* + Constructing the tree at (NLVL+1)-st level. The number of + nodes created on this level is LLST * 2. +*/ + + i__2 = llst - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + il += 2; + ir += 2; + ncrnt = llst + i__; + ndiml[il] = ndiml[ncrnt] / 2; + ndimr[il] = ndiml[ncrnt] - ndiml[il] - 1; + inode[il] = inode[ncrnt] - ndimr[il] - 1; + ndiml[ir] = ndimr[ncrnt] / 2; + ndimr[ir] = ndimr[ncrnt] - ndiml[ir] - 1; + inode[ir] = inode[ncrnt] + ndiml[ir] + 1; +/* L10: */ + } + llst <<= 1; +/* L20: */ + } + *nd = (llst << 1) - 1; + + return 0; + +/* End of SLASDT */ + +} /* slasdt_ */ + +/* Subroutine */ int slaset_(char *uplo, integer *m, integer *n, real *alpha, + real *beta, real *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLASET initializes an m-by-n matrix A to BETA on the diagonal and + ALPHA on the offdiagonals. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies the part of the matrix A to be set. + = 'U': Upper triangular part is set; the strictly lower + triangular part of A is not changed. + = 'L': Lower triangular part is set; the strictly upper + triangular part of A is not changed. + Otherwise: All of the matrix A is set. + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + ALPHA (input) REAL + The constant to which the offdiagonal elements are to be set. + + BETA (input) REAL + The constant to which the diagonal elements are to be set. + + A (input/output) REAL array, dimension (LDA,N) + On exit, the leading m-by-n submatrix of A is set as follows: + + if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n, + if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n, + otherwise, A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j, + + and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (lsame_(uplo, "U")) { + +/* + Set the strictly upper triangular or trapezoidal part of the + array to ALPHA. +*/ + + i__1 = *n; + for (j = 2; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j - 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = *alpha; +/* L10: */ + } +/* L20: */ + } + + } else if (lsame_(uplo, "L")) { + +/* + Set the strictly lower triangular or trapezoidal part of the + array to ALPHA. +*/ + + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = *alpha; +/* L30: */ + } +/* L40: */ + } + + } else { + +/* Set the leading m-by-n submatrix to ALPHA. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = *alpha; +/* L50: */ + } +/* L60: */ + } + } + +/* Set the first min(M,N) diagonal elements to BETA. */ + + i__1 = min(*m,*n); + for (i__ = 1; i__ <= i__1; ++i__) { + a[i__ + i__ * a_dim1] = *beta; +/* L70: */ + } + + return 0; + +/* End of SLASET */ + +} /* slaset_ */ + +/* Subroutine */ int slasq1_(integer *n, real *d__, real *e, real *work, + integer *info) +{ + /* System generated locals */ + integer i__1, i__2; + real r__1, r__2, r__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__; + static real eps; + extern /* Subroutine */ int slas2_(real *, real *, real *, real *, real *) + ; + static real scale; + static integer iinfo; + static real sigmn, sigmx; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *), slasq2_(integer *, real *, integer *); + extern doublereal slamch_(char *); + static real safmin; + extern /* Subroutine */ int xerbla_(char *, integer *), slascl_( + char *, integer *, integer *, real *, real *, integer *, integer * + , real *, integer *, integer *), slasrt_(char *, integer * + , real *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + SLASQ1 computes the singular values of a real N-by-N bidiagonal + matrix with diagonal D and off-diagonal E. The singular values + are computed to high relative accuracy, in the absence of + denormalization, underflow and overflow. The algorithm was first + presented in + + "Accurate singular values and differential qd algorithms" by K. V. + Fernando and B. N. Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230, + 1994, + + and the present implementation is described in "An implementation of + the dqds Algorithm (Positive Case)", LAPACK Working Note. + + Arguments + ========= + + N (input) INTEGER + The number of rows and columns in the matrix. N >= 0. + + D (input/output) REAL array, dimension (N) + On entry, D contains the diagonal elements of the + bidiagonal matrix whose SVD is desired. On normal exit, + D contains the singular values in decreasing order. + + E (input/output) REAL array, dimension (N) + On entry, elements E(1:N-1) contain the off-diagonal elements + of the bidiagonal matrix whose SVD is desired. + On exit, E is overwritten. + + WORK (workspace) REAL array, dimension (4*N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: the algorithm failed + = 1, a split was marked by a positive value in E + = 2, current block of Z not diagonalized after 30*N + iterations (in inner while loop) + = 3, termination criterion of outer while loop not met + (program created more than N unreduced blocks) + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --work; + --e; + --d__; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -2; + i__1 = -(*info); + xerbla_("SLASQ1", &i__1); + return 0; + } else if (*n == 0) { + return 0; + } else if (*n == 1) { + d__[1] = dabs(d__[1]); + return 0; + } else if (*n == 2) { + slas2_(&d__[1], &e[1], &d__[2], &sigmn, &sigmx); + d__[1] = sigmx; + d__[2] = sigmn; + return 0; + } + +/* Estimate the largest singular value. */ + + sigmx = 0.f; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = (r__1 = d__[i__], dabs(r__1)); +/* Computing MAX */ + r__2 = sigmx, r__3 = (r__1 = e[i__], dabs(r__1)); + sigmx = dmax(r__2,r__3); +/* L10: */ + } + d__[*n] = (r__1 = d__[*n], dabs(r__1)); + +/* Early return if SIGMX is zero (matrix is already diagonal). */ + + if (sigmx == 0.f) { + slasrt_("D", n, &d__[1], &iinfo); + return 0; + } + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__1 = sigmx, r__2 = d__[i__]; + sigmx = dmax(r__1,r__2); +/* L20: */ + } + +/* + Copy D and E into WORK (in the Z format) and scale (squaring the + input data makes scaling by a power of the radix pointless). +*/ + + eps = slamch_("Precision"); + safmin = slamch_("Safe minimum"); + scale = sqrt(eps / safmin); + scopy_(n, &d__[1], &c__1, &work[1], &c__2); + i__1 = *n - 1; + scopy_(&i__1, &e[1], &c__1, &work[2], &c__2); + i__1 = (*n << 1) - 1; + i__2 = (*n << 1) - 1; + slascl_("G", &c__0, &c__0, &sigmx, &scale, &i__1, &c__1, &work[1], &i__2, + &iinfo); + +/* Compute the q's and e's. */ + + i__1 = (*n << 1) - 1; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing 2nd power */ + r__1 = work[i__]; + work[i__] = r__1 * r__1; +/* L30: */ + } + work[*n * 2] = 0.f; + + slasq2_(n, &work[1], info); + + if (*info == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = sqrt(work[i__]); +/* L40: */ + } + slascl_("G", &c__0, &c__0, &scale, &sigmx, n, &c__1, &d__[1], n, & + iinfo); + } + + return 0; + +/* End of SLASQ1 */ + +} /* slasq1_ */ + +/* Subroutine */ int slasq2_(integer *n, real *z__, integer *info) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + real r__1, r__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static real d__, e; + static integer k; + static real s, t; + static integer i0, i4, n0, pp; + static real eps, tol; + static integer ipn4; + static real tol2; + static logical ieee; + static integer nbig; + static real dmin__, emin, emax; + static integer ndiv, iter; + static real qmin, temp, qmax, zmax; + static integer splt, nfail; + static real desig, trace, sigma; + static integer iinfo; + extern /* Subroutine */ int slasq3_(integer *, integer *, real *, integer + *, real *, real *, real *, real *, integer *, integer *, integer * + , logical *); + extern doublereal slamch_(char *); + static integer iwhila, iwhilb; + static real oldemn, safmin; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + SLASQ2 computes all the eigenvalues of the symmetric positive + definite tridiagonal matrix associated with the qd array Z to high + relative accuracy are computed to high relative accuracy, in the + absence of denormalization, underflow and overflow. + + To see the relation of Z to the tridiagonal matrix, let L be a + unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and + let U be an upper bidiagonal matrix with 1's above and diagonal + Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the + symmetric tridiagonal to which it is similar. + + Note : SLASQ2 defines a logical variable, IEEE, which is true + on machines which follow ieee-754 floating-point standard in their + handling of infinities and NaNs, and false otherwise. This variable + is passed to SLASQ3. + + Arguments + ========= + + N (input) INTEGER + The number of rows and columns in the matrix. N >= 0. + + Z (workspace) REAL array, dimension ( 4*N ) + On entry Z holds the qd array. On exit, entries 1 to N hold + the eigenvalues in decreasing order, Z( 2*N+1 ) holds the + trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If + N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) + holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of + shifts that failed. + + INFO (output) INTEGER + = 0: successful exit + < 0: if the i-th argument is a scalar and had an illegal + value, then INFO = -i, if the i-th argument is an + array and the j-entry had an illegal value, then + INFO = -(i*100+j) + > 0: the algorithm failed + = 1, a split was marked by a positive value in E + = 2, current block of Z not diagonalized after 30*N + iterations (in inner while loop) + = 3, termination criterion of outer while loop not met + (program created more than N unreduced blocks) + + Further Details + =============== + Local Variables: I0:N0 defines a current unreduced segment of Z. + The shifts are accumulated in SIGMA. Iteration count is in ITER. + Ping-pong is controlled by PP (alternates between 0 and 1). + + ===================================================================== + + + Test the input arguments. + (in case SLASQ2 is not called by SLASQ1) +*/ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + *info = 0; + eps = slamch_("Precision"); + safmin = slamch_("Safe minimum"); + tol = eps * 100.f; +/* Computing 2nd power */ + r__1 = tol; + tol2 = r__1 * r__1; + + if (*n < 0) { + *info = -1; + xerbla_("SLASQ2", &c__1); + return 0; + } else if (*n == 0) { + return 0; + } else if (*n == 1) { + +/* 1-by-1 case. */ + + if (z__[1] < 0.f) { + *info = -201; + xerbla_("SLASQ2", &c__2); + } + return 0; + } else if (*n == 2) { + +/* 2-by-2 case. */ + + if (z__[2] < 0.f || z__[3] < 0.f) { + *info = -2; + xerbla_("SLASQ2", &c__2); + return 0; + } else if (z__[3] > z__[1]) { + d__ = z__[3]; + z__[3] = z__[1]; + z__[1] = d__; + } + z__[5] = z__[1] + z__[2] + z__[3]; + if (z__[2] > z__[3] * tol2) { + t = (z__[1] - z__[3] + z__[2]) * .5f; + s = z__[3] * (z__[2] / t); + if (s <= t) { + s = z__[3] * (z__[2] / (t * (sqrt(s / t + 1.f) + 1.f))); + } else { + s = z__[3] * (z__[2] / (t + sqrt(t) * sqrt(t + s))); + } + t = z__[1] + (s + z__[2]); + z__[3] *= z__[1] / t; + z__[1] = t; + } + z__[2] = z__[3]; + z__[6] = z__[2] + z__[1]; + return 0; + } + +/* Check for negative data and compute sums of q's and e's. */ + + z__[*n * 2] = 0.f; + emin = z__[2]; + qmax = 0.f; + zmax = 0.f; + d__ = 0.f; + e = 0.f; + + i__1 = *n - 1 << 1; + for (k = 1; k <= i__1; k += 2) { + if (z__[k] < 0.f) { + *info = -(k + 200); + xerbla_("SLASQ2", &c__2); + return 0; + } else if (z__[k + 1] < 0.f) { + *info = -(k + 201); + xerbla_("SLASQ2", &c__2); + return 0; + } + d__ += z__[k]; + e += z__[k + 1]; +/* Computing MAX */ + r__1 = qmax, r__2 = z__[k]; + qmax = dmax(r__1,r__2); +/* Computing MIN */ + r__1 = emin, r__2 = z__[k + 1]; + emin = dmin(r__1,r__2); +/* Computing MAX */ + r__1 = max(qmax,zmax), r__2 = z__[k + 1]; + zmax = dmax(r__1,r__2); +/* L10: */ + } + if (z__[(*n << 1) - 1] < 0.f) { + *info = -((*n << 1) + 199); + xerbla_("SLASQ2", &c__2); + return 0; + } + d__ += z__[(*n << 1) - 1]; +/* Computing MAX */ + r__1 = qmax, r__2 = z__[(*n << 1) - 1]; + qmax = dmax(r__1,r__2); + zmax = dmax(qmax,zmax); + +/* Check for diagonality. */ + + if (e == 0.f) { + i__1 = *n; + for (k = 2; k <= i__1; ++k) { + z__[k] = z__[(k << 1) - 1]; +/* L20: */ + } + slasrt_("D", n, &z__[1], &iinfo); + z__[(*n << 1) - 1] = d__; + return 0; + } + + trace = d__ + e; + +/* Check for zero data. */ + + if (trace == 0.f) { + z__[(*n << 1) - 1] = 0.f; + return 0; + } + +/* Check whether the machine is IEEE conformable. */ + + ieee = ilaenv_(&c__10, "SLASQ2", "N", &c__1, &c__2, &c__3, &c__4, (ftnlen) + 6, (ftnlen)1) == 1 && ilaenv_(&c__11, "SLASQ2", "N", &c__1, &c__2, + &c__3, &c__4, (ftnlen)6, (ftnlen)1) == 1; + +/* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). */ + + for (k = *n << 1; k >= 2; k += -2) { + z__[k * 2] = 0.f; + z__[(k << 1) - 1] = z__[k]; + z__[(k << 1) - 2] = 0.f; + z__[(k << 1) - 3] = z__[k - 1]; +/* L30: */ + } + + i0 = 1; + n0 = *n; + +/* Reverse the qd-array, if warranted. */ + + if (z__[(i0 << 2) - 3] * 1.5f < z__[(n0 << 2) - 3]) { + ipn4 = i0 + n0 << 2; + i__1 = i0 + n0 - 1 << 1; + for (i4 = i0 << 2; i4 <= i__1; i4 += 4) { + temp = z__[i4 - 3]; + z__[i4 - 3] = z__[ipn4 - i4 - 3]; + z__[ipn4 - i4 - 3] = temp; + temp = z__[i4 - 1]; + z__[i4 - 1] = z__[ipn4 - i4 - 5]; + z__[ipn4 - i4 - 5] = temp; +/* L40: */ + } + } + +/* Initial split checking via dqd and Li's test. */ + + pp = 0; + + for (k = 1; k <= 2; ++k) { + + d__ = z__[(n0 << 2) + pp - 3]; + i__1 = (i0 << 2) + pp; + for (i4 = (n0 - 1 << 2) + pp; i4 >= i__1; i4 += -4) { + if (z__[i4 - 1] <= tol2 * d__) { + z__[i4 - 1] = -0.f; + d__ = z__[i4 - 3]; + } else { + d__ = z__[i4 - 3] * (d__ / (d__ + z__[i4 - 1])); + } +/* L50: */ + } + +/* dqd maps Z to ZZ plus Li's test. */ + + emin = z__[(i0 << 2) + pp + 1]; + d__ = z__[(i0 << 2) + pp - 3]; + i__1 = (n0 - 1 << 2) + pp; + for (i4 = (i0 << 2) + pp; i4 <= i__1; i4 += 4) { + z__[i4 - (pp << 1) - 2] = d__ + z__[i4 - 1]; + if (z__[i4 - 1] <= tol2 * d__) { + z__[i4 - 1] = -0.f; + z__[i4 - (pp << 1) - 2] = d__; + z__[i4 - (pp << 1)] = 0.f; + d__ = z__[i4 + 1]; + } else if (safmin * z__[i4 + 1] < z__[i4 - (pp << 1) - 2] && + safmin * z__[i4 - (pp << 1) - 2] < z__[i4 + 1]) { + temp = z__[i4 + 1] / z__[i4 - (pp << 1) - 2]; + z__[i4 - (pp << 1)] = z__[i4 - 1] * temp; + d__ *= temp; + } else { + z__[i4 - (pp << 1)] = z__[i4 + 1] * (z__[i4 - 1] / z__[i4 - ( + pp << 1) - 2]); + d__ = z__[i4 + 1] * (d__ / z__[i4 - (pp << 1) - 2]); + } +/* Computing MIN */ + r__1 = emin, r__2 = z__[i4 - (pp << 1)]; + emin = dmin(r__1,r__2); +/* L60: */ + } + z__[(n0 << 2) - pp - 2] = d__; + +/* Now find qmax. */ + + qmax = z__[(i0 << 2) - pp - 2]; + i__1 = (n0 << 2) - pp - 2; + for (i4 = (i0 << 2) - pp + 2; i4 <= i__1; i4 += 4) { +/* Computing MAX */ + r__1 = qmax, r__2 = z__[i4]; + qmax = dmax(r__1,r__2); +/* L70: */ + } + +/* Prepare for the next iteration on K. */ + + pp = 1 - pp; +/* L80: */ + } + + iter = 2; + nfail = 0; + ndiv = n0 - i0 << 1; + + i__1 = *n + 1; + for (iwhila = 1; iwhila <= i__1; ++iwhila) { + if (n0 < 1) { + goto L150; + } + +/* + While array unfinished do + + E(N0) holds the value of SIGMA when submatrix in I0:N0 + splits from the rest of the array, but is negated. +*/ + + desig = 0.f; + if (n0 == *n) { + sigma = 0.f; + } else { + sigma = -z__[(n0 << 2) - 1]; + } + if (sigma < 0.f) { + *info = 1; + return 0; + } + +/* + Find last unreduced submatrix's top index I0, find QMAX and + EMIN. Find Gershgorin-type bound if Q's much greater than E's. +*/ + + emax = 0.f; + if (n0 > i0) { + emin = (r__1 = z__[(n0 << 2) - 5], dabs(r__1)); + } else { + emin = 0.f; + } + qmin = z__[(n0 << 2) - 3]; + qmax = qmin; + for (i4 = n0 << 2; i4 >= 8; i4 += -4) { + if (z__[i4 - 5] <= 0.f) { + goto L100; + } + if (qmin >= emax * 4.f) { +/* Computing MIN */ + r__1 = qmin, r__2 = z__[i4 - 3]; + qmin = dmin(r__1,r__2); +/* Computing MAX */ + r__1 = emax, r__2 = z__[i4 - 5]; + emax = dmax(r__1,r__2); + } +/* Computing MAX */ + r__1 = qmax, r__2 = z__[i4 - 7] + z__[i4 - 5]; + qmax = dmax(r__1,r__2); +/* Computing MIN */ + r__1 = emin, r__2 = z__[i4 - 5]; + emin = dmin(r__1,r__2); +/* L90: */ + } + i4 = 4; + +L100: + i0 = i4 / 4; + +/* Store EMIN for passing to SLASQ3. */ + + z__[(n0 << 2) - 1] = emin; + +/* + Put -(initial shift) into DMIN. + + Computing MAX +*/ + r__1 = 0.f, r__2 = qmin - sqrt(qmin) * 2.f * sqrt(emax); + dmin__ = -dmax(r__1,r__2); + +/* Now I0:N0 is unreduced. PP = 0 for ping, PP = 1 for pong. */ + + pp = 0; + + nbig = (n0 - i0 + 1) * 30; + i__2 = nbig; + for (iwhilb = 1; iwhilb <= i__2; ++iwhilb) { + if (i0 > n0) { + goto L130; + } + +/* While submatrix unfinished take a good dqds step. */ + + slasq3_(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, & + nfail, &iter, &ndiv, &ieee); + + pp = 1 - pp; + +/* When EMIN is very small check for splits. */ + + if (pp == 0 && n0 - i0 >= 3) { + if (z__[n0 * 4] <= tol2 * qmax || z__[(n0 << 2) - 1] <= tol2 * + sigma) { + splt = i0 - 1; + qmax = z__[(i0 << 2) - 3]; + emin = z__[(i0 << 2) - 1]; + oldemn = z__[i0 * 4]; + i__3 = n0 - 3 << 2; + for (i4 = i0 << 2; i4 <= i__3; i4 += 4) { + if (z__[i4] <= tol2 * z__[i4 - 3] || z__[i4 - 1] <= + tol2 * sigma) { + z__[i4 - 1] = -sigma; + splt = i4 / 4; + qmax = 0.f; + emin = z__[i4 + 3]; + oldemn = z__[i4 + 4]; + } else { +/* Computing MAX */ + r__1 = qmax, r__2 = z__[i4 + 1]; + qmax = dmax(r__1,r__2); +/* Computing MIN */ + r__1 = emin, r__2 = z__[i4 - 1]; + emin = dmin(r__1,r__2); +/* Computing MIN */ + r__1 = oldemn, r__2 = z__[i4]; + oldemn = dmin(r__1,r__2); + } +/* L110: */ + } + z__[(n0 << 2) - 1] = emin; + z__[n0 * 4] = oldemn; + i0 = splt + 1; + } + } + +/* L120: */ + } + + *info = 2; + return 0; + +/* end IWHILB */ + +L130: + +/* L140: */ + ; + } + + *info = 3; + return 0; + +/* end IWHILA */ + +L150: + +/* Move q's to the front. */ + + i__1 = *n; + for (k = 2; k <= i__1; ++k) { + z__[k] = z__[(k << 2) - 3]; +/* L160: */ + } + +/* Sort and compute sum of eigenvalues. */ + + slasrt_("D", n, &z__[1], &iinfo); + + e = 0.f; + for (k = *n; k >= 1; --k) { + e += z__[k]; +/* L170: */ + } + +/* Store trace, sum(eigenvalues) and information on performance. */ + + z__[(*n << 1) + 1] = trace; + z__[(*n << 1) + 2] = e; + z__[(*n << 1) + 3] = (real) iter; +/* Computing 2nd power */ + i__1 = *n; + z__[(*n << 1) + 4] = (real) ndiv / (real) (i__1 * i__1); + z__[(*n << 1) + 5] = nfail * 100.f / (real) iter; + return 0; + +/* End of SLASQ2 */ + +} /* slasq2_ */ + +/* Subroutine */ int slasq3_(integer *i0, integer *n0, real *z__, integer *pp, + real *dmin__, real *sigma, real *desig, real *qmax, integer *nfail, + integer *iter, integer *ndiv, logical *ieee) +{ + /* Initialized data */ + + static integer ttype = 0; + static real dmin1 = 0.f; + static real dmin2 = 0.f; + static real dn = 0.f; + static real dn1 = 0.f; + static real dn2 = 0.f; + static real tau = 0.f; + + /* System generated locals */ + integer i__1; + real r__1, r__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static real s, t; + static integer j4, nn; + static real eps, tol; + static integer n0in, ipn4; + static real tol2, temp; + extern /* Subroutine */ int slasq4_(integer *, integer *, real *, integer + *, integer *, real *, real *, real *, real *, real *, real *, + real *, integer *), slasq5_(integer *, integer *, real *, integer + *, real *, real *, real *, real *, real *, real *, real *, + logical *), slasq6_(integer *, integer *, real *, integer *, real + *, real *, real *, real *, real *, real *); + extern doublereal slamch_(char *); + static real safmin; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + May 17, 2000 + + + Purpose + ======= + + SLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. + In case of failure it changes shifts, and tries again until output + is positive. + + Arguments + ========= + + I0 (input) INTEGER + First index. + + N0 (input) INTEGER + Last index. + + Z (input) REAL array, dimension ( 4*N ) + Z holds the qd array. + + PP (input) INTEGER + PP=0 for ping, PP=1 for pong. + + DMIN (output) REAL + Minimum value of d. + + SIGMA (output) REAL + Sum of shifts used in current segment. + + DESIG (input/output) REAL + Lower order part of SIGMA + + QMAX (input) REAL + Maximum value of q. + + NFAIL (output) INTEGER + Number of times shift was too big. + + ITER (output) INTEGER + Number of iterations. + + NDIV (output) INTEGER + Number of divisions. + + TTYPE (output) INTEGER + Shift type. + + IEEE (input) LOGICAL + Flag for IEEE or non IEEE arithmetic (passed to SLASQ5). + + ===================================================================== +*/ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + + n0in = *n0; + eps = slamch_("Precision"); + safmin = slamch_("Safe minimum"); + tol = eps * 100.f; +/* Computing 2nd power */ + r__1 = tol; + tol2 = r__1 * r__1; + +/* Check for deflation. */ + +L10: + + if (*n0 < *i0) { + return 0; + } + if (*n0 == *i0) { + goto L20; + } + nn = (*n0 << 2) + *pp; + if (*n0 == *i0 + 1) { + goto L40; + } + +/* Check whether E(N0-1) is negligible, 1 eigenvalue. */ + + if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - (*pp << 1) - + 4] > tol2 * z__[nn - 7]) { + goto L30; + } + +L20: + + z__[(*n0 << 2) - 3] = z__[(*n0 << 2) + *pp - 3] + *sigma; + --(*n0); + goto L10; + +/* Check whether E(N0-2) is negligible, 2 eigenvalues. */ + +L30: + + if (z__[nn - 9] > tol2 * *sigma && z__[nn - (*pp << 1) - 8] > tol2 * z__[ + nn - 11]) { + goto L50; + } + +L40: + + if (z__[nn - 3] > z__[nn - 7]) { + s = z__[nn - 3]; + z__[nn - 3] = z__[nn - 7]; + z__[nn - 7] = s; + } + if (z__[nn - 5] > z__[nn - 3] * tol2) { + t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5f; + s = z__[nn - 3] * (z__[nn - 5] / t); + if (s <= t) { + s = z__[nn - 3] * (z__[nn - 5] / (t * (sqrt(s / t + 1.f) + 1.f))); + } else { + s = z__[nn - 3] * (z__[nn - 5] / (t + sqrt(t) * sqrt(t + s))); + } + t = z__[nn - 7] + (s + z__[nn - 5]); + z__[nn - 3] *= z__[nn - 7] / t; + z__[nn - 7] = t; + } + z__[(*n0 << 2) - 7] = z__[nn - 7] + *sigma; + z__[(*n0 << 2) - 3] = z__[nn - 3] + *sigma; + *n0 += -2; + goto L10; + +L50: + +/* Reverse the qd-array, if warranted. */ + + if (*dmin__ <= 0.f || *n0 < n0in) { + if (z__[(*i0 << 2) + *pp - 3] * 1.5f < z__[(*n0 << 2) + *pp - 3]) { + ipn4 = *i0 + *n0 << 2; + i__1 = *i0 + *n0 - 1 << 1; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + temp = z__[j4 - 3]; + z__[j4 - 3] = z__[ipn4 - j4 - 3]; + z__[ipn4 - j4 - 3] = temp; + temp = z__[j4 - 2]; + z__[j4 - 2] = z__[ipn4 - j4 - 2]; + z__[ipn4 - j4 - 2] = temp; + temp = z__[j4 - 1]; + z__[j4 - 1] = z__[ipn4 - j4 - 5]; + z__[ipn4 - j4 - 5] = temp; + temp = z__[j4]; + z__[j4] = z__[ipn4 - j4 - 4]; + z__[ipn4 - j4 - 4] = temp; +/* L60: */ + } + if (*n0 - *i0 <= 4) { + z__[(*n0 << 2) + *pp - 1] = z__[(*i0 << 2) + *pp - 1]; + z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp]; + } +/* Computing MIN */ + r__1 = dmin2, r__2 = z__[(*n0 << 2) + *pp - 1]; + dmin2 = dmin(r__1,r__2); +/* Computing MIN */ + r__1 = z__[(*n0 << 2) + *pp - 1], r__2 = z__[(*i0 << 2) + *pp - 1] + , r__1 = min(r__1,r__2), r__2 = z__[(*i0 << 2) + *pp + 3]; + z__[(*n0 << 2) + *pp - 1] = dmin(r__1,r__2); +/* Computing MIN */ + r__1 = z__[(*n0 << 2) - *pp], r__2 = z__[(*i0 << 2) - *pp], r__1 = + min(r__1,r__2), r__2 = z__[(*i0 << 2) - *pp + 4]; + z__[(*n0 << 2) - *pp] = dmin(r__1,r__2); +/* Computing MAX */ + r__1 = *qmax, r__2 = z__[(*i0 << 2) + *pp - 3], r__1 = max(r__1, + r__2), r__2 = z__[(*i0 << 2) + *pp + 1]; + *qmax = dmax(r__1,r__2); + *dmin__ = -0.f; + } + } + +/* + L70: + + Computing MIN +*/ + r__1 = z__[(*n0 << 2) + *pp - 1], r__2 = z__[(*n0 << 2) + *pp - 9], r__1 = + min(r__1,r__2), r__2 = dmin2 + z__[(*n0 << 2) - *pp]; + if (*dmin__ < 0.f || safmin * *qmax < dmin(r__1,r__2)) { + +/* Choose a shift. */ + + slasq4_(i0, n0, &z__[1], pp, &n0in, dmin__, &dmin1, &dmin2, &dn, &dn1, + &dn2, &tau, &ttype); + +/* Call dqds until DMIN > 0. */ + +L80: + + slasq5_(i0, n0, &z__[1], pp, &tau, dmin__, &dmin1, &dmin2, &dn, &dn1, + &dn2, ieee); + + *ndiv += *n0 - *i0 + 2; + ++(*iter); + +/* Check status. */ + + if (*dmin__ >= 0.f && dmin1 > 0.f) { + +/* Success. */ + + goto L100; + + } else if (*dmin__ < 0.f && dmin1 > 0.f && z__[(*n0 - 1 << 2) - *pp] < + tol * (*sigma + dn1) && dabs(dn) < tol * *sigma) { + +/* Convergence hidden by negative DN. */ + + z__[(*n0 - 1 << 2) - *pp + 2] = 0.f; + *dmin__ = 0.f; + goto L100; + } else if (*dmin__ < 0.f) { + +/* TAU too big. Select new TAU and try again. */ + + ++(*nfail); + if (ttype < -22) { + +/* Failed twice. Play it safe. */ + + tau = 0.f; + } else if (dmin1 > 0.f) { + +/* Late failure. Gives excellent shift. */ + + tau = (tau + *dmin__) * (1.f - eps * 2.f); + ttype += -11; + } else { + +/* Early failure. Divide by 4. */ + + tau *= .25f; + ttype += -12; + } + goto L80; + } else if (*dmin__ != *dmin__) { + +/* NaN. */ + + tau = 0.f; + goto L80; + } else { + +/* Possible underflow. Play it safe. */ + + goto L90; + } + } + +/* Risk of underflow. */ + +L90: + slasq6_(i0, n0, &z__[1], pp, dmin__, &dmin1, &dmin2, &dn, &dn1, &dn2); + *ndiv += *n0 - *i0 + 2; + ++(*iter); + tau = 0.f; + +L100: + if (tau < *sigma) { + *desig += tau; + t = *sigma + *desig; + *desig -= t - *sigma; + } else { + t = *sigma + tau; + *desig = *sigma - (t - tau) + *desig; + } + *sigma = t; + + return 0; + +/* End of SLASQ3 */ + +} /* slasq3_ */ + +/* Subroutine */ int slasq4_(integer *i0, integer *n0, real *z__, integer *pp, + integer *n0in, real *dmin__, real *dmin1, real *dmin2, real *dn, + real *dn1, real *dn2, real *tau, integer *ttype) +{ + /* Initialized data */ + + static real g = 0.f; + + /* System generated locals */ + integer i__1; + real r__1, r__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static real s, a2, b1, b2; + static integer i4, nn, np; + static real gam, gap1, gap2; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + SLASQ4 computes an approximation TAU to the smallest eigenvalue + using values of d from the previous transform. + + I0 (input) INTEGER + First index. + + N0 (input) INTEGER + Last index. + + Z (input) REAL array, dimension ( 4*N ) + Z holds the qd array. + + PP (input) INTEGER + PP=0 for ping, PP=1 for pong. + + NOIN (input) INTEGER + The value of N0 at start of EIGTEST. + + DMIN (input) REAL + Minimum value of d. + + DMIN1 (input) REAL + Minimum value of d, excluding D( N0 ). + + DMIN2 (input) REAL + Minimum value of d, excluding D( N0 ) and D( N0-1 ). + + DN (input) REAL + d(N) + + DN1 (input) REAL + d(N-1) + + DN2 (input) REAL + d(N-2) + + TAU (output) REAL + This is the shift. + + TTYPE (output) INTEGER + Shift type. + + Further Details + =============== + CNST1 = 9/16 + + ===================================================================== +*/ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + +/* + A negative DMIN forces the shift to take that absolute value + TTYPE records the type of shift. +*/ + + if (*dmin__ <= 0.f) { + *tau = -(*dmin__); + *ttype = -1; + return 0; + } + + nn = (*n0 << 2) + *pp; + if (*n0in == *n0) { + +/* No eigenvalues deflated. */ + + if (*dmin__ == *dn || *dmin__ == *dn1) { + + b1 = sqrt(z__[nn - 3]) * sqrt(z__[nn - 5]); + b2 = sqrt(z__[nn - 7]) * sqrt(z__[nn - 9]); + a2 = z__[nn - 7] + z__[nn - 5]; + +/* Cases 2 and 3. */ + + if (*dmin__ == *dn && *dmin1 == *dn1) { + gap2 = *dmin2 - a2 - *dmin2 * .25f; + if (gap2 > 0.f && gap2 > b2) { + gap1 = a2 - *dn - b2 / gap2 * b2; + } else { + gap1 = a2 - *dn - (b1 + b2); + } + if (gap1 > 0.f && gap1 > b1) { +/* Computing MAX */ + r__1 = *dn - b1 / gap1 * b1, r__2 = *dmin__ * .5f; + s = dmax(r__1,r__2); + *ttype = -2; + } else { + s = 0.f; + if (*dn > b1) { + s = *dn - b1; + } + if (a2 > b1 + b2) { +/* Computing MIN */ + r__1 = s, r__2 = a2 - (b1 + b2); + s = dmin(r__1,r__2); + } +/* Computing MAX */ + r__1 = s, r__2 = *dmin__ * .333f; + s = dmax(r__1,r__2); + *ttype = -3; + } + } else { + +/* Case 4. */ + + *ttype = -4; + s = *dmin__ * .25f; + if (*dmin__ == *dn) { + gam = *dn; + a2 = 0.f; + if (z__[nn - 5] > z__[nn - 7]) { + return 0; + } + b2 = z__[nn - 5] / z__[nn - 7]; + np = nn - 9; + } else { + np = nn - (*pp << 1); + b2 = z__[np - 2]; + gam = *dn1; + if (z__[np - 4] > z__[np - 2]) { + return 0; + } + a2 = z__[np - 4] / z__[np - 2]; + if (z__[nn - 9] > z__[nn - 11]) { + return 0; + } + b2 = z__[nn - 9] / z__[nn - 11]; + np = nn - 13; + } + +/* Approximate contribution to norm squared from I < NN-1. */ + + a2 += b2; + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = np; i4 >= i__1; i4 += -4) { + if (b2 == 0.f) { + goto L20; + } + b1 = b2; + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b2 *= z__[i4] / z__[i4 - 2]; + a2 += b2; + if (dmax(b2,b1) * 100.f < a2 || .563f < a2) { + goto L20; + } +/* L10: */ + } +L20: + a2 *= 1.05f; + +/* Rayleigh quotient residual bound. */ + + if (a2 < .563f) { + s = gam * (1.f - sqrt(a2)) / (a2 + 1.f); + } + } + } else if (*dmin__ == *dn2) { + +/* Case 5. */ + + *ttype = -5; + s = *dmin__ * .25f; + +/* Compute contribution to norm squared from I > NN-2. */ + + np = nn - (*pp << 1); + b1 = z__[np - 2]; + b2 = z__[np - 6]; + gam = *dn2; + if (z__[np - 8] > b2 || z__[np - 4] > b1) { + return 0; + } + a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.f); + +/* Approximate contribution to norm squared from I < NN-2. */ + + if (*n0 - *i0 > 2) { + b2 = z__[nn - 13] / z__[nn - 15]; + a2 += b2; + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = nn - 17; i4 >= i__1; i4 += -4) { + if (b2 == 0.f) { + goto L40; + } + b1 = b2; + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b2 *= z__[i4] / z__[i4 - 2]; + a2 += b2; + if (dmax(b2,b1) * 100.f < a2 || .563f < a2) { + goto L40; + } +/* L30: */ + } +L40: + a2 *= 1.05f; + } + + if (a2 < .563f) { + s = gam * (1.f - sqrt(a2)) / (a2 + 1.f); + } + } else { + +/* Case 6, no information to guide us. */ + + if (*ttype == -6) { + g += (1.f - g) * .333f; + } else if (*ttype == -18) { + g = .083250000000000005f; + } else { + g = .25f; + } + s = g * *dmin__; + *ttype = -6; + } + + } else if (*n0in == *n0 + 1) { + +/* One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. */ + + if (*dmin1 == *dn1 && *dmin2 == *dn2) { + +/* Cases 7 and 8. */ + + *ttype = -7; + s = *dmin1 * .333f; + if (z__[nn - 5] > z__[nn - 7]) { + return 0; + } + b1 = z__[nn - 5] / z__[nn - 7]; + b2 = b1; + if (b2 == 0.f) { + goto L60; + } + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { + a2 = b1; + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b1 *= z__[i4] / z__[i4 - 2]; + b2 += b1; + if (dmax(b1,a2) * 100.f < b2) { + goto L60; + } +/* L50: */ + } +L60: + b2 = sqrt(b2 * 1.05f); +/* Computing 2nd power */ + r__1 = b2; + a2 = *dmin1 / (r__1 * r__1 + 1.f); + gap2 = *dmin2 * .5f - a2; + if (gap2 > 0.f && gap2 > b2 * a2) { +/* Computing MAX */ + r__1 = s, r__2 = a2 * (1.f - a2 * 1.01f * (b2 / gap2) * b2); + s = dmax(r__1,r__2); + } else { +/* Computing MAX */ + r__1 = s, r__2 = a2 * (1.f - b2 * 1.01f); + s = dmax(r__1,r__2); + *ttype = -8; + } + } else { + +/* Case 9. */ + + s = *dmin1 * .25f; + if (*dmin1 == *dn1) { + s = *dmin1 * .5f; + } + *ttype = -9; + } + + } else if (*n0in == *n0 + 2) { + +/* + Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. + + Cases 10 and 11. +*/ + + if (*dmin2 == *dn2 && z__[nn - 5] * 2.f < z__[nn - 7]) { + *ttype = -10; + s = *dmin2 * .333f; + if (z__[nn - 5] > z__[nn - 7]) { + return 0; + } + b1 = z__[nn - 5] / z__[nn - 7]; + b2 = b1; + if (b2 == 0.f) { + goto L80; + } + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b1 *= z__[i4] / z__[i4 - 2]; + b2 += b1; + if (b1 * 100.f < b2) { + goto L80; + } +/* L70: */ + } +L80: + b2 = sqrt(b2 * 1.05f); +/* Computing 2nd power */ + r__1 = b2; + a2 = *dmin2 / (r__1 * r__1 + 1.f); + gap2 = z__[nn - 7] + z__[nn - 9] - sqrt(z__[nn - 11]) * sqrt(z__[ + nn - 9]) - a2; + if (gap2 > 0.f && gap2 > b2 * a2) { +/* Computing MAX */ + r__1 = s, r__2 = a2 * (1.f - a2 * 1.01f * (b2 / gap2) * b2); + s = dmax(r__1,r__2); + } else { +/* Computing MAX */ + r__1 = s, r__2 = a2 * (1.f - b2 * 1.01f); + s = dmax(r__1,r__2); + } + } else { + s = *dmin2 * .25f; + *ttype = -11; + } + } else if (*n0in > *n0 + 2) { + +/* Case 12, more than two eigenvalues deflated. No information. */ + + s = 0.f; + *ttype = -12; + } + + *tau = s; + return 0; + +/* End of SLASQ4 */ + +} /* slasq4_ */ + +/* Subroutine */ int slasq5_(integer *i0, integer *n0, real *z__, integer *pp, + real *tau, real *dmin__, real *dmin1, real *dmin2, real *dn, real * + dnm1, real *dnm2, logical *ieee) +{ + /* System generated locals */ + integer i__1; + real r__1, r__2; + + /* Local variables */ + static real d__; + static integer j4, j4p2; + static real emin, temp; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + May 17, 2000 + + + Purpose + ======= + + SLASQ5 computes one dqds transform in ping-pong form, one + version for IEEE machines another for non IEEE machines. + + Arguments + ========= + + I0 (input) INTEGER + First index. + + N0 (input) INTEGER + Last index. + + Z (input) REAL array, dimension ( 4*N ) + Z holds the qd array. EMIN is stored in Z(4*N0) to avoid + an extra argument. + + PP (input) INTEGER + PP=0 for ping, PP=1 for pong. + + TAU (input) REAL + This is the shift. + + DMIN (output) REAL + Minimum value of d. + + DMIN1 (output) REAL + Minimum value of d, excluding D( N0 ). + + DMIN2 (output) REAL + Minimum value of d, excluding D( N0 ) and D( N0-1 ). + + DN (output) REAL + d(N0), the last value of d. + + DNM1 (output) REAL + d(N0-1). + + DNM2 (output) REAL + d(N0-2). + + IEEE (input) LOGICAL + Flag for IEEE or non IEEE arithmetic. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + if (*n0 - *i0 - 1 <= 0) { + return 0; + } + + j4 = (*i0 << 2) + *pp - 3; + emin = z__[j4 + 4]; + d__ = z__[j4] - *tau; + *dmin__ = d__; + *dmin1 = -z__[j4]; + + if (*ieee) { + +/* Code for IEEE arithmetic. */ + + if (*pp == 0) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 2] = d__ + z__[j4 - 1]; + temp = z__[j4 + 1] / z__[j4 - 2]; + d__ = d__ * temp - *tau; + *dmin__ = dmin(*dmin__,d__); + z__[j4] = z__[j4 - 1] * temp; +/* Computing MIN */ + r__1 = z__[j4]; + emin = dmin(r__1,emin); +/* L10: */ + } + } else { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 3] = d__ + z__[j4]; + temp = z__[j4 + 2] / z__[j4 - 3]; + d__ = d__ * temp - *tau; + *dmin__ = dmin(*dmin__,d__); + z__[j4 - 1] = z__[j4] * temp; +/* Computing MIN */ + r__1 = z__[j4 - 1]; + emin = dmin(r__1,emin); +/* L20: */ + } + } + +/* Unroll last two steps. */ + + *dnm2 = d__; + *dmin2 = *dmin__; + j4 = (*n0 - 2 << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm2 + z__[j4p2]; + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; + *dmin__ = dmin(*dmin__,*dnm1); + + *dmin1 = *dmin__; + j4 += 4; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm1 + z__[j4p2]; + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; + *dmin__ = dmin(*dmin__,*dn); + + } else { + +/* Code for non IEEE arithmetic. */ + + if (*pp == 0) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 2] = d__ + z__[j4 - 1]; + if (d__ < 0.f) { + return 0; + } else { + z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); + d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]) - *tau; + } + *dmin__ = dmin(*dmin__,d__); +/* Computing MIN */ + r__1 = emin, r__2 = z__[j4]; + emin = dmin(r__1,r__2); +/* L30: */ + } + } else { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 3] = d__ + z__[j4]; + if (d__ < 0.f) { + return 0; + } else { + z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); + d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]) - *tau; + } + *dmin__ = dmin(*dmin__,d__); +/* Computing MIN */ + r__1 = emin, r__2 = z__[j4 - 1]; + emin = dmin(r__1,r__2); +/* L40: */ + } + } + +/* Unroll last two steps. */ + + *dnm2 = d__; + *dmin2 = *dmin__; + j4 = (*n0 - 2 << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm2 + z__[j4p2]; + if (*dnm2 < 0.f) { + return 0; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; + } + *dmin__ = dmin(*dmin__,*dnm1); + + *dmin1 = *dmin__; + j4 += 4; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm1 + z__[j4p2]; + if (*dnm1 < 0.f) { + return 0; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; + } + *dmin__ = dmin(*dmin__,*dn); + + } + + z__[j4 + 2] = *dn; + z__[(*n0 << 2) - *pp] = emin; + return 0; + +/* End of SLASQ5 */ + +} /* slasq5_ */ + +/* Subroutine */ int slasq6_(integer *i0, integer *n0, real *z__, integer *pp, + real *dmin__, real *dmin1, real *dmin2, real *dn, real *dnm1, real * + dnm2) +{ + /* System generated locals */ + integer i__1; + real r__1, r__2; + + /* Local variables */ + static real d__; + static integer j4, j4p2; + static real emin, temp; + extern doublereal slamch_(char *); + static real safmin; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + SLASQ6 computes one dqd (shift equal to zero) transform in + ping-pong form, with protection against underflow and overflow. + + Arguments + ========= + + I0 (input) INTEGER + First index. + + N0 (input) INTEGER + Last index. + + Z (input) REAL array, dimension ( 4*N ) + Z holds the qd array. EMIN is stored in Z(4*N0) to avoid + an extra argument. + + PP (input) INTEGER + PP=0 for ping, PP=1 for pong. + + DMIN (output) REAL + Minimum value of d. + + DMIN1 (output) REAL + Minimum value of d, excluding D( N0 ). + + DMIN2 (output) REAL + Minimum value of d, excluding D( N0 ) and D( N0-1 ). + + DN (output) REAL + d(N0), the last value of d. + + DNM1 (output) REAL + d(N0-1). + + DNM2 (output) REAL + d(N0-2). + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + if (*n0 - *i0 - 1 <= 0) { + return 0; + } + + safmin = slamch_("Safe minimum"); + j4 = (*i0 << 2) + *pp - 3; + emin = z__[j4 + 4]; + d__ = z__[j4]; + *dmin__ = d__; + + if (*pp == 0) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 2] = d__ + z__[j4 - 1]; + if (z__[j4 - 2] == 0.f) { + z__[j4] = 0.f; + d__ = z__[j4 + 1]; + *dmin__ = d__; + emin = 0.f; + } else if (safmin * z__[j4 + 1] < z__[j4 - 2] && safmin * z__[j4 + - 2] < z__[j4 + 1]) { + temp = z__[j4 + 1] / z__[j4 - 2]; + z__[j4] = z__[j4 - 1] * temp; + d__ *= temp; + } else { + z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); + d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]); + } + *dmin__ = dmin(*dmin__,d__); +/* Computing MIN */ + r__1 = emin, r__2 = z__[j4]; + emin = dmin(r__1,r__2); +/* L10: */ + } + } else { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 3] = d__ + z__[j4]; + if (z__[j4 - 3] == 0.f) { + z__[j4 - 1] = 0.f; + d__ = z__[j4 + 2]; + *dmin__ = d__; + emin = 0.f; + } else if (safmin * z__[j4 + 2] < z__[j4 - 3] && safmin * z__[j4 + - 3] < z__[j4 + 2]) { + temp = z__[j4 + 2] / z__[j4 - 3]; + z__[j4 - 1] = z__[j4] * temp; + d__ *= temp; + } else { + z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); + d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]); + } + *dmin__ = dmin(*dmin__,d__); +/* Computing MIN */ + r__1 = emin, r__2 = z__[j4 - 1]; + emin = dmin(r__1,r__2); +/* L20: */ + } + } + +/* Unroll last two steps. */ + + *dnm2 = d__; + *dmin2 = *dmin__; + j4 = (*n0 - 2 << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm2 + z__[j4p2]; + if (z__[j4 - 2] == 0.f) { + z__[j4] = 0.f; + *dnm1 = z__[j4p2 + 2]; + *dmin__ = *dnm1; + emin = 0.f; + } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < + z__[j4p2 + 2]) { + temp = z__[j4p2 + 2] / z__[j4 - 2]; + z__[j4] = z__[j4p2] * temp; + *dnm1 = *dnm2 * temp; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]); + } + *dmin__ = dmin(*dmin__,*dnm1); + + *dmin1 = *dmin__; + j4 += 4; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm1 + z__[j4p2]; + if (z__[j4 - 2] == 0.f) { + z__[j4] = 0.f; + *dn = z__[j4p2 + 2]; + *dmin__ = *dn; + emin = 0.f; + } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < + z__[j4p2 + 2]) { + temp = z__[j4p2 + 2] / z__[j4 - 2]; + z__[j4] = z__[j4p2] * temp; + *dn = *dnm1 * temp; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]); + } + *dmin__ = dmin(*dmin__,*dn); + + z__[j4 + 2] = *dn; + z__[(*n0 << 2) - *pp] = emin; + return 0; + +/* End of SLASQ6 */ + +} /* slasq6_ */ + +/* Subroutine */ int slasr_(char *side, char *pivot, char *direct, integer *m, + integer *n, real *c__, real *s, real *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, info; + static real temp; + extern logical lsame_(char *, char *); + static real ctemp, stemp; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLASR performs the transformation + + A := P*A, when SIDE = 'L' or 'l' ( Left-hand side ) + + A := A*P', when SIDE = 'R' or 'r' ( Right-hand side ) + + where A is an m by n real matrix and P is an orthogonal matrix, + consisting of a sequence of plane rotations determined by the + parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l' + and z = n when SIDE = 'R' or 'r' ): + + When DIRECT = 'F' or 'f' ( Forward sequence ) then + + P = P( z - 1 )*...*P( 2 )*P( 1 ), + + and when DIRECT = 'B' or 'b' ( Backward sequence ) then + + P = P( 1 )*P( 2 )*...*P( z - 1 ), + + where P( k ) is a plane rotation matrix for the following planes: + + when PIVOT = 'V' or 'v' ( Variable pivot ), + the plane ( k, k + 1 ) + + when PIVOT = 'T' or 't' ( Top pivot ), + the plane ( 1, k + 1 ) + + when PIVOT = 'B' or 'b' ( Bottom pivot ), + the plane ( k, z ) + + c( k ) and s( k ) must contain the cosine and sine that define the + matrix P( k ). The two by two plane rotation part of the matrix + P( k ), R( k ), is assumed to be of the form + + R( k ) = ( c( k ) s( k ) ). + ( -s( k ) c( k ) ) + + This version vectorises across rows of the array A when SIDE = 'L'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + Specifies whether the plane rotation matrix P is applied to + A on the left or the right. + = 'L': Left, compute A := P*A + = 'R': Right, compute A:= A*P' + + DIRECT (input) CHARACTER*1 + Specifies whether P is a forward or backward sequence of + plane rotations. + = 'F': Forward, P = P( z - 1 )*...*P( 2 )*P( 1 ) + = 'B': Backward, P = P( 1 )*P( 2 )*...*P( z - 1 ) + + PIVOT (input) CHARACTER*1 + Specifies the plane for which P(k) is a plane rotation + matrix. + = 'V': Variable pivot, the plane (k,k+1) + = 'T': Top pivot, the plane (1,k+1) + = 'B': Bottom pivot, the plane (k,z) + + M (input) INTEGER + The number of rows of the matrix A. If m <= 1, an immediate + return is effected. + + N (input) INTEGER + The number of columns of the matrix A. If n <= 1, an + immediate return is effected. + + C, S (input) REAL arrays, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + c(k) and s(k) contain the cosine and sine that define the + matrix P(k). The two by two plane rotation part of the + matrix P(k), R(k), is assumed to be of the form + R( k ) = ( c( k ) s( k ) ). + ( -s( k ) c( k ) ) + + A (input/output) REAL array, dimension (LDA,N) + The m by n matrix A. On exit, A is overwritten by P*A if + SIDE = 'R' or by A*P' if SIDE = 'L'. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + --c__; + --s; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (! (lsame_(side, "L") || lsame_(side, "R"))) { + info = 1; + } else if (! (lsame_(pivot, "V") || lsame_(pivot, + "T") || lsame_(pivot, "B"))) { + info = 2; + } else if (! (lsame_(direct, "F") || lsame_(direct, + "B"))) { + info = 3; + } else if (*m < 0) { + info = 4; + } else if (*n < 0) { + info = 5; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_("SLASR ", &info); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + if (lsame_(side, "L")) { + +/* Form P * A */ + + if (lsame_(pivot, "V")) { + if (lsame_(direct, "F")) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[j + 1 + i__ * a_dim1]; + a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp * + a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j + + i__ * a_dim1]; +/* L10: */ + } + } +/* L20: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[j + 1 + i__ * a_dim1]; + a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp * + a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j + + i__ * a_dim1]; +/* L30: */ + } + } +/* L40: */ + } + } + } else if (lsame_(pivot, "T")) { + if (lsame_(direct, "F")) { + i__1 = *m; + for (j = 2; j <= i__1; ++j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = ctemp * temp - stemp * a[ + i__ * a_dim1 + 1]; + a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[ + i__ * a_dim1 + 1]; +/* L50: */ + } + } +/* L60: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m; j >= 2; --j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = ctemp * temp - stemp * a[ + i__ * a_dim1 + 1]; + a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[ + i__ * a_dim1 + 1]; +/* L70: */ + } + } +/* L80: */ + } + } + } else if (lsame_(pivot, "B")) { + if (lsame_(direct, "F")) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1] + + ctemp * temp; + a[*m + i__ * a_dim1] = ctemp * a[*m + i__ * + a_dim1] - stemp * temp; +/* L90: */ + } + } +/* L100: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1] + + ctemp * temp; + a[*m + i__ * a_dim1] = ctemp * a[*m + i__ * + a_dim1] - stemp * temp; +/* L110: */ + } + } +/* L120: */ + } + } + } + } else if (lsame_(side, "R")) { + +/* Form A * P' */ + + if (lsame_(pivot, "V")) { + if (lsame_(direct, "F")) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[i__ + (j + 1) * a_dim1]; + a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp * + a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * temp + ctemp * a[ + i__ + j * a_dim1]; +/* L130: */ + } + } +/* L140: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[i__ + (j + 1) * a_dim1]; + a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp * + a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * temp + ctemp * a[ + i__ + j * a_dim1]; +/* L150: */ + } + } +/* L160: */ + } + } + } else if (lsame_(pivot, "T")) { + if (lsame_(direct, "F")) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = ctemp * temp - stemp * a[ + i__ + a_dim1]; + a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ + + a_dim1]; +/* L170: */ + } + } +/* L180: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n; j >= 2; --j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = ctemp * temp - stemp * a[ + i__ + a_dim1]; + a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ + + a_dim1]; +/* L190: */ + } + } +/* L200: */ + } + } + } else if (lsame_(pivot, "B")) { + if (lsame_(direct, "F")) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1] + + ctemp * temp; + a[i__ + *n * a_dim1] = ctemp * a[i__ + *n * + a_dim1] - stemp * temp; +/* L210: */ + } + } +/* L220: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1] + + ctemp * temp; + a[i__ + *n * a_dim1] = ctemp * a[i__ + *n * + a_dim1] - stemp * temp; +/* L230: */ + } + } +/* L240: */ + } + } + } + } + + return 0; + +/* End of SLASR */ + +} /* slasr_ */ + +/* Subroutine */ int slasrt_(char *id, integer *n, real *d__, integer *info) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + static integer i__, j; + static real d1, d2, d3; + static integer dir; + static real tmp; + static integer endd; + extern logical lsame_(char *, char *); + static integer stack[64] /* was [2][32] */; + static real dmnmx; + static integer start; + extern /* Subroutine */ int xerbla_(char *, integer *); + static integer stkpnt; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + Sort the numbers in D in increasing order (if ID = 'I') or + in decreasing order (if ID = 'D' ). + + Use Quick Sort, reverting to Insertion sort on arrays of + size <= 20. Dimension of STACK limits N to about 2**32. + + Arguments + ========= + + ID (input) CHARACTER*1 + = 'I': sort D in increasing order; + = 'D': sort D in decreasing order. + + N (input) INTEGER + The length of the array D. + + D (input/output) REAL array, dimension (N) + On entry, the array to be sorted. + On exit, D has been sorted into increasing order + (D(1) <= ... <= D(N) ) or into decreasing order + (D(1) >= ... >= D(N) ), depending on ID. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input paramters. +*/ + + /* Parameter adjustments */ + --d__; + + /* Function Body */ + *info = 0; + dir = -1; + if (lsame_(id, "D")) { + dir = 0; + } else if (lsame_(id, "I")) { + dir = 1; + } + if (dir == -1) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLASRT", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 1) { + return 0; + } + + stkpnt = 1; + stack[0] = 1; + stack[1] = *n; +L10: + start = stack[(stkpnt << 1) - 2]; + endd = stack[(stkpnt << 1) - 1]; + --stkpnt; + if (endd - start <= 20 && endd - start > 0) { + +/* Do Insertion sort on D( START:ENDD ) */ + + if (dir == 0) { + +/* Sort into decreasing order */ + + i__1 = endd; + for (i__ = start + 1; i__ <= i__1; ++i__) { + i__2 = start + 1; + for (j = i__; j >= i__2; --j) { + if (d__[j] > d__[j - 1]) { + dmnmx = d__[j]; + d__[j] = d__[j - 1]; + d__[j - 1] = dmnmx; + } else { + goto L30; + } +/* L20: */ + } +L30: + ; + } + + } else { + +/* Sort into increasing order */ + + i__1 = endd; + for (i__ = start + 1; i__ <= i__1; ++i__) { + i__2 = start + 1; + for (j = i__; j >= i__2; --j) { + if (d__[j] < d__[j - 1]) { + dmnmx = d__[j]; + d__[j] = d__[j - 1]; + d__[j - 1] = dmnmx; + } else { + goto L50; + } +/* L40: */ + } +L50: + ; + } + + } + + } else if (endd - start > 20) { + +/* + Partition D( START:ENDD ) and stack parts, largest one first + + Choose partition entry as median of 3 +*/ + + d1 = d__[start]; + d2 = d__[endd]; + i__ = (start + endd) / 2; + d3 = d__[i__]; + if (d1 < d2) { + if (d3 < d1) { + dmnmx = d1; + } else if (d3 < d2) { + dmnmx = d3; + } else { + dmnmx = d2; + } + } else { + if (d3 < d2) { + dmnmx = d2; + } else if (d3 < d1) { + dmnmx = d3; + } else { + dmnmx = d1; + } + } + + if (dir == 0) { + +/* Sort into decreasing order */ + + i__ = start - 1; + j = endd + 1; +L60: +L70: + --j; + if (d__[j] < dmnmx) { + goto L70; + } +L80: + ++i__; + if (d__[i__] > dmnmx) { + goto L80; + } + if (i__ < j) { + tmp = d__[i__]; + d__[i__] = d__[j]; + d__[j] = tmp; + goto L60; + } + if (j - start > endd - j - 1) { + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + } else { + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + } + } else { + +/* Sort into increasing order */ + + i__ = start - 1; + j = endd + 1; +L90: +L100: + --j; + if (d__[j] > dmnmx) { + goto L100; + } +L110: + ++i__; + if (d__[i__] < dmnmx) { + goto L110; + } + if (i__ < j) { + tmp = d__[i__]; + d__[i__] = d__[j]; + d__[j] = tmp; + goto L90; + } + if (j - start > endd - j - 1) { + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + } else { + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + } + } + } + if (stkpnt > 0) { + goto L10; + } + return 0; + +/* End of SLASRT */ + +} /* slasrt_ */ + +/* Subroutine */ int slassq_(integer *n, real *x, integer *incx, real *scale, + real *sumsq) +{ + /* System generated locals */ + integer i__1, i__2; + real r__1; + + /* Local variables */ + static integer ix; + static real absxi; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SLASSQ returns the values scl and smsq such that + + ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, + + where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is + assumed to be non-negative and scl returns the value + + scl = max( scale, abs( x( i ) ) ). + + scale and sumsq must be supplied in SCALE and SUMSQ and + scl and smsq are overwritten on SCALE and SUMSQ respectively. + + The routine makes only one pass through the vector x. + + Arguments + ========= + + N (input) INTEGER + The number of elements to be used from the vector X. + + X (input) REAL array, dimension (N) + The vector for which a scaled sum of squares is computed. + x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. + + INCX (input) INTEGER + The increment between successive values of the vector X. + INCX > 0. + + SCALE (input/output) REAL + On entry, the value scale in the equation above. + On exit, SCALE is overwritten with scl , the scaling factor + for the sum of squares. + + SUMSQ (input/output) REAL + On entry, the value sumsq in the equation above. + On exit, SUMSQ is overwritten with smsq , the basic sum of + squares from which scl has been factored out. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n > 0) { + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + if (x[ix] != 0.f) { + absxi = (r__1 = x[ix], dabs(r__1)); + if (*scale < absxi) { +/* Computing 2nd power */ + r__1 = *scale / absxi; + *sumsq = *sumsq * (r__1 * r__1) + 1; + *scale = absxi; + } else { +/* Computing 2nd power */ + r__1 = absxi / *scale; + *sumsq += r__1 * r__1; + } + } +/* L10: */ + } + } + return 0; + +/* End of SLASSQ */ + +} /* slassq_ */ + +/* Subroutine */ int slasv2_(real *f, real *g, real *h__, real *ssmin, real * + ssmax, real *snr, real *csr, real *snl, real *csl) +{ + /* System generated locals */ + real r__1; + + /* Builtin functions */ + double sqrt(doublereal), r_sign(real *, real *); + + /* Local variables */ + static real a, d__, l, m, r__, s, t, fa, ga, ha, ft, gt, ht, mm, tt, clt, + crt, slt, srt; + static integer pmax; + static real temp; + static logical swap; + static real tsign; + static logical gasmal; + extern doublereal slamch_(char *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLASV2 computes the singular value decomposition of a 2-by-2 + triangular matrix + [ F G ] + [ 0 H ]. + On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the + smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and + right singular vectors for abs(SSMAX), giving the decomposition + + [ CSL SNL ] [ F G ] [ CSR -SNR ] = [ SSMAX 0 ] + [-SNL CSL ] [ 0 H ] [ SNR CSR ] [ 0 SSMIN ]. + + Arguments + ========= + + F (input) REAL + The (1,1) element of the 2-by-2 matrix. + + G (input) REAL + The (1,2) element of the 2-by-2 matrix. + + H (input) REAL + The (2,2) element of the 2-by-2 matrix. + + SSMIN (output) REAL + abs(SSMIN) is the smaller singular value. + + SSMAX (output) REAL + abs(SSMAX) is the larger singular value. + + SNL (output) REAL + CSL (output) REAL + The vector (CSL, SNL) is a unit left singular vector for the + singular value abs(SSMAX). + + SNR (output) REAL + CSR (output) REAL + The vector (CSR, SNR) is a unit right singular vector for the + singular value abs(SSMAX). + + Further Details + =============== + + Any input parameter may be aliased with any output parameter. + + Barring over/underflow and assuming a guard digit in subtraction, all + output quantities are correct to within a few units in the last + place (ulps). + + In IEEE arithmetic, the code works correctly if one matrix element is + infinite. + + Overflow will not occur unless the largest singular value itself + overflows or is within a few ulps of overflow. (On machines with + partial overflow, like the Cray, overflow may occur if the largest + singular value is within a factor of 2 of overflow.) + + Underflow is harmless if underflow is gradual. Otherwise, results + may correspond to a matrix modified by perturbations of size near + the underflow threshold. + + ===================================================================== +*/ + + + ft = *f; + fa = dabs(ft); + ht = *h__; + ha = dabs(*h__); + +/* + PMAX points to the maximum absolute element of matrix + PMAX = 1 if F largest in absolute values + PMAX = 2 if G largest in absolute values + PMAX = 3 if H largest in absolute values +*/ + + pmax = 1; + swap = ha > fa; + if (swap) { + pmax = 3; + temp = ft; + ft = ht; + ht = temp; + temp = fa; + fa = ha; + ha = temp; + +/* Now FA .ge. HA */ + + } + gt = *g; + ga = dabs(gt); + if (ga == 0.f) { + +/* Diagonal matrix */ + + *ssmin = ha; + *ssmax = fa; + clt = 1.f; + crt = 1.f; + slt = 0.f; + srt = 0.f; + } else { + gasmal = TRUE_; + if (ga > fa) { + pmax = 2; + if (fa / ga < slamch_("EPS")) { + +/* Case of very large GA */ + + gasmal = FALSE_; + *ssmax = ga; + if (ha > 1.f) { + *ssmin = fa / (ga / ha); + } else { + *ssmin = fa / ga * ha; + } + clt = 1.f; + slt = ht / gt; + srt = 1.f; + crt = ft / gt; + } + } + if (gasmal) { + +/* Normal case */ + + d__ = fa - ha; + if (d__ == fa) { + +/* Copes with infinite F or H */ + + l = 1.f; + } else { + l = d__ / fa; + } + +/* Note that 0 .le. L .le. 1 */ + + m = gt / ft; + +/* Note that abs(M) .le. 1/macheps */ + + t = 2.f - l; + +/* Note that T .ge. 1 */ + + mm = m * m; + tt = t * t; + s = sqrt(tt + mm); + +/* Note that 1 .le. S .le. 1 + 1/macheps */ + + if (l == 0.f) { + r__ = dabs(m); + } else { + r__ = sqrt(l * l + mm); + } + +/* Note that 0 .le. R .le. 1 + 1/macheps */ + + a = (s + r__) * .5f; + +/* Note that 1 .le. A .le. 1 + abs(M) */ + + *ssmin = ha / a; + *ssmax = fa * a; + if (mm == 0.f) { + +/* Note that M is very tiny */ + + if (l == 0.f) { + t = r_sign(&c_b2521, &ft) * r_sign(&c_b15, >); + } else { + t = gt / r_sign(&d__, &ft) + m / t; + } + } else { + t = (m / (s + t) + m / (r__ + l)) * (a + 1.f); + } + l = sqrt(t * t + 4.f); + crt = 2.f / l; + srt = t / l; + clt = (crt + srt * m) / a; + slt = ht / ft * srt / a; + } + } + if (swap) { + *csl = srt; + *snl = crt; + *csr = slt; + *snr = clt; + } else { + *csl = clt; + *snl = slt; + *csr = crt; + *snr = srt; + } + +/* Correct signs of SSMAX and SSMIN */ + + if (pmax == 1) { + tsign = r_sign(&c_b15, csr) * r_sign(&c_b15, csl) * r_sign(&c_b15, f); + } + if (pmax == 2) { + tsign = r_sign(&c_b15, snr) * r_sign(&c_b15, csl) * r_sign(&c_b15, g); + } + if (pmax == 3) { + tsign = r_sign(&c_b15, snr) * r_sign(&c_b15, snl) * r_sign(&c_b15, + h__); + } + *ssmax = r_sign(ssmax, &tsign); + r__1 = tsign * r_sign(&c_b15, f) * r_sign(&c_b15, h__); + *ssmin = r_sign(ssmin, &r__1); + return 0; + +/* End of SLASV2 */ + +} /* slasv2_ */ + +/* Subroutine */ int slaswp_(integer *n, real *a, integer *lda, integer *k1, + integer *k2, integer *ipiv, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc; + static real temp; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SLASWP performs a series of row interchanges on the matrix A. + One row interchange is initiated for each of rows K1 through K2 of A. + + Arguments + ========= + + N (input) INTEGER + The number of columns of the matrix A. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the matrix of column dimension N to which the row + interchanges will be applied. + On exit, the permuted matrix. + + LDA (input) INTEGER + The leading dimension of the array A. + + K1 (input) INTEGER + The first element of IPIV for which a row interchange will + be done. + + K2 (input) INTEGER + The last element of IPIV for which a row interchange will + be done. + + IPIV (input) INTEGER array, dimension (M*abs(INCX)) + The vector of pivot indices. Only the elements in positions + K1 through K2 of IPIV are accessed. + IPIV(K) = L implies rows K and L are to be interchanged. + + INCX (input) INTEGER + The increment between successive values of IPIV. If IPIV + is negative, the pivots are applied in reverse order. + + Further Details + =============== + + Modified by + R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA + + ===================================================================== + + + Interchange row I with row IPIV(I) for each of rows K1 through K2. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + if (*incx > 0) { + ix0 = *k1; + i1 = *k1; + i2 = *k2; + inc = 1; + } else if (*incx < 0) { + ix0 = (1 - *k2) * *incx + 1; + i1 = *k2; + i2 = *k1; + inc = -1; + } else { + return 0; + } + + n32 = *n / 32 << 5; + if (n32 != 0) { + i__1 = n32; + for (j = 1; j <= i__1; j += 32) { + ix = ix0; + i__2 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) + { + ip = ipiv[ix]; + if (ip != i__) { + i__4 = j + 31; + for (k = j; k <= i__4; ++k) { + temp = a[i__ + k * a_dim1]; + a[i__ + k * a_dim1] = a[ip + k * a_dim1]; + a[ip + k * a_dim1] = temp; +/* L10: */ + } + } + ix += *incx; +/* L20: */ + } +/* L30: */ + } + } + if (n32 != *n) { + ++n32; + ix = ix0; + i__1 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__3) { + ip = ipiv[ix]; + if (ip != i__) { + i__2 = *n; + for (k = n32; k <= i__2; ++k) { + temp = a[i__ + k * a_dim1]; + a[i__ + k * a_dim1] = a[ip + k * a_dim1]; + a[ip + k * a_dim1] = temp; +/* L40: */ + } + } + ix += *incx; +/* L50: */ + } + } + + return 0; + +/* End of SLASWP */ + +} /* slaswp_ */ + +/* Subroutine */ int slatrd_(char *uplo, integer *n, integer *nb, real *a, + integer *lda, real *e, real *tau, real *w, integer *ldw) +{ + /* System generated locals */ + integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, iw; + extern doublereal sdot_(integer *, real *, integer *, real *, integer *); + static real alpha; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + sgemv_(char *, integer *, integer *, real *, real *, integer *, + real *, integer *, real *, real *, integer *), saxpy_( + integer *, real *, real *, integer *, real *, integer *), ssymv_( + char *, integer *, real *, real *, integer *, real *, integer *, + real *, real *, integer *), slarfg_(integer *, real *, + real *, integer *, real *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLATRD reduces NB rows and columns of a real symmetric matrix A to + symmetric tridiagonal form by an orthogonal similarity + transformation Q' * A * Q, and returns the matrices V and W which are + needed to apply the transformation to the unreduced part of A. + + If UPLO = 'U', SLATRD reduces the last NB rows and columns of a + matrix, of which the upper triangle is supplied; + if UPLO = 'L', SLATRD reduces the first NB rows and columns of a + matrix, of which the lower triangle is supplied. + + This is an auxiliary routine called by SSYTRD. + + Arguments + ========= + + UPLO (input) CHARACTER + Specifies whether the upper or lower triangular part of the + symmetric matrix A is stored: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. + + NB (input) INTEGER + The number of rows and columns to be reduced. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + n-by-n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n-by-n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit: + if UPLO = 'U', the last NB columns have been reduced to + tridiagonal form, with the diagonal elements overwriting + the diagonal elements of A; the elements above the diagonal + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors; + if UPLO = 'L', the first NB columns have been reduced to + tridiagonal form, with the diagonal elements overwriting + the diagonal elements of A; the elements below the diagonal + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= (1,N). + + E (output) REAL array, dimension (N-1) + If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal + elements of the last NB columns of the reduced matrix; + if UPLO = 'L', E(1:nb) contains the subdiagonal elements of + the first NB columns of the reduced matrix. + + TAU (output) REAL array, dimension (N-1) + The scalar factors of the elementary reflectors, stored in + TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. + See Further Details. + + W (output) REAL array, dimension (LDW,NB) + The n-by-nb matrix W required to update the unreduced part + of A. + + LDW (input) INTEGER + The leading dimension of the array W. LDW >= max(1,N). + + Further Details + =============== + + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors + + Q = H(n) H(n-1) . . . H(n-nb+1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), + and tau in TAU(i-1). + + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors + + Q = H(1) H(2) . . . H(nb). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), + and tau in TAU(i). + + The elements of the vectors v together form the n-by-nb matrix V + which is needed, with W, to apply the transformation to the unreduced + part of the matrix, using a symmetric rank-2k update of the form: + A := A - V*W' - W*V'. + + The contents of A on exit are illustrated by the following examples + with n = 5 and nb = 2: + + if UPLO = 'U': if UPLO = 'L': + + ( a a a v4 v5 ) ( d ) + ( a a v4 v5 ) ( 1 d ) + ( a 1 v5 ) ( v1 1 a ) + ( d 1 ) ( v1 v2 a a ) + ( d ) ( v1 v2 a a a ) + + where d denotes a diagonal element of the reduced matrix, a denotes + an element of the original matrix that is unchanged, and vi denotes + an element of the vector defining H(i). + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --e; + --tau; + w_dim1 = *ldw; + w_offset = 1 + w_dim1; + w -= w_offset; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + + if (lsame_(uplo, "U")) { + +/* Reduce last NB columns of upper triangle */ + + i__1 = *n - *nb + 1; + for (i__ = *n; i__ >= i__1; --i__) { + iw = i__ - *n + *nb; + if (i__ < *n) { + +/* Update A(1:i,i) */ + + i__2 = *n - i__; + sgemv_("No transpose", &i__, &i__2, &c_b151, &a[(i__ + 1) * + a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, & + c_b15, &a[i__ * a_dim1 + 1], &c__1); + i__2 = *n - i__; + sgemv_("No transpose", &i__, &i__2, &c_b151, &w[(iw + 1) * + w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b15, &a[i__ * a_dim1 + 1], &c__1); + } + if (i__ > 1) { + +/* + Generate elementary reflector H(i) to annihilate + A(1:i-2,i) +*/ + + i__2 = i__ - 1; + slarfg_(&i__2, &a[i__ - 1 + i__ * a_dim1], &a[i__ * a_dim1 + + 1], &c__1, &tau[i__ - 1]); + e[i__ - 1] = a[i__ - 1 + i__ * a_dim1]; + a[i__ - 1 + i__ * a_dim1] = 1.f; + +/* Compute W(1:i-1,i) */ + + i__2 = i__ - 1; + ssymv_("Upper", &i__2, &c_b15, &a[a_offset], lda, &a[i__ * + a_dim1 + 1], &c__1, &c_b29, &w[iw * w_dim1 + 1], & + c__1); + if (i__ < *n) { + i__2 = i__ - 1; + i__3 = *n - i__; + sgemv_("Transpose", &i__2, &i__3, &c_b15, &w[(iw + 1) * + w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], &c__1, & + c_b29, &w[i__ + 1 + iw * w_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &a[(i__ + 1) + * a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b15, &w[iw * w_dim1 + 1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + sgemv_("Transpose", &i__2, &i__3, &c_b15, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], &c__1, & + c_b29, &w[i__ + 1 + iw * w_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &w[(iw + 1) + * w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b15, &w[iw * w_dim1 + 1], &c__1); + } + i__2 = i__ - 1; + sscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1); + i__2 = i__ - 1; + alpha = tau[i__ - 1] * -.5f * sdot_(&i__2, &w[iw * w_dim1 + 1] + , &c__1, &a[i__ * a_dim1 + 1], &c__1); + i__2 = i__ - 1; + saxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw * + w_dim1 + 1], &c__1); + } + +/* L10: */ + } + } else { + +/* Reduce first NB columns of lower triangle */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i:n,i) */ + + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + a_dim1], + lda, &w[i__ + w_dim1], ldw, &c_b15, &a[i__ + i__ * a_dim1] + , &c__1); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &w[i__ + w_dim1], + ldw, &a[i__ + a_dim1], lda, &c_b15, &a[i__ + i__ * a_dim1] + , &c__1); + if (i__ < *n) { + +/* + Generate elementary reflector H(i) to annihilate + A(i+2:n,i) +*/ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + + i__ * a_dim1], &c__1, &tau[i__]); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.f; + +/* Compute W(i+1:n,i) */ + + i__2 = *n - i__; + ssymv_("Lower", &i__2, &c_b15, &a[i__ + 1 + (i__ + 1) * + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b29, &w[i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + sgemv_("Transpose", &i__2, &i__3, &c_b15, &w[i__ + 1 + w_dim1] + , ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &w[ + i__ * w_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + 1 + + a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b15, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + sgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + a_dim1] + , lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &w[ + i__ * w_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &w[i__ + 1 + + w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b15, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + sscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + alpha = tau[i__] * -.5f * sdot_(&i__2, &w[i__ + 1 + i__ * + w_dim1], &c__1, &a[i__ + 1 + i__ * a_dim1], &c__1); + i__2 = *n - i__; + saxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + } + +/* L20: */ + } + } + + return 0; + +/* End of SLATRD */ + +} /* slatrd_ */ + +/* Subroutine */ int slauu2_(char *uplo, integer *n, real *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__; + static real aii; + extern doublereal sdot_(integer *, real *, integer *, real *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + sgemv_(char *, integer *, integer *, real *, real *, integer *, + real *, integer *, real *, real *, integer *); + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SLAUU2 computes the product U * U' or L' * L, where the triangular + factor U or L is stored in the upper or lower triangular part of + the array A. + + If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + overwriting the factor U in A. + If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + overwriting the factor L in A. + + This is the unblocked form of the algorithm, calling Level 2 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the triangular factor stored in the array A + is upper or lower triangular: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the triangular factor U or L. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the triangular factor U or L. + On exit, if UPLO = 'U', the upper triangle of A is + overwritten with the upper triangle of the product U * U'; + if UPLO = 'L', the lower triangle of A is overwritten with + the lower triangle of the product L' * L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAUU2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (upper) { + +/* Compute the product U * U'. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + aii = a[i__ + i__ * a_dim1]; + if (i__ < *n) { + i__2 = *n - i__ + 1; + a[i__ + i__ * a_dim1] = sdot_(&i__2, &a[i__ + i__ * a_dim1], + lda, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ - 1; + i__3 = *n - i__; + sgemv_("No transpose", &i__2, &i__3, &c_b15, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & + aii, &a[i__ * a_dim1 + 1], &c__1); + } else { + sscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1); + } +/* L10: */ + } + + } else { + +/* Compute the product L' * L. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + aii = a[i__ + i__ * a_dim1]; + if (i__ < *n) { + i__2 = *n - i__ + 1; + a[i__ + i__ * a_dim1] = sdot_(&i__2, &a[i__ + i__ * a_dim1], & + c__1, &a[i__ + i__ * a_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + sgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + a_dim1] + , lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &aii, &a[ + i__ + a_dim1], lda); + } else { + sscal_(&i__, &aii, &a[i__ + a_dim1], lda); + } +/* L20: */ + } + } + + return 0; + +/* End of SLAUU2 */ + +} /* slauu2_ */ + +/* Subroutine */ int slauum_(char *uplo, integer *n, real *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, ib, nb; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static logical upper; + extern /* Subroutine */ int strmm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ), ssyrk_(char *, char *, integer + *, integer *, real *, real *, integer *, real *, real *, integer * + ), slauu2_(char *, integer *, real *, integer *, + integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SLAUUM computes the product U * U' or L' * L, where the triangular + factor U or L is stored in the upper or lower triangular part of + the array A. + + If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + overwriting the factor U in A. + If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + overwriting the factor L in A. + + This is the blocked form of the algorithm, calling Level 3 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the triangular factor stored in the array A + is upper or lower triangular: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the triangular factor U or L. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the triangular factor U or L. + On exit, if UPLO = 'U', the upper triangle of A is + overwritten with the upper triangle of the product U * U'; + if UPLO = 'L', the lower triangle of A is overwritten with + the lower triangle of the product L' * L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAUUM", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "SLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code */ + + slauu2_(uplo, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code */ + + if (upper) { + +/* Compute the product U * U'. */ + + i__1 = *n; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + i__3 = i__ - 1; + strmm_("Right", "Upper", "Transpose", "Non-unit", &i__3, &ib, + &c_b15, &a[i__ + i__ * a_dim1], lda, &a[i__ * a_dim1 + + 1], lda) + ; + slauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + sgemm_("No transpose", "Transpose", &i__3, &ib, &i__4, & + c_b15, &a[(i__ + ib) * a_dim1 + 1], lda, &a[i__ + + (i__ + ib) * a_dim1], lda, &c_b15, &a[i__ * + a_dim1 + 1], lda); + i__3 = *n - i__ - ib + 1; + ssyrk_("Upper", "No transpose", &ib, &i__3, &c_b15, &a[ + i__ + (i__ + ib) * a_dim1], lda, &c_b15, &a[i__ + + i__ * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute the product L' * L. */ + + i__2 = *n; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + i__3 = i__ - 1; + strmm_("Left", "Lower", "Transpose", "Non-unit", &ib, &i__3, & + c_b15, &a[i__ + i__ * a_dim1], lda, &a[i__ + a_dim1], + lda); + slauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + sgemm_("Transpose", "No transpose", &ib, &i__3, &i__4, & + c_b15, &a[i__ + ib + i__ * a_dim1], lda, &a[i__ + + ib + a_dim1], lda, &c_b15, &a[i__ + a_dim1], lda); + i__3 = *n - i__ - ib + 1; + ssyrk_("Lower", "Transpose", &ib, &i__3, &c_b15, &a[i__ + + ib + i__ * a_dim1], lda, &c_b15, &a[i__ + i__ * + a_dim1], lda); + } +/* L20: */ + } + } + } + + return 0; + +/* End of SLAUUM */ + +} /* slauum_ */ + +/* Subroutine */ int sorg2r_(integer *m, integer *n, integer *k, real *a, + integer *lda, real *tau, real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real r__1; + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + slarf_(char *, integer *, integer *, real *, integer *, real *, + real *, integer *, real *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SORG2R generates an m by n real matrix Q with orthonormal columns, + which is defined as the first n columns of a product of k elementary + reflectors of order m + + Q = H(1) H(2) . . . H(k) + + as returned by SGEQRF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. M >= N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. N >= K >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the i-th column must contain the vector which + defines the elementary reflector H(i), for i = 1,2,...,k, as + returned by SGEQRF in the first k columns of its array + argument A. + On exit, the m-by-n matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) REAL array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by SGEQRF. + + WORK (workspace) REAL array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SORG2R", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + +/* Initialise columns k+1:n to columns of the unit matrix */ + + i__1 = *n; + for (j = *k + 1; j <= i__1; ++j) { + i__2 = *m; + for (l = 1; l <= i__2; ++l) { + a[l + j * a_dim1] = 0.f; +/* L10: */ + } + a[j + j * a_dim1] = 1.f; +/* L20: */ + } + + for (i__ = *k; i__ >= 1; --i__) { + +/* Apply H(i) to A(i:m,i:n) from the left */ + + if (i__ < *n) { + a[i__ + i__ * a_dim1] = 1.f; + i__1 = *m - i__ + 1; + i__2 = *n - i__; + slarf_("Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + } + if (i__ < *m) { + i__1 = *m - i__; + r__1 = -tau[i__]; + sscal_(&i__1, &r__1, &a[i__ + 1 + i__ * a_dim1], &c__1); + } + a[i__ + i__ * a_dim1] = 1.f - tau[i__]; + +/* Set A(1:i-1,i) to zero */ + + i__1 = i__ - 1; + for (l = 1; l <= i__1; ++l) { + a[l + i__ * a_dim1] = 0.f; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of SORG2R */ + +} /* sorg2r_ */ + +/* Subroutine */ int sorgbr_(char *vect, integer *m, integer *n, integer *k, + real *a, integer *lda, real *tau, real *work, integer *lwork, integer + *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, nb, mn; + extern logical lsame_(char *, char *); + static integer iinfo; + static logical wantq; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int sorglq_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *), sorgqr_( + integer *, integer *, integer *, real *, integer *, real *, real * + , integer *, integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SORGBR generates one of the real orthogonal matrices Q or P**T + determined by SGEBRD when reducing a real matrix A to bidiagonal + form: A = Q * B * P**T. Q and P**T are defined as products of + elementary reflectors H(i) or G(i) respectively. + + If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q + is of order M: + if m >= k, Q = H(1) H(2) . . . H(k) and SORGBR returns the first n + columns of Q, where m >= n >= k; + if m < k, Q = H(1) H(2) . . . H(m-1) and SORGBR returns Q as an + M-by-M matrix. + + If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T + is of order N: + if k < n, P**T = G(k) . . . G(2) G(1) and SORGBR returns the first m + rows of P**T, where n >= m >= k; + if k >= n, P**T = G(n-1) . . . G(2) G(1) and SORGBR returns P**T as + an N-by-N matrix. + + Arguments + ========= + + VECT (input) CHARACTER*1 + Specifies whether the matrix Q or the matrix P**T is + required, as defined in the transformation applied by SGEBRD: + = 'Q': generate Q; + = 'P': generate P**T. + + M (input) INTEGER + The number of rows of the matrix Q or P**T to be returned. + M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q or P**T to be returned. + N >= 0. + If VECT = 'Q', M >= N >= min(M,K); + if VECT = 'P', N >= M >= min(N,K). + + K (input) INTEGER + If VECT = 'Q', the number of columns in the original M-by-K + matrix reduced by SGEBRD. + If VECT = 'P', the number of rows in the original K-by-N + matrix reduced by SGEBRD. + K >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the vectors which define the elementary reflectors, + as returned by SGEBRD. + On exit, the M-by-N matrix Q or P**T. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (input) REAL array, dimension + (min(M,K)) if VECT = 'Q' + (min(N,K)) if VECT = 'P' + TAU(i) must contain the scalar factor of the elementary + reflector H(i) or G(i), which determines Q or P**T, as + returned by SGEBRD in its array argument TAUQ or TAUP. + + WORK (workspace/output) REAL array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,min(M,N)). + For optimum performance LWORK >= min(M,N)*NB, where NB + is the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + wantq = lsame_(vect, "Q"); + mn = min(*m,*n); + lquery = *lwork == -1; + if (! wantq && ! lsame_(vect, "P")) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0 || wantq && (*n > *m || *n < min(*m,*k)) || ! wantq && ( + *m > *n || *m < min(*n,*k))) { + *info = -3; + } else if (*k < 0) { + *info = -4; + } else if (*lda < max(1,*m)) { + *info = -6; + } else if (*lwork < max(1,mn) && ! lquery) { + *info = -9; + } + + if (*info == 0) { + if (wantq) { + nb = ilaenv_(&c__1, "SORGQR", " ", m, n, k, &c_n1, (ftnlen)6, ( + ftnlen)1); + } else { + nb = ilaenv_(&c__1, "SORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, ( + ftnlen)1); + } + lwkopt = max(1,mn) * nb; + work[1] = (real) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SORGBR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + work[1] = 1.f; + return 0; + } + + if (wantq) { + +/* + Form Q, determined by a call to SGEBRD to reduce an m-by-k + matrix +*/ + + if (*m >= *k) { + +/* If m >= k, assume m >= n >= k */ + + sorgqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + iinfo); + + } else { + +/* + If m < k, assume m = n + + Shift the vectors which define the elementary reflectors one + column to the right, and set the first row and column of Q + to those of the unit matrix +*/ + + for (j = *m; j >= 2; --j) { + a[j * a_dim1 + 1] = 0.f; + i__1 = *m; + for (i__ = j + 1; i__ <= i__1; ++i__) { + a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; +/* L10: */ + } +/* L20: */ + } + a[a_dim1 + 1] = 1.f; + i__1 = *m; + for (i__ = 2; i__ <= i__1; ++i__) { + a[i__ + a_dim1] = 0.f; +/* L30: */ + } + if (*m > 1) { + +/* Form Q(2:m,2:m) */ + + i__1 = *m - 1; + i__2 = *m - 1; + i__3 = *m - 1; + sorgqr_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); + } + } + } else { + +/* + Form P', determined by a call to SGEBRD to reduce a k-by-n + matrix +*/ + + if (*k < *n) { + +/* If k < n, assume k <= m <= n */ + + sorglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + iinfo); + + } else { + +/* + If k >= n, assume m = n + + Shift the vectors which define the elementary reflectors one + row downward, and set the first row and column of P' to + those of the unit matrix +*/ + + a[a_dim1 + 1] = 1.f; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + a[i__ + a_dim1] = 0.f; +/* L40: */ + } + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + for (i__ = j - 1; i__ >= 2; --i__) { + a[i__ + j * a_dim1] = a[i__ - 1 + j * a_dim1]; +/* L50: */ + } + a[j * a_dim1 + 1] = 0.f; +/* L60: */ + } + if (*n > 1) { + +/* Form P'(2:n,2:n) */ + + i__1 = *n - 1; + i__2 = *n - 1; + i__3 = *n - 1; + sorglq_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); + } + } + } + work[1] = (real) lwkopt; + return 0; + +/* End of SORGBR */ + +} /* sorgbr_ */ + +/* Subroutine */ int sorghr_(integer *n, integer *ilo, integer *ihi, real *a, + integer *lda, real *tau, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, nb, nh, iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SORGHR generates a real orthogonal matrix Q which is defined as the + product of IHI-ILO elementary reflectors of order N, as returned by + SGEHRD: + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Arguments + ========= + + N (input) INTEGER + The order of the matrix Q. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + ILO and IHI must have the same values as in the previous call + of SGEHRD. Q is equal to the unit matrix except in the + submatrix Q(ilo+1:ihi,ilo+1:ihi). + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the vectors which define the elementary reflectors, + as returned by SGEHRD. + On exit, the N-by-N orthogonal matrix Q. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (input) REAL array, dimension (N-1) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by SGEHRD. + + WORK (workspace/output) REAL array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= IHI-ILO. + For optimum performance LWORK >= (IHI-ILO)*NB, where NB is + the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nh = *ihi - *ilo; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < max(1,nh) && ! lquery) { + *info = -8; + } + + if (*info == 0) { + nb = ilaenv_(&c__1, "SORGQR", " ", &nh, &nh, &nh, &c_n1, (ftnlen)6, ( + ftnlen)1); + lwkopt = max(1,nh) * nb; + work[1] = (real) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SORGHR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1] = 1.f; + return 0; + } + +/* + Shift the vectors which define the elementary reflectors one + column to the right, and set the first ilo and the last n-ihi + rows and columns to those of the unit matrix +*/ + + i__1 = *ilo + 1; + for (j = *ihi; j >= i__1; --j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.f; +/* L10: */ + } + i__2 = *ihi; + for (i__ = j + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; +/* L20: */ + } + i__2 = *n; + for (i__ = *ihi + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.f; +/* L30: */ + } +/* L40: */ + } + i__1 = *ilo; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.f; +/* L50: */ + } + a[j + j * a_dim1] = 1.f; +/* L60: */ + } + i__1 = *n; + for (j = *ihi + 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.f; +/* L70: */ + } + a[j + j * a_dim1] = 1.f; +/* L80: */ + } + + if (nh > 0) { + +/* Generate Q(ilo+1:ihi,ilo+1:ihi) */ + + sorgqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[* + ilo], &work[1], lwork, &iinfo); + } + work[1] = (real) lwkopt; + return 0; + +/* End of SORGHR */ + +} /* sorghr_ */ + +/* Subroutine */ int sorgl2_(integer *m, integer *n, integer *k, real *a, + integer *lda, real *tau, real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real r__1; + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + slarf_(char *, integer *, integer *, real *, integer *, real *, + real *, integer *, real *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SORGL2 generates an m by n real matrix Q with orthonormal rows, + which is defined as the first m rows of a product of k elementary + reflectors of order n + + Q = H(k) . . . H(2) H(1) + + as returned by SGELQF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. N >= M. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. M >= K >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the i-th row must contain the vector which defines + the elementary reflector H(i), for i = 1,2,...,k, as returned + by SGELQF in the first k rows of its array argument A. + On exit, the m-by-n matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) REAL array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by SGELQF. + + WORK (workspace) REAL array, dimension (M) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*k < 0 || *k > *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SORGL2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m <= 0) { + return 0; + } + + if (*k < *m) { + +/* Initialise rows k+1:m to rows of the unit matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (l = *k + 1; l <= i__2; ++l) { + a[l + j * a_dim1] = 0.f; +/* L10: */ + } + if (j > *k && j <= *m) { + a[j + j * a_dim1] = 1.f; + } +/* L20: */ + } + } + + for (i__ = *k; i__ >= 1; --i__) { + +/* Apply H(i) to A(i:m,i:n) from the right */ + + if (i__ < *n) { + if (i__ < *m) { + a[i__ + i__ * a_dim1] = 1.f; + i__1 = *m - i__; + i__2 = *n - i__ + 1; + slarf_("Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, & + tau[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + } + i__1 = *n - i__; + r__1 = -tau[i__]; + sscal_(&i__1, &r__1, &a[i__ + (i__ + 1) * a_dim1], lda); + } + a[i__ + i__ * a_dim1] = 1.f - tau[i__]; + +/* Set A(i,1:i-1) to zero */ + + i__1 = i__ - 1; + for (l = 1; l <= i__1; ++l) { + a[i__ + l * a_dim1] = 0.f; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of SORGL2 */ + +} /* sorgl2_ */ + +/* Subroutine */ int sorglq_(integer *m, integer *n, integer *k, real *a, + integer *lda, real *tau, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int sorgl2_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *), slarfb_(char *, char *, + char *, char *, integer *, integer *, integer *, real *, integer * + , real *, integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, + real *, integer *, real *, real *, integer *); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SORGLQ generates an M-by-N real matrix Q with orthonormal rows, + which is defined as the first M rows of a product of K elementary + reflectors of order N + + Q = H(k) . . . H(2) H(1) + + as returned by SGELQF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. N >= M. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. M >= K >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the i-th row must contain the vector which defines + the elementary reflector H(i), for i = 1,2,...,k, as returned + by SGELQF in the first k rows of its array argument A. + On exit, the M-by-N matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) REAL array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by SGELQF. + + WORK (workspace/output) REAL array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,M). + For optimum performance LWORK >= M*NB, where NB is + the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "SORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = max(1,*m) * nb; + work[1] = (real) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*k < 0 || *k > *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*lwork < max(1,*m) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SORGLQ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m <= 0) { + work[1] = 1.f; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < *k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "SORGLQ", " ", m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "SORGLQ", " ", m, n, k, &c_n1, + (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* + Use blocked code after the last block. + The first kk rows are handled by the block method. +*/ + + ki = (*k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = *k, i__2 = ki + nb; + kk = min(i__1,i__2); + +/* Set A(kk+1:m,1:kk) to zero. */ + + i__1 = kk; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = kk + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + kk = 0; + } + +/* Use unblocked code for the last or only block. */ + + if (kk < *m) { + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + sorgl2_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + tau[kk + 1], &work[1], &iinfo); + } + + if (kk > 0) { + +/* Use blocked code */ + + i__1 = -nb; + for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *k - i__ + 1; + ib = min(i__2,i__3); + if (i__ + ib <= *m) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__2 = *n - i__ + 1; + slarft_("Forward", "Rowwise", &i__2, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H' to A(i+ib:m,i:n) from the right */ + + i__2 = *m - i__ - ib + 1; + i__3 = *n - i__ + 1; + slarfb_("Right", "Transpose", "Forward", "Rowwise", &i__2, & + i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + + 1], &ldwork); + } + +/* Apply H' to columns i:n of current block */ + + i__2 = *n - i__ + 1; + sorgl2_(&ib, &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); + +/* Set columns 1:i-1 of current block to zero */ + + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + i__3 = i__ + ib - 1; + for (l = i__; l <= i__3; ++l) { + a[l + j * a_dim1] = 0.f; +/* L30: */ + } +/* L40: */ + } +/* L50: */ + } + } + + work[1] = (real) iws; + return 0; + +/* End of SORGLQ */ + +} /* sorglq_ */ + +/* Subroutine */ int sorgqr_(integer *m, integer *n, integer *k, real *a, + integer *lda, real *tau, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int sorg2r_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *), slarfb_(char *, char *, + char *, char *, integer *, integer *, integer *, real *, integer * + , real *, integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, + real *, integer *, real *, real *, integer *); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SORGQR generates an M-by-N real matrix Q with orthonormal columns, + which is defined as the first N columns of a product of K elementary + reflectors of order M + + Q = H(1) H(2) . . . H(k) + + as returned by SGEQRF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. M >= N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. N >= K >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the i-th column must contain the vector which + defines the elementary reflector H(i), for i = 1,2,...,k, as + returned by SGEQRF in the first k columns of its array + argument A. + On exit, the M-by-N matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) REAL array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by SGEQRF. + + WORK (workspace/output) REAL array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "SORGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = max(1,*n) * nb; + work[1] = (real) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SORGQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + work[1] = 1.f; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < *k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "SORGQR", " ", m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "SORGQR", " ", m, n, k, &c_n1, + (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* + Use blocked code after the last block. + The first kk columns are handled by the block method. +*/ + + ki = (*k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = *k, i__2 = ki + nb; + kk = min(i__1,i__2); + +/* Set A(1:kk,kk+1:n) to zero. */ + + i__1 = *n; + for (j = kk + 1; j <= i__1; ++j) { + i__2 = kk; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + kk = 0; + } + +/* Use unblocked code for the last or only block. */ + + if (kk < *n) { + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + sorg2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + tau[kk + 1], &work[1], &iinfo); + } + + if (kk > 0) { + +/* Use blocked code */ + + i__1 = -nb; + for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *k - i__ + 1; + ib = min(i__2,i__3); + if (i__ + ib <= *n) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__2 = *m - i__ + 1; + slarft_("Forward", "Columnwise", &i__2, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(i:m,i+ib:n) from the left */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__ - ib + 1; + slarfb_("Left", "No transpose", "Forward", "Columnwise", & + i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ + 1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, & + work[ib + 1], &ldwork); + } + +/* Apply H to rows i:m of current block */ + + i__2 = *m - i__ + 1; + sorg2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); + +/* Set rows 1:i-1 of current block to zero */ + + i__2 = i__ + ib - 1; + for (j = i__; j <= i__2; ++j) { + i__3 = i__ - 1; + for (l = 1; l <= i__3; ++l) { + a[l + j * a_dim1] = 0.f; +/* L30: */ + } +/* L40: */ + } +/* L50: */ + } + } + + work[1] = (real) iws; + return 0; + +/* End of SORGQR */ + +} /* sorgqr_ */ + +/* Subroutine */ int sorm2l_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; + + /* Local variables */ + static integer i__, i1, i2, i3, mi, ni, nq; + static real aii; + static logical left; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SORM2L overwrites the general real m by n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'T', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'T', + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by SGEQLF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'T': apply Q' (Transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) REAL array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + SGEQLF in the last k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) REAL array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by SGEQLF. + + C (input/output) REAL array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) REAL array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SORM2L", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) is applied to C(1:m-k+i,1:n) */ + + mi = *m - *k + i__; + } else { + +/* H(i) is applied to C(1:m,1:n-k+i) */ + + ni = *n - *k + i__; + } + +/* Apply H(i) */ + + aii = a[nq - *k + i__ + i__ * a_dim1]; + a[nq - *k + i__ + i__ * a_dim1] = 1.f; + slarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &tau[i__], &c__[ + c_offset], ldc, &work[1]); + a[nq - *k + i__ + i__ * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of SORM2L */ + +} /* sorm2l_ */ + +/* Subroutine */ int sorm2r_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; + + /* Local variables */ + static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; + static real aii; + static logical left; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SORM2R overwrites the general real m by n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'T', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'T', + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(1) H(2) . . . H(k) + + as returned by SGEQRF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'T': apply Q' (Transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) REAL array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + SGEQRF in the first k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) REAL array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by SGEQRF. + + C (input/output) REAL array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) REAL array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SORM2R", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i) */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.f; + slarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &tau[i__], &c__[ + ic + jc * c_dim1], ldc, &work[1]); + a[i__ + i__ * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of SORM2R */ + +} /* sorm2r_ */ + +/* Subroutine */ int sormbr_(char *vect, char *side, char *trans, integer *m, + integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, + integer *ldc, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2]; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i1, i2, nb, mi, ni, nq, nw; + static logical left; + extern logical lsame_(char *, char *); + static integer iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical notran, applyq; + static char transt[1]; + extern /* Subroutine */ int sormlq_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + static integer lwkopt; + static logical lquery; + extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + If VECT = 'Q', SORMBR overwrites the general real M-by-N matrix C + with + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + If VECT = 'P', SORMBR overwrites the general real M-by-N matrix C + with + SIDE = 'L' SIDE = 'R' + TRANS = 'N': P * C C * P + TRANS = 'T': P**T * C C * P**T + + Here Q and P**T are the orthogonal matrices determined by SGEBRD when + reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and + P**T are defined as products of elementary reflectors H(i) and G(i) + respectively. + + Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the + order of the orthogonal matrix Q or P**T that is applied. + + If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: + if nq >= k, Q = H(1) H(2) . . . H(k); + if nq < k, Q = H(1) H(2) . . . H(nq-1). + + If VECT = 'P', A is assumed to have been a K-by-NQ matrix: + if k < nq, P = G(1) G(2) . . . G(k); + if k >= nq, P = G(1) G(2) . . . G(nq-1). + + Arguments + ========= + + VECT (input) CHARACTER*1 + = 'Q': apply Q or Q**T; + = 'P': apply P or P**T. + + SIDE (input) CHARACTER*1 + = 'L': apply Q, Q**T, P or P**T from the Left; + = 'R': apply Q, Q**T, P or P**T from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q or P; + = 'T': Transpose, apply Q**T or P**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + If VECT = 'Q', the number of columns in the original + matrix reduced by SGEBRD. + If VECT = 'P', the number of rows in the original + matrix reduced by SGEBRD. + K >= 0. + + A (input) REAL array, dimension + (LDA,min(nq,K)) if VECT = 'Q' + (LDA,nq) if VECT = 'P' + The vectors which define the elementary reflectors H(i) and + G(i), whose products determine the matrices Q and P, as + returned by SGEBRD. + + LDA (input) INTEGER + The leading dimension of the array A. + If VECT = 'Q', LDA >= max(1,nq); + if VECT = 'P', LDA >= max(1,min(nq,K)). + + TAU (input) REAL array, dimension (min(nq,K)) + TAU(i) must contain the scalar factor of the elementary + reflector H(i) or G(i) which determines Q or P, as returned + by SGEBRD in the array argument TAUQ or TAUP. + + C (input/output) REAL array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q + or P*C or P**T*C or C*P or C*P**T. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) REAL array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + applyq = lsame_(vect, "Q"); + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q or P and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! applyq && ! lsame_(vect, "P")) { + *info = -1; + } else if (! left && ! lsame_(side, "R")) { + *info = -2; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*k < 0) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = min(nq,*k); + if (applyq && *lda < max(1,nq) || ! applyq && *lda < max(i__1,i__2)) { + *info = -8; + } else if (*ldc < max(1,*m)) { + *info = -11; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -13; + } + } + + if (*info == 0) { + if (applyq) { + if (left) { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = ilaenv_(&c__1, "SORMQR", ch__1, &i__1, n, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = ilaenv_(&c__1, "SORMQR", ch__1, m, &i__1, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } else { + if (left) { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = ilaenv_(&c__1, "SORMLQ", ch__1, &i__1, n, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = ilaenv_(&c__1, "SORMLQ", ch__1, m, &i__1, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } + lwkopt = max(1,nw) * nb; + work[1] = (real) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SORMBR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + work[1] = 1.f; + if (*m == 0 || *n == 0) { + return 0; + } + + if (applyq) { + +/* Apply Q */ + + if (nq >= *k) { + +/* Q was determined by a call to SGEBRD with nq >= k */ + + sormqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], lwork, &iinfo); + } else if (nq > 1) { + +/* Q was determined by a call to SGEBRD with nq < k */ + + if (left) { + mi = *m - 1; + ni = *n; + i1 = 2; + i2 = 1; + } else { + mi = *m; + ni = *n - 1; + i1 = 1; + i2 = 2; + } + i__1 = nq - 1; + sormqr_(side, trans, &mi, &ni, &i__1, &a[a_dim1 + 2], lda, &tau[1] + , &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + } + } else { + +/* Apply P */ + + if (notran) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + if (nq > *k) { + +/* P was determined by a call to SGEBRD with nq > k */ + + sormlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], lwork, &iinfo); + } else if (nq > 1) { + +/* P was determined by a call to SGEBRD with nq <= k */ + + if (left) { + mi = *m - 1; + ni = *n; + i1 = 2; + i2 = 1; + } else { + mi = *m; + ni = *n - 1; + i1 = 1; + i2 = 2; + } + i__1 = nq - 1; + sormlq_(side, transt, &mi, &ni, &i__1, &a[(a_dim1 << 1) + 1], lda, + &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, & + iinfo); + } + } + work[1] = (real) lwkopt; + return 0; + +/* End of SORMBR */ + +} /* sormbr_ */ + +/* Subroutine */ int sorml2_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; + + /* Local variables */ + static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; + static real aii; + static logical left; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SORML2 overwrites the general real m by n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'T', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'T', + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by SGELQF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'T': apply Q' (Transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) REAL array, dimension + (LDA,M) if SIDE = 'L', + (LDA,N) if SIDE = 'R' + The i-th row must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + SGELQF in the first k rows of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,K). + + TAU (input) REAL array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by SGELQF. + + C (input/output) REAL array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) REAL array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SORML2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i) */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.f; + slarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &tau[i__], &c__[ + ic + jc * c_dim1], ldc, &work[1]); + a[i__ + i__ * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of SORML2 */ + +} /* sorml2_ */ + +/* Subroutine */ int sormlq_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i__; + static real t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int sorml2_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *), slarfb_(char *, char *, char *, char * + , integer *, integer *, integer *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, + real *, integer *, real *, real *, integer *); + static logical notran; + static integer ldwork; + static char transt[1]; + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SORMLQ overwrites the general real M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by SGELQF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**T from the Left; + = 'R': apply Q or Q**T from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'T': Transpose, apply Q**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) REAL array, dimension + (LDA,M) if SIDE = 'L', + (LDA,N) if SIDE = 'R' + The i-th row must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + SGELQF in the first k rows of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,K). + + TAU (input) REAL array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by SGELQF. + + C (input/output) REAL array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) REAL array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* + Determine the block size. NB may be at most NBMAX, where NBMAX + is used to define the local array T. + + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "SORMLQ", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1] = (real) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SORMLQ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1] = 1.f; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "SORMLQ", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + sorml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + if (notran) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__4 = nq - i__ + 1; + slarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1], + lda, &tau[i__], t, &c__65); + if (left) { + +/* H or H' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H' */ + + slarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__ + + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1], + ldc, &work[1], &ldwork); +/* L10: */ + } + } + work[1] = (real) lwkopt; + return 0; + +/* End of SORMLQ */ + +} /* sormlq_ */ + +/* Subroutine */ int sormql_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i__; + static real t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int sorm2l_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *), slarfb_(char *, char *, char *, char * + , integer *, integer *, integer *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, + real *, integer *, real *, real *, integer *); + static logical notran; + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SORMQL overwrites the general real M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by SGEQLF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**T from the Left; + = 'R': apply Q or Q**T from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'T': Transpose, apply Q**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) REAL array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + SGEQLF in the last k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) REAL array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by SGEQLF. + + C (input/output) REAL array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) REAL array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* + Determine the block size. NB may be at most NBMAX, where NBMAX + is used to define the local array T. + + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "SORMQL", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1] = (real) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SORMQL", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1] = 1.f; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "SORMQL", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + sorm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i+ib-1) . . . H(i+1) H(i) +*/ + + i__4 = nq - *k + i__ + ib - 1; + slarft_("Backward", "Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1] + , lda, &tau[i__], t, &c__65); + if (left) { + +/* H or H' is applied to C(1:m-k+i+ib-1,1:n) */ + + mi = *m - *k + i__ + ib - 1; + } else { + +/* H or H' is applied to C(1:m,1:n-k+i+ib-1) */ + + ni = *n - *k + i__ + ib - 1; + } + +/* Apply H or H' */ + + slarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, &a[ + i__ * a_dim1 + 1], lda, t, &c__65, &c__[c_offset], ldc, & + work[1], &ldwork); +/* L10: */ + } + } + work[1] = (real) lwkopt; + return 0; + +/* End of SORMQL */ + +} /* sormql_ */ + +/* Subroutine */ int sormqr_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i__; + static real t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int sorm2r_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *), slarfb_(char *, char *, char *, char * + , integer *, integer *, integer *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, + real *, integer *, real *, real *, integer *); + static logical notran; + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SORMQR overwrites the general real M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(1) H(2) . . . H(k) + + as returned by SGEQRF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**T from the Left; + = 'R': apply Q or Q**T from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'T': Transpose, apply Q**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) REAL array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + SGEQRF in the first k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) REAL array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by SGEQRF. + + C (input/output) REAL array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) REAL array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* + Determine the block size. NB may be at most NBMAX, where NBMAX + is used to define the local array T. + + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "SORMQR", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1] = (real) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SORMQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1] = 1.f; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "SORMQR", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + sorm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__4 = nq - i__ + 1; + slarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], t, &c__65) + ; + if (left) { + +/* H or H' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H' */ + + slarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[ + i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * + c_dim1], ldc, &work[1], &ldwork); +/* L10: */ + } + } + work[1] = (real) lwkopt; + return 0; + +/* End of SORMQR */ + +} /* sormqr_ */ + +/* Subroutine */ int sormtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2, i__3; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i1, i2, nb, mi, ni, nq, nw; + static logical left; + extern logical lsame_(char *, char *); + static integer iinfo; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int sormql_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + static integer lwkopt; + static logical lquery; + extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SORMTR overwrites the general real M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + where Q is a real orthogonal matrix of order nq, with nq = m if + SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + nq-1 elementary reflectors, as returned by SSYTRD: + + if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); + + if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**T from the Left; + = 'R': apply Q or Q**T from the Right. + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A contains elementary reflectors + from SSYTRD; + = 'L': Lower triangle of A contains elementary reflectors + from SSYTRD. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'T': Transpose, apply Q**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + A (input) REAL array, dimension + (LDA,M) if SIDE = 'L' + (LDA,N) if SIDE = 'R' + The vectors which define the elementary reflectors, as + returned by SSYTRD. + + LDA (input) INTEGER + The leading dimension of the array A. + LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. + + TAU (input) REAL array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by SSYTRD. + + C (input/output) REAL array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) REAL array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! upper && ! lsame_(uplo, "L")) { + *info = -2; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T")) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + if (upper) { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *m - 1; + i__3 = *m - 1; + nb = ilaenv_(&c__1, "SORMQL", ch__1, &i__2, n, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, "SORMQL", ch__1, m, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } else { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *m - 1; + i__3 = *m - 1; + nb = ilaenv_(&c__1, "SORMQR", ch__1, &i__2, n, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, "SORMQR", ch__1, m, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } + lwkopt = max(1,nw) * nb; + work[1] = (real) lwkopt; + } + + if (*info != 0) { + i__2 = -(*info); + xerbla_("SORMTR", &i__2); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || nq == 1) { + work[1] = 1.f; + return 0; + } + + if (left) { + mi = *m - 1; + ni = *n; + } else { + mi = *m; + ni = *n - 1; + } + + if (upper) { + +/* Q was determined by a call to SSYTRD with UPLO = 'U' */ + + i__2 = nq - 1; + sormql_(side, trans, &mi, &ni, &i__2, &a[(a_dim1 << 1) + 1], lda, & + tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo); + } else { + +/* Q was determined by a call to SSYTRD with UPLO = 'L' */ + + if (left) { + i1 = 2; + i2 = 1; + } else { + i1 = 1; + i2 = 2; + } + i__2 = nq - 1; + sormqr_(side, trans, &mi, &ni, &i__2, &a[a_dim1 + 2], lda, &tau[1], & + c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + } + work[1] = (real) lwkopt; + return 0; + +/* End of SORMTR */ + +} /* sormtr_ */ + +/* Subroutine */ int spotf2_(char *uplo, integer *n, real *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + real r__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer j; + static real ajj; + extern doublereal sdot_(integer *, real *, integer *, real *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + sgemv_(char *, integer *, integer *, real *, real *, integer *, + real *, integer *, real *, real *, integer *); + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SPOTF2 computes the Cholesky factorization of a real symmetric + positive definite matrix A. + + The factorization has the form + A = U' * U , if UPLO = 'U', or + A = L * L', if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. + + This is the unblocked version of the algorithm, calling Level 2 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + symmetric matrix A is stored. + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + n by n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n by n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U'*U or A = L*L'. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + > 0: if INFO = k, the leading minor of order k is not + positive definite, and the factorization could not be + completed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SPOTF2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute U(J,J) and test for non-positive-definiteness. */ + + i__2 = j - 1; + ajj = a[j + j * a_dim1] - sdot_(&i__2, &a[j * a_dim1 + 1], &c__1, + &a[j * a_dim1 + 1], &c__1); + if (ajj <= 0.f) { + a[j + j * a_dim1] = ajj; + goto L30; + } + ajj = sqrt(ajj); + a[j + j * a_dim1] = ajj; + +/* Compute elements J+1:N of row J. */ + + if (j < *n) { + i__2 = j - 1; + i__3 = *n - j; + sgemv_("Transpose", &i__2, &i__3, &c_b151, &a[(j + 1) * + a_dim1 + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b15, & + a[j + (j + 1) * a_dim1], lda); + i__2 = *n - j; + r__1 = 1.f / ajj; + sscal_(&i__2, &r__1, &a[j + (j + 1) * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute L(J,J) and test for non-positive-definiteness. */ + + i__2 = j - 1; + ajj = a[j + j * a_dim1] - sdot_(&i__2, &a[j + a_dim1], lda, &a[j + + a_dim1], lda); + if (ajj <= 0.f) { + a[j + j * a_dim1] = ajj; + goto L30; + } + ajj = sqrt(ajj); + a[j + j * a_dim1] = ajj; + +/* Compute elements J+1:N of column J. */ + + if (j < *n) { + i__2 = *n - j; + i__3 = j - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b151, &a[j + 1 + + a_dim1], lda, &a[j + a_dim1], lda, &c_b15, &a[j + 1 + + j * a_dim1], &c__1); + i__2 = *n - j; + r__1 = 1.f / ajj; + sscal_(&i__2, &r__1, &a[j + 1 + j * a_dim1], &c__1); + } +/* L20: */ + } + } + goto L40; + +L30: + *info = j; + +L40: + return 0; + +/* End of SPOTF2 */ + +} /* spotf2_ */ + +/* Subroutine */ int spotrf_(char *uplo, integer *n, real *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer j, jb, nb; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static logical upper; + extern /* Subroutine */ int strsm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ), ssyrk_(char *, char *, integer + *, integer *, real *, real *, integer *, real *, real *, integer * + ), spotf2_(char *, integer *, real *, integer *, + integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + SPOTRF computes the Cholesky factorization of a real symmetric + positive definite matrix A. + + The factorization has the form + A = U**T * U, if UPLO = 'U', or + A = L * L**T, if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. + + This is the block version of the algorithm, calling Level 3 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + N-by-N upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U**T*U or A = L*L**T. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the leading minor of order i is not + positive definite, and the factorization could not be + completed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SPOTRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "SPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code. */ + + spotf2_(uplo, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code. */ + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. */ + + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* + Update and factorize the current diagonal block and test + for non-positive-definiteness. + + Computing MIN +*/ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + ssyrk_("Upper", "Transpose", &jb, &i__3, &c_b151, &a[j * + a_dim1 + 1], lda, &c_b15, &a[j + j * a_dim1], lda); + spotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Compute the current block row. */ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + sgemm_("Transpose", "No transpose", &jb, &i__3, &i__4, & + c_b151, &a[j * a_dim1 + 1], lda, &a[(j + jb) * + a_dim1 + 1], lda, &c_b15, &a[j + (j + jb) * + a_dim1], lda); + i__3 = *n - j - jb + 1; + strsm_("Left", "Upper", "Transpose", "Non-unit", &jb, & + i__3, &c_b15, &a[j + j * a_dim1], lda, &a[j + (j + + jb) * a_dim1], lda); + } +/* L10: */ + } + + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + i__2 = *n; + i__1 = nb; + for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* + Update and factorize the current diagonal block and test + for non-positive-definiteness. + + Computing MIN +*/ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + ssyrk_("Lower", "No transpose", &jb, &i__3, &c_b151, &a[j + + a_dim1], lda, &c_b15, &a[j + j * a_dim1], lda); + spotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Compute the current block column. */ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + sgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, & + c_b151, &a[j + jb + a_dim1], lda, &a[j + a_dim1], + lda, &c_b15, &a[j + jb + j * a_dim1], lda); + i__3 = *n - j - jb + 1; + strsm_("Right", "Lower", "Transpose", "Non-unit", &i__3, & + jb, &c_b15, &a[j + j * a_dim1], lda, &a[j + jb + + j * a_dim1], lda); + } +/* L20: */ + } + } + } + goto L40; + +L30: + *info = *info + j - 1; + +L40: + return 0; + +/* End of SPOTRF */ + +} /* spotrf_ */ + +/* Subroutine */ int spotri_(char *uplo, integer *n, real *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *), slauum_( + char *, integer *, real *, integer *, integer *), strtri_( + char *, char *, integer *, real *, integer *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + SPOTRI computes the inverse of a real symmetric positive definite + matrix A using the Cholesky factorization A = U**T*U or A = L*L**T + computed by SPOTRF. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the triangular factor U or L from the Cholesky + factorization A = U**T*U or A = L*L**T, as computed by + SPOTRF. + On exit, the upper or lower triangle of the (symmetric) + inverse of A, overwriting the input factor U or L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the (i,i) element of the factor U or L is + zero, and the inverse could not be computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SPOTRI", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Invert the triangular Cholesky factor U or L. */ + + strtri_(uplo, "Non-unit", n, &a[a_offset], lda, info); + if (*info > 0) { + return 0; + } + +/* Form inv(U)*inv(U)' or inv(L)'*inv(L). */ + + slauum_(uplo, n, &a[a_offset], lda, info); + + return 0; + +/* End of SPOTRI */ + +} /* spotri_ */ + +/* Subroutine */ int spotrs_(char *uplo, integer *n, integer *nrhs, real *a, + integer *lda, real *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + static logical upper; + extern /* Subroutine */ int strsm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + SPOTRS solves a system of linear equations A*X = B with a symmetric + positive definite matrix A using the Cholesky factorization + A = U**T*U or A = L*L**T computed by SPOTRF. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input) REAL array, dimension (LDA,N) + The triangular factor U or L from the Cholesky factorization + A = U**T*U or A = L*L**T, as computed by SPOTRF. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + B (input/output) REAL array, dimension (LDB,NRHS) + On entry, the right hand side matrix B. + On exit, the solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SPOTRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (upper) { + +/* + Solve A*X = B where A = U'*U. + + Solve U'*X = B, overwriting B with X. +*/ + + strsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b15, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. */ + + strsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b15, & + a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* + Solve A*X = B where A = L*L'. + + Solve L*X = B, overwriting B with X. +*/ + + strsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b15, & + a[a_offset], lda, &b[b_offset], ldb); + +/* Solve L'*X = B, overwriting B with X. */ + + strsm_("Left", "Lower", "Transpose", "Non-unit", n, nrhs, &c_b15, &a[ + a_offset], lda, &b[b_offset], ldb); + } + + return 0; + +/* End of SPOTRS */ + +} /* spotrs_ */ + +/* Subroutine */ int sstedc_(char *compz, integer *n, real *d__, real *e, + real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, + integer *liwork, integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2; + real r__1, r__2; + + /* Builtin functions */ + double log(doublereal); + integer pow_ii(integer *, integer *); + double sqrt(doublereal); + + /* Local variables */ + static integer i__, j, k, m; + static real p; + static integer ii, end, lgn; + static real eps, tiny; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static integer lwmin, start; + extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, + integer *), slaed0_(integer *, integer *, integer *, real *, real + *, real *, integer *, real *, integer *, real *, integer *, + integer *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, + real *, integer *), slaset_(char *, integer *, integer *, + real *, real *, real *, integer *); + static integer liwmin, icompz; + static real orgnrm; + extern doublereal slanst_(char *, integer *, real *, real *); + extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *), + slasrt_(char *, integer *, real *, integer *); + static logical lquery; + static integer smlsiz; + extern /* Subroutine */ int ssteqr_(char *, integer *, real *, real *, + real *, integer *, real *, integer *); + static integer storez, strtrw; + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SSTEDC computes all eigenvalues and, optionally, eigenvectors of a + symmetric tridiagonal matrix using the divide and conquer method. + The eigenvectors of a full or band real symmetric matrix can also be + found if SSYTRD or SSPTRD or SSBTRD has been used to reduce this + matrix to tridiagonal form. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. See SLAED3 for details. + + Arguments + ========= + + COMPZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only. + = 'I': Compute eigenvectors of tridiagonal matrix also. + = 'V': Compute eigenvectors of original dense symmetric + matrix also. On entry, Z contains the orthogonal + matrix used to reduce the original matrix to + tridiagonal form. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + D (input/output) REAL array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. + + E (input/output) REAL array, dimension (N-1) + On entry, the subdiagonal elements of the tridiagonal matrix. + On exit, E has been destroyed. + + Z (input/output) REAL array, dimension (LDZ,N) + On entry, if COMPZ = 'V', then Z contains the orthogonal + matrix used in the reduction to tridiagonal form. + On exit, if INFO = 0, then if COMPZ = 'V', Z contains the + orthonormal eigenvectors of the original symmetric matrix, + and if COMPZ = 'I', Z contains the orthonormal eigenvectors + of the symmetric tridiagonal matrix. + If COMPZ = 'N', then Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= 1. + If eigenvectors are desired, then LDZ >= max(1,N). + + WORK (workspace/output) REAL array, + dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If COMPZ = 'N' or N <= 1 then LWORK must be at least 1. + If COMPZ = 'V' and N > 1 then LWORK must be at least + ( 1 + 3*N + 2*N*lg N + 3*N**2 ), + where lg( N ) = smallest integer k such + that 2**k >= N. + If COMPZ = 'I' and N > 1 then LWORK must be at least + ( 1 + 4*N + N**2 ). + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + IWORK (workspace/output) INTEGER array, dimension (LIWORK) + On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. + + LIWORK (input) INTEGER + The dimension of the array IWORK. + If COMPZ = 'N' or N <= 1 then LIWORK must be at least 1. + If COMPZ = 'V' and N > 1 then LIWORK must be at least + ( 6 + 6*N + 5*N*lg N ). + If COMPZ = 'I' and N > 1 then LIWORK must be at least + ( 3 + 5*N ). + + If LIWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal size of the IWORK array, + returns this value as the first entry of the IWORK array, and + no error message related to LIWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an eigenvalue while + working on the submatrix lying in rows and columns + INFO/(N+1) through mod(INFO,N+1). + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1 || *liwork == -1; + + if (lsame_(compz, "N")) { + icompz = 0; + } else if (lsame_(compz, "V")) { + icompz = 1; + } else if (lsame_(compz, "I")) { + icompz = 2; + } else { + icompz = -1; + } + if (*n <= 1 || icompz <= 0) { + liwmin = 1; + lwmin = 1; + } else { + lgn = (integer) (log((real) (*n)) / log(2.f)); + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (icompz == 1) { +/* Computing 2nd power */ + i__1 = *n; + lwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3; + liwmin = *n * 6 + 6 + *n * 5 * lgn; + } else if (icompz == 2) { +/* Computing 2nd power */ + i__1 = *n; + lwmin = (*n << 2) + 1 + i__1 * i__1; + liwmin = *n * 5 + 3; + } + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } else if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*liwork < liwmin && ! lquery) { + *info = -10; + } + + if (*info == 0) { + work[1] = (real) lwmin; + iwork[1] = liwmin; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SSTEDC", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*n == 1) { + if (icompz != 0) { + z__[z_dim1 + 1] = 1.f; + } + return 0; + } + + smlsiz = ilaenv_(&c__9, "SSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + +/* + If the following conditional clause is removed, then the routine + will use the Divide and Conquer routine to compute only the + eigenvalues, which requires (3N + 3N**2) real workspace and + (2 + 5N + 2N lg(N)) integer workspace. + Since on many architectures SSTERF is much faster than any other + algorithm for finding eigenvalues only, it is used here + as the default. + + If COMPZ = 'N', use SSTERF to compute the eigenvalues. +*/ + + if (icompz == 0) { + ssterf_(n, &d__[1], &e[1], info); + return 0; + } + +/* + If N is smaller than the minimum divide size (SMLSIZ+1), then + solve the problem with another solver. +*/ + + if (*n <= smlsiz) { + if (icompz == 0) { + ssterf_(n, &d__[1], &e[1], info); + return 0; + } else if (icompz == 2) { + ssteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], + info); + return 0; + } else { + ssteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], + info); + return 0; + } + } + +/* + If COMPZ = 'V', the Z matrix must be stored elsewhere for later + use. +*/ + + if (icompz == 1) { + storez = *n * *n + 1; + } else { + storez = 1; + } + + if (icompz == 2) { + slaset_("Full", n, n, &c_b29, &c_b15, &z__[z_offset], ldz); + } + +/* Scale. */ + + orgnrm = slanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.f) { + return 0; + } + + eps = slamch_("Epsilon"); + + start = 1; + +/* while ( START <= N ) */ + +L10: + if (start <= *n) { + +/* + Let END be the position of the next subdiagonal entry such that + E( END ) <= TINY or END = N if no such subdiagonal exists. The + matrix identified by the elements between START and END + constitutes an independent sub-problem. +*/ + + end = start; +L20: + if (end < *n) { + tiny = eps * sqrt((r__1 = d__[end], dabs(r__1))) * sqrt((r__2 = + d__[end + 1], dabs(r__2))); + if ((r__1 = e[end], dabs(r__1)) > tiny) { + ++end; + goto L20; + } + } + +/* (Sub) Problem determined. Compute its size and solve it. */ + + m = end - start + 1; + if (m == 1) { + start = end + 1; + goto L10; + } + if (m > smlsiz) { + *info = smlsiz; + +/* Scale. */ + + orgnrm = slanst_("M", &m, &d__[start], &e[start]); + slascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &m, &c__1, &d__[start] + , &m, info); + i__1 = m - 1; + i__2 = m - 1; + slascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &i__1, &c__1, &e[ + start], &i__2, info); + + if (icompz == 1) { + strtrw = 1; + } else { + strtrw = start; + } + slaed0_(&icompz, n, &m, &d__[start], &e[start], &z__[strtrw + + start * z_dim1], ldz, &work[1], n, &work[storez], &iwork[ + 1], info); + if (*info != 0) { + *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m + + 1) + start - 1; + return 0; + } + +/* Scale back. */ + + slascl_("G", &c__0, &c__0, &c_b15, &orgnrm, &m, &c__1, &d__[start] + , &m, info); + + } else { + if (icompz == 1) { + +/* + Since QR won't update a Z matrix which is larger than the + length of D, we must solve the sub-problem in a workspace and + then multiply back into Z. +*/ + + ssteqr_("I", &m, &d__[start], &e[start], &work[1], &m, &work[ + m * m + 1], info); + slacpy_("A", n, &m, &z__[start * z_dim1 + 1], ldz, &work[ + storez], n); + sgemm_("N", "N", n, &m, &m, &c_b15, &work[storez], ldz, &work[ + 1], &m, &c_b29, &z__[start * z_dim1 + 1], ldz); + } else if (icompz == 2) { + ssteqr_("I", &m, &d__[start], &e[start], &z__[start + start * + z_dim1], ldz, &work[1], info); + } else { + ssterf_(&m, &d__[start], &e[start], info); + } + if (*info != 0) { + *info = start * (*n + 1) + end; + return 0; + } + } + + start = end + 1; + goto L10; + } + +/* + endwhile + + If the problem split any number of times, then the eigenvalues + will not be properly ordered. Here we permute the eigenvalues + (and the associated eigenvectors) into ascending order. +*/ + + if (m != *n) { + if (icompz == 0) { + +/* Use Quick Sort */ + + slasrt_("I", n, &d__[1], info); + + } else { + +/* Use Selection Sort to minimize swaps of eigenvectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L30: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + sswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + + 1], &c__1); + } +/* L40: */ + } + } + } + + work[1] = (real) lwmin; + iwork[1] = liwmin; + + return 0; + +/* End of SSTEDC */ + +} /* sstedc_ */ + +/* Subroutine */ int ssteqr_(char *compz, integer *n, real *d__, real *e, + real *z__, integer *ldz, real *work, integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2; + real r__1, r__2; + + /* Builtin functions */ + double sqrt(doublereal), r_sign(real *, real *); + + /* Local variables */ + static real b, c__, f, g; + static integer i__, j, k, l, m; + static real p, r__, s; + static integer l1, ii, mm, lm1, mm1, nm1; + static real rt1, rt2, eps; + static integer lsv; + static real tst, eps2; + static integer lend, jtot; + extern /* Subroutine */ int slae2_(real *, real *, real *, real *, real *) + ; + extern logical lsame_(char *, char *); + static real anorm; + extern /* Subroutine */ int slasr_(char *, char *, char *, integer *, + integer *, real *, real *, real *, integer *), sswap_(integer *, real *, integer *, real *, integer *); + static integer lendm1, lendp1; + extern /* Subroutine */ int slaev2_(real *, real *, real *, real *, real * + , real *, real *); + extern doublereal slapy2_(real *, real *); + static integer iscale; + extern doublereal slamch_(char *); + static real safmin; + extern /* Subroutine */ int xerbla_(char *, integer *); + static real safmax; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); + static integer lendsv; + extern /* Subroutine */ int slartg_(real *, real *, real *, real *, real * + ), slaset_(char *, integer *, integer *, real *, real *, real *, + integer *); + static real ssfmin; + static integer nmaxit, icompz; + static real ssfmax; + extern doublereal slanst_(char *, integer *, real *, real *); + extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + SSTEQR computes all eigenvalues and, optionally, eigenvectors of a + symmetric tridiagonal matrix using the implicit QL or QR method. + The eigenvectors of a full or band symmetric matrix can also be found + if SSYTRD or SSPTRD or SSBTRD has been used to reduce this matrix to + tridiagonal form. + + Arguments + ========= + + COMPZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only. + = 'V': Compute eigenvalues and eigenvectors of the original + symmetric matrix. On entry, Z must contain the + orthogonal matrix used to reduce the original matrix + to tridiagonal form. + = 'I': Compute eigenvalues and eigenvectors of the + tridiagonal matrix. Z is initialized to the identity + matrix. + + N (input) INTEGER + The order of the matrix. N >= 0. + + D (input/output) REAL array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. + + E (input/output) REAL array, dimension (N-1) + On entry, the (n-1) subdiagonal elements of the tridiagonal + matrix. + On exit, E has been destroyed. + + Z (input/output) REAL array, dimension (LDZ, N) + On entry, if COMPZ = 'V', then Z contains the orthogonal + matrix used in the reduction to tridiagonal form. + On exit, if INFO = 0, then if COMPZ = 'V', Z contains the + orthonormal eigenvectors of the original symmetric matrix, + and if COMPZ = 'I', Z contains the orthonormal eigenvectors + of the symmetric tridiagonal matrix. + If COMPZ = 'N', then Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= 1, and if + eigenvectors are desired, then LDZ >= max(1,N). + + WORK (workspace) REAL array, dimension (max(1,2*N-2)) + If COMPZ = 'N', then WORK is not referenced. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: the algorithm has failed to find all the eigenvalues in + a total of 30*N iterations; if INFO = i, then i + elements of E have not converged to zero; on exit, D + and E contain the elements of a symmetric tridiagonal + matrix which is orthogonally similar to the original + matrix. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + + if (lsame_(compz, "N")) { + icompz = 0; + } else if (lsame_(compz, "V")) { + icompz = 1; + } else if (lsame_(compz, "I")) { + icompz = 2; + } else { + icompz = -1; + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SSTEQR", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (icompz == 2) { + z__[z_dim1 + 1] = 1.f; + } + return 0; + } + +/* Determine the unit roundoff and over/underflow thresholds. */ + + eps = slamch_("E"); +/* Computing 2nd power */ + r__1 = eps; + eps2 = r__1 * r__1; + safmin = slamch_("S"); + safmax = 1.f / safmin; + ssfmax = sqrt(safmax) / 3.f; + ssfmin = sqrt(safmin) / eps2; + +/* + Compute the eigenvalues and eigenvectors of the tridiagonal + matrix. +*/ + + if (icompz == 2) { + slaset_("Full", n, n, &c_b29, &c_b15, &z__[z_offset], ldz); + } + + nmaxit = *n * 30; + jtot = 0; + +/* + Determine where the matrix splits and choose QL or QR iteration + for each block, according to whether top or bottom diagonal + element is smaller. +*/ + + l1 = 1; + nm1 = *n - 1; + +L10: + if (l1 > *n) { + goto L160; + } + if (l1 > 1) { + e[l1 - 1] = 0.f; + } + if (l1 <= nm1) { + i__1 = nm1; + for (m = l1; m <= i__1; ++m) { + tst = (r__1 = e[m], dabs(r__1)); + if (tst == 0.f) { + goto L30; + } + if (tst <= sqrt((r__1 = d__[m], dabs(r__1))) * sqrt((r__2 = d__[m + + 1], dabs(r__2))) * eps) { + e[m] = 0.f; + goto L30; + } +/* L20: */ + } + } + m = *n; + +L30: + l = l1; + lsv = l; + lend = m; + lendsv = lend; + l1 = m + 1; + if (lend == l) { + goto L10; + } + +/* Scale submatrix in rows and columns L to LEND */ + + i__1 = lend - l + 1; + anorm = slanst_("I", &i__1, &d__[l], &e[l]); + iscale = 0; + if (anorm == 0.f) { + goto L10; + } + if (anorm > ssfmax) { + iscale = 1; + i__1 = lend - l + 1; + slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, + info); + } else if (anorm < ssfmin) { + iscale = 2; + i__1 = lend - l + 1; + slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, + info); + } + +/* Choose between QL and QR iteration */ + + if ((r__1 = d__[lend], dabs(r__1)) < (r__2 = d__[l], dabs(r__2))) { + lend = lsv; + l = lendsv; + } + + if (lend > l) { + +/* + QL Iteration + + Look for small subdiagonal element. +*/ + +L40: + if (l != lend) { + lendm1 = lend - 1; + i__1 = lendm1; + for (m = l; m <= i__1; ++m) { +/* Computing 2nd power */ + r__2 = (r__1 = e[m], dabs(r__1)); + tst = r__2 * r__2; + if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m + + 1], dabs(r__2)) + safmin) { + goto L60; + } +/* L50: */ + } + } + + m = lend; + +L60: + if (m < lend) { + e[m] = 0.f; + } + p = d__[l]; + if (m == l) { + goto L80; + } + +/* + If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 + to compute its eigensystem. +*/ + + if (m == l + 1) { + if (icompz > 0) { + slaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); + work[l] = c__; + work[*n - 1 + l] = s; + slasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & + z__[l * z_dim1 + 1], ldz); + } else { + slae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); + } + d__[l] = rt1; + d__[l + 1] = rt2; + e[l] = 0.f; + l += 2; + if (l <= lend) { + goto L40; + } + goto L140; + } + + if (jtot == nmaxit) { + goto L140; + } + ++jtot; + +/* Form shift. */ + + g = (d__[l + 1] - p) / (e[l] * 2.f); + r__ = slapy2_(&g, &c_b15); + g = d__[m] - p + e[l] / (g + r_sign(&r__, &g)); + + s = 1.f; + c__ = 1.f; + p = 0.f; + +/* Inner loop */ + + mm1 = m - 1; + i__1 = l; + for (i__ = mm1; i__ >= i__1; --i__) { + f = s * e[i__]; + b = c__ * e[i__]; + slartg_(&g, &f, &c__, &s, &r__); + if (i__ != m - 1) { + e[i__ + 1] = r__; + } + g = d__[i__ + 1] - p; + r__ = (d__[i__] - g) * s + c__ * 2.f * b; + p = s * r__; + d__[i__ + 1] = g + p; + g = c__ * r__ - b; + +/* If eigenvectors are desired, then save rotations. */ + + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = -s; + } + +/* L70: */ + } + +/* If eigenvectors are desired, then apply saved rotations. */ + + if (icompz > 0) { + mm = m - l + 1; + slasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l + * z_dim1 + 1], ldz); + } + + d__[l] -= p; + e[l] = g; + goto L40; + +/* Eigenvalue found. */ + +L80: + d__[l] = p; + + ++l; + if (l <= lend) { + goto L40; + } + goto L140; + + } else { + +/* + QR Iteration + + Look for small superdiagonal element. +*/ + +L90: + if (l != lend) { + lendp1 = lend + 1; + i__1 = lendp1; + for (m = l; m >= i__1; --m) { +/* Computing 2nd power */ + r__2 = (r__1 = e[m - 1], dabs(r__1)); + tst = r__2 * r__2; + if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m + - 1], dabs(r__2)) + safmin) { + goto L110; + } +/* L100: */ + } + } + + m = lend; + +L110: + if (m > lend) { + e[m - 1] = 0.f; + } + p = d__[l]; + if (m == l) { + goto L130; + } + +/* + If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 + to compute its eigensystem. +*/ + + if (m == l - 1) { + if (icompz > 0) { + slaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) + ; + work[m] = c__; + work[*n - 1 + m] = s; + slasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & + z__[(l - 1) * z_dim1 + 1], ldz); + } else { + slae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); + } + d__[l - 1] = rt1; + d__[l] = rt2; + e[l - 1] = 0.f; + l += -2; + if (l >= lend) { + goto L90; + } + goto L140; + } + + if (jtot == nmaxit) { + goto L140; + } + ++jtot; + +/* Form shift. */ + + g = (d__[l - 1] - p) / (e[l - 1] * 2.f); + r__ = slapy2_(&g, &c_b15); + g = d__[m] - p + e[l - 1] / (g + r_sign(&r__, &g)); + + s = 1.f; + c__ = 1.f; + p = 0.f; + +/* Inner loop */ + + lm1 = l - 1; + i__1 = lm1; + for (i__ = m; i__ <= i__1; ++i__) { + f = s * e[i__]; + b = c__ * e[i__]; + slartg_(&g, &f, &c__, &s, &r__); + if (i__ != m) { + e[i__ - 1] = r__; + } + g = d__[i__] - p; + r__ = (d__[i__ + 1] - g) * s + c__ * 2.f * b; + p = s * r__; + d__[i__] = g + p; + g = c__ * r__ - b; + +/* If eigenvectors are desired, then save rotations. */ + + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = s; + } + +/* L120: */ + } + +/* If eigenvectors are desired, then apply saved rotations. */ + + if (icompz > 0) { + mm = l - m + 1; + slasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m + * z_dim1 + 1], ldz); + } + + d__[l] -= p; + e[lm1] = g; + goto L90; + +/* Eigenvalue found. */ + +L130: + d__[l] = p; + + --l; + if (l >= lend) { + goto L90; + } + goto L140; + + } + +/* Undo scaling if necessary */ + +L140: + if (iscale == 1) { + i__1 = lendsv - lsv + 1; + slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + i__1 = lendsv - lsv; + slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, + info); + } else if (iscale == 2) { + i__1 = lendsv - lsv + 1; + slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + i__1 = lendsv - lsv; + slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, + info); + } + +/* + Check for no convergence to an eigenvalue after a total + of N*MAXIT iterations. +*/ + + if (jtot < nmaxit) { + goto L10; + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.f) { + ++(*info); + } +/* L150: */ + } + goto L190; + +/* Order eigenvalues and eigenvectors. */ + +L160: + if (icompz == 0) { + +/* Use Quick Sort */ + + slasrt_("I", n, &d__[1], info); + + } else { + +/* Use Selection Sort to minimize swaps of eigenvectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L170: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + sswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], + &c__1); + } +/* L180: */ + } + } + +L190: + return 0; + +/* End of SSTEQR */ + +} /* ssteqr_ */ + +/* Subroutine */ int ssterf_(integer *n, real *d__, real *e, integer *info) +{ + /* System generated locals */ + integer i__1; + real r__1, r__2, r__3; + + /* Builtin functions */ + double sqrt(doublereal), r_sign(real *, real *); + + /* Local variables */ + static real c__; + static integer i__, l, m; + static real p, r__, s; + static integer l1; + static real bb, rt1, rt2, eps, rte; + static integer lsv; + static real eps2, oldc; + static integer lend, jtot; + extern /* Subroutine */ int slae2_(real *, real *, real *, real *, real *) + ; + static real gamma, alpha, sigma, anorm; + extern doublereal slapy2_(real *, real *); + static integer iscale; + static real oldgam; + extern doublereal slamch_(char *); + static real safmin; + extern /* Subroutine */ int xerbla_(char *, integer *); + static real safmax; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); + static integer lendsv; + static real ssfmin; + static integer nmaxit; + static real ssfmax; + extern doublereal slanst_(char *, integer *, real *, real *); + extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SSTERF computes all eigenvalues of a symmetric tridiagonal matrix + using the Pal-Walker-Kahan variant of the QL or QR algorithm. + + Arguments + ========= + + N (input) INTEGER + The order of the matrix. N >= 0. + + D (input/output) REAL array, dimension (N) + On entry, the n diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. + + E (input/output) REAL array, dimension (N-1) + On entry, the (n-1) subdiagonal elements of the tridiagonal + matrix. + On exit, E has been destroyed. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: the algorithm failed to find all of the eigenvalues in + a total of 30*N iterations; if INFO = i, then i + elements of E have not converged to zero. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --e; + --d__; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n < 0) { + *info = -1; + i__1 = -(*info); + xerbla_("SSTERF", &i__1); + return 0; + } + if (*n <= 1) { + return 0; + } + +/* Determine the unit roundoff for this environment. */ + + eps = slamch_("E"); +/* Computing 2nd power */ + r__1 = eps; + eps2 = r__1 * r__1; + safmin = slamch_("S"); + safmax = 1.f / safmin; + ssfmax = sqrt(safmax) / 3.f; + ssfmin = sqrt(safmin) / eps2; + +/* Compute the eigenvalues of the tridiagonal matrix. */ + + nmaxit = *n * 30; + sigma = 0.f; + jtot = 0; + +/* + Determine where the matrix splits and choose QL or QR iteration + for each block, according to whether top or bottom diagonal + element is smaller. +*/ + + l1 = 1; + +L10: + if (l1 > *n) { + goto L170; + } + if (l1 > 1) { + e[l1 - 1] = 0.f; + } + i__1 = *n - 1; + for (m = l1; m <= i__1; ++m) { + if ((r__3 = e[m], dabs(r__3)) <= sqrt((r__1 = d__[m], dabs(r__1))) * + sqrt((r__2 = d__[m + 1], dabs(r__2))) * eps) { + e[m] = 0.f; + goto L30; + } +/* L20: */ + } + m = *n; + +L30: + l = l1; + lsv = l; + lend = m; + lendsv = lend; + l1 = m + 1; + if (lend == l) { + goto L10; + } + +/* Scale submatrix in rows and columns L to LEND */ + + i__1 = lend - l + 1; + anorm = slanst_("I", &i__1, &d__[l], &e[l]); + iscale = 0; + if (anorm > ssfmax) { + iscale = 1; + i__1 = lend - l + 1; + slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, + info); + } else if (anorm < ssfmin) { + iscale = 2; + i__1 = lend - l + 1; + slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, + info); + } + + i__1 = lend - 1; + for (i__ = l; i__ <= i__1; ++i__) { +/* Computing 2nd power */ + r__1 = e[i__]; + e[i__] = r__1 * r__1; +/* L40: */ + } + +/* Choose between QL and QR iteration */ + + if ((r__1 = d__[lend], dabs(r__1)) < (r__2 = d__[l], dabs(r__2))) { + lend = lsv; + l = lendsv; + } + + if (lend >= l) { + +/* + QL Iteration + + Look for small subdiagonal element. +*/ + +L50: + if (l != lend) { + i__1 = lend - 1; + for (m = l; m <= i__1; ++m) { + if ((r__2 = e[m], dabs(r__2)) <= eps2 * (r__1 = d__[m] * d__[ + m + 1], dabs(r__1))) { + goto L70; + } +/* L60: */ + } + } + m = lend; + +L70: + if (m < lend) { + e[m] = 0.f; + } + p = d__[l]; + if (m == l) { + goto L90; + } + +/* + If remaining matrix is 2 by 2, use SLAE2 to compute its + eigenvalues. +*/ + + if (m == l + 1) { + rte = sqrt(e[l]); + slae2_(&d__[l], &rte, &d__[l + 1], &rt1, &rt2); + d__[l] = rt1; + d__[l + 1] = rt2; + e[l] = 0.f; + l += 2; + if (l <= lend) { + goto L50; + } + goto L150; + } + + if (jtot == nmaxit) { + goto L150; + } + ++jtot; + +/* Form shift. */ + + rte = sqrt(e[l]); + sigma = (d__[l + 1] - p) / (rte * 2.f); + r__ = slapy2_(&sigma, &c_b15); + sigma = p - rte / (sigma + r_sign(&r__, &sigma)); + + c__ = 1.f; + s = 0.f; + gamma = d__[m] - sigma; + p = gamma * gamma; + +/* Inner loop */ + + i__1 = l; + for (i__ = m - 1; i__ >= i__1; --i__) { + bb = e[i__]; + r__ = p + bb; + if (i__ != m - 1) { + e[i__ + 1] = s * r__; + } + oldc = c__; + c__ = p / r__; + s = bb / r__; + oldgam = gamma; + alpha = d__[i__]; + gamma = c__ * (alpha - sigma) - s * oldgam; + d__[i__ + 1] = oldgam + (alpha - gamma); + if (c__ != 0.f) { + p = gamma * gamma / c__; + } else { + p = oldc * bb; + } +/* L80: */ + } + + e[l] = s * p; + d__[l] = sigma + gamma; + goto L50; + +/* Eigenvalue found. */ + +L90: + d__[l] = p; + + ++l; + if (l <= lend) { + goto L50; + } + goto L150; + + } else { + +/* + QR Iteration + + Look for small superdiagonal element. +*/ + +L100: + i__1 = lend + 1; + for (m = l; m >= i__1; --m) { + if ((r__2 = e[m - 1], dabs(r__2)) <= eps2 * (r__1 = d__[m] * d__[ + m - 1], dabs(r__1))) { + goto L120; + } +/* L110: */ + } + m = lend; + +L120: + if (m > lend) { + e[m - 1] = 0.f; + } + p = d__[l]; + if (m == l) { + goto L140; + } + +/* + If remaining matrix is 2 by 2, use SLAE2 to compute its + eigenvalues. +*/ + + if (m == l - 1) { + rte = sqrt(e[l - 1]); + slae2_(&d__[l], &rte, &d__[l - 1], &rt1, &rt2); + d__[l] = rt1; + d__[l - 1] = rt2; + e[l - 1] = 0.f; + l += -2; + if (l >= lend) { + goto L100; + } + goto L150; + } + + if (jtot == nmaxit) { + goto L150; + } + ++jtot; + +/* Form shift. */ + + rte = sqrt(e[l - 1]); + sigma = (d__[l - 1] - p) / (rte * 2.f); + r__ = slapy2_(&sigma, &c_b15); + sigma = p - rte / (sigma + r_sign(&r__, &sigma)); + + c__ = 1.f; + s = 0.f; + gamma = d__[m] - sigma; + p = gamma * gamma; + +/* Inner loop */ + + i__1 = l - 1; + for (i__ = m; i__ <= i__1; ++i__) { + bb = e[i__]; + r__ = p + bb; + if (i__ != m) { + e[i__ - 1] = s * r__; + } + oldc = c__; + c__ = p / r__; + s = bb / r__; + oldgam = gamma; + alpha = d__[i__ + 1]; + gamma = c__ * (alpha - sigma) - s * oldgam; + d__[i__] = oldgam + (alpha - gamma); + if (c__ != 0.f) { + p = gamma * gamma / c__; + } else { + p = oldc * bb; + } +/* L130: */ + } + + e[l - 1] = s * p; + d__[l] = sigma + gamma; + goto L100; + +/* Eigenvalue found. */ + +L140: + d__[l] = p; + + --l; + if (l >= lend) { + goto L100; + } + goto L150; + + } + +/* Undo scaling if necessary */ + +L150: + if (iscale == 1) { + i__1 = lendsv - lsv + 1; + slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + } + if (iscale == 2) { + i__1 = lendsv - lsv + 1; + slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + } + +/* + Check for no convergence to an eigenvalue after a total + of N*MAXIT iterations. +*/ + + if (jtot < nmaxit) { + goto L10; + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.f) { + ++(*info); + } +/* L160: */ + } + goto L180; + +/* Sort eigenvalues in increasing order. */ + +L170: + slasrt_("I", n, &d__[1], info); + +L180: + return 0; + +/* End of SSTERF */ + +} /* ssterf_ */ + +/* Subroutine */ int ssyevd_(char *jobz, char *uplo, integer *n, real *a, + integer *lda, real *w, real *work, integer *lwork, integer *iwork, + integer *liwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + real r__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static real eps; + static integer inde; + static real anrm, rmin, rmax; + static integer lopt; + static real sigma; + extern logical lsame_(char *, char *); + static integer iinfo; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + static integer lwmin, liopt; + static logical lower, wantz; + static integer indwk2, llwrk2, iscale; + extern doublereal slamch_(char *); + static real safmin; + extern /* Subroutine */ int xerbla_(char *, integer *); + static real bignum; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); + static integer indtau; + extern /* Subroutine */ int sstedc_(char *, integer *, real *, real *, + real *, integer *, real *, integer *, integer *, integer *, + integer *), slacpy_(char *, integer *, integer *, real *, + integer *, real *, integer *); + static integer indwrk, liwmin; + extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *); + extern doublereal slansy_(char *, char *, integer *, real *, integer *, + real *); + static integer llwork; + static real smlnum; + static logical lquery; + extern /* Subroutine */ int sormtr_(char *, char *, char *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *), ssytrd_(char *, + integer *, real *, integer *, real *, real *, real *, real *, + integer *, integer *); + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SSYEVD computes all eigenvalues and, optionally, eigenvectors of a + real symmetric matrix A. If eigenvectors are desired, it uses a + divide and conquer algorithm. + + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Because of large use of BLAS of level 3, SSYEVD needs N**2 more + workspace than SSYEVX. + + Arguments + ========= + + JOBZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only; + = 'V': Compute eigenvalues and eigenvectors. + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA, N) + On entry, the symmetric matrix A. If UPLO = 'U', the + leading N-by-N upper triangular part of A contains the + upper triangular part of the matrix A. If UPLO = 'L', + the leading N-by-N lower triangular part of A contains + the lower triangular part of the matrix A. + On exit, if JOBZ = 'V', then if INFO = 0, A contains the + orthonormal eigenvectors of the matrix A. + If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') + or the upper triangle (if UPLO='U') of A, including the + diagonal, is destroyed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + W (output) REAL array, dimension (N) + If INFO = 0, the eigenvalues in ascending order. + + WORK (workspace/output) REAL array, + dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If N <= 1, LWORK must be at least 1. + If JOBZ = 'N' and N > 1, LWORK must be at least 2*N+1. + If JOBZ = 'V' and N > 1, LWORK must be at least + 1 + 6*N + 2*N**2. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + IWORK (workspace/output) INTEGER array, dimension (LIWORK) + On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. + + LIWORK (input) INTEGER + The dimension of the array IWORK. + If N <= 1, LIWORK must be at least 1. + If JOBZ = 'N' and N > 1, LIWORK must be at least 1. + If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. + + If LIWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal size of the IWORK array, + returns this value as the first entry of the IWORK array, and + no error message related to LIWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the algorithm failed to converge; i + off-diagonal elements of an intermediate tridiagonal + form did not converge to zero. + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --w; + --work; + --iwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + lower = lsame_(uplo, "L"); + lquery = *lwork == -1 || *liwork == -1; + + *info = 0; + if (*n <= 1) { + liwmin = 1; + lwmin = 1; + lopt = lwmin; + liopt = liwmin; + } else { + if (wantz) { + liwmin = *n * 5 + 3; +/* Computing 2nd power */ + i__1 = *n; + lwmin = *n * 6 + 1 + (i__1 * i__1 << 1); + } else { + liwmin = 1; + lwmin = (*n << 1) + 1; + } + lopt = lwmin; + liopt = liwmin; + } + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*liwork < liwmin && ! lquery) { + *info = -10; + } + + if (*info == 0) { + work[1] = (real) lopt; + iwork[1] = liopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SSYEVD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + w[1] = a[a_dim1 + 1]; + if (wantz) { + a[a_dim1 + 1] = 1.f; + } + return 0; + } + +/* Get machine constants. */ + + safmin = slamch_("Safe minimum"); + eps = slamch_("Precision"); + smlnum = safmin / eps; + bignum = 1.f / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = slansy_("M", uplo, n, &a[a_offset], lda, &work[1]); + iscale = 0; + if (anrm > 0.f && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + slascl_(uplo, &c__0, &c__0, &c_b15, &sigma, n, n, &a[a_offset], lda, + info); + } + +/* Call SSYTRD to reduce symmetric matrix to tridiagonal form. */ + + inde = 1; + indtau = inde + *n; + indwrk = indtau + *n; + llwork = *lwork - indwrk + 1; + indwk2 = indwrk + *n * *n; + llwrk2 = *lwork - indwk2 + 1; + + ssytrd_(uplo, n, &a[a_offset], lda, &w[1], &work[inde], &work[indtau], & + work[indwrk], &llwork, &iinfo); + lopt = (*n << 1) + work[indwrk]; + +/* + For eigenvalues only, call SSTERF. For eigenvectors, first call + SSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the + tridiagonal matrix, then call SORMTR to multiply it by the + Householder transformations stored in A. +*/ + + if (! wantz) { + ssterf_(n, &w[1], &work[inde], info); + } else { + sstedc_("I", n, &w[1], &work[inde], &work[indwrk], n, &work[indwk2], & + llwrk2, &iwork[1], liwork, info); + sormtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ + indwrk], n, &work[indwk2], &llwrk2, &iinfo); + slacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); +/* + Computing MAX + Computing 2nd power +*/ + i__3 = *n; + i__1 = lopt, i__2 = *n * 6 + 1 + (i__3 * i__3 << 1); + lopt = max(i__1,i__2); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + r__1 = 1.f / sigma; + sscal_(n, &r__1, &w[1], &c__1); + } + + work[1] = (real) lopt; + iwork[1] = liopt; + + return 0; + +/* End of SSYEVD */ + +} /* ssyevd_ */ + +/* Subroutine */ int ssytd2_(char *uplo, integer *n, real *a, integer *lda, + real *d__, real *e, real *tau, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__; + static real taui; + extern doublereal sdot_(integer *, real *, integer *, real *, integer *); + extern /* Subroutine */ int ssyr2_(char *, integer *, real *, real *, + integer *, real *, integer *, real *, integer *); + static real alpha; + extern logical lsame_(char *, char *); + static logical upper; + extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, + real *, integer *), ssymv_(char *, integer *, real *, real *, + integer *, real *, integer *, real *, real *, integer *), + xerbla_(char *, integer *), slarfg_(integer *, real *, + real *, integer *, real *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal + form T by an orthogonal similarity transformation: Q' * A * Q = T. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + symmetric matrix A is stored: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + n-by-n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n-by-n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit, if UPLO = 'U', the diagonal and first superdiagonal + of A are overwritten by the corresponding elements of the + tridiagonal matrix T, and the elements above the first + superdiagonal, with the array TAU, represent the orthogonal + matrix Q as a product of elementary reflectors; if UPLO + = 'L', the diagonal and first subdiagonal of A are over- + written by the corresponding elements of the tridiagonal + matrix T, and the elements below the first subdiagonal, with + the array TAU, represent the orthogonal matrix Q as a product + of elementary reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + D (output) REAL array, dimension (N) + The diagonal elements of the tridiagonal matrix T: + D(i) = A(i,i). + + E (output) REAL array, dimension (N-1) + The off-diagonal elements of the tridiagonal matrix T: + E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. + + TAU (output) REAL array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors + + Q = H(n-1) . . . H(2) H(1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in + A(1:i-1,i+1), and tau in TAU(i). + + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors + + Q = H(1) H(2) . . . H(n-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), + and tau in TAU(i). + + The contents of A on exit are illustrated by the following examples + with n = 5: + + if UPLO = 'U': if UPLO = 'L': + + ( d e v2 v3 v4 ) ( d ) + ( d e v3 v4 ) ( e d ) + ( d e v4 ) ( v1 e d ) + ( d e ) ( v1 v2 e d ) + ( d ) ( v1 v2 v3 e d ) + + where d and e denote diagonal and off-diagonal elements of T, and vi + denotes an element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tau; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SSYTD2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + + if (upper) { + +/* Reduce the upper triangle of A */ + + for (i__ = *n - 1; i__ >= 1; --i__) { + +/* + Generate elementary reflector H(i) = I - tau * v * v' + to annihilate A(1:i-1,i+1) +*/ + + slarfg_(&i__, &a[i__ + (i__ + 1) * a_dim1], &a[(i__ + 1) * a_dim1 + + 1], &c__1, &taui); + e[i__] = a[i__ + (i__ + 1) * a_dim1]; + + if (taui != 0.f) { + +/* Apply H(i) from both sides to A(1:i,1:i) */ + + a[i__ + (i__ + 1) * a_dim1] = 1.f; + +/* Compute x := tau * A * v storing x in TAU(1:i) */ + + ssymv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * + a_dim1 + 1], &c__1, &c_b29, &tau[1], &c__1) + ; + +/* Compute w := x - 1/2 * tau * (x'*v) * v */ + + alpha = taui * -.5f * sdot_(&i__, &tau[1], &c__1, &a[(i__ + 1) + * a_dim1 + 1], &c__1); + saxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[ + 1], &c__1); + +/* + Apply the transformation as a rank-2 update: + A := A - v * w' - w * v' +*/ + + ssyr2_(uplo, &i__, &c_b151, &a[(i__ + 1) * a_dim1 + 1], &c__1, + &tau[1], &c__1, &a[a_offset], lda); + + a[i__ + (i__ + 1) * a_dim1] = e[i__]; + } + d__[i__ + 1] = a[i__ + 1 + (i__ + 1) * a_dim1]; + tau[i__] = taui; +/* L10: */ + } + d__[1] = a[a_dim1 + 1]; + } else { + +/* Reduce the lower triangle of A */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* + Generate elementary reflector H(i) = I - tau * v * v' + to annihilate A(i+2:n,i) +*/ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ * + a_dim1], &c__1, &taui); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + + if (taui != 0.f) { + +/* Apply H(i) from both sides to A(i+1:n,i+1:n) */ + + a[i__ + 1 + i__ * a_dim1] = 1.f; + +/* Compute x := tau * A * v storing y in TAU(i:n-1) */ + + i__2 = *n - i__; + ssymv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &tau[ + i__], &c__1); + +/* Compute w := x - 1/2 * tau * (x'*v) * v */ + + i__2 = *n - i__; + alpha = taui * -.5f * sdot_(&i__2, &tau[i__], &c__1, &a[i__ + + 1 + i__ * a_dim1], &c__1); + i__2 = *n - i__; + saxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &c__1); + +/* + Apply the transformation as a rank-2 update: + A := A - v * w' - w * v' +*/ + + i__2 = *n - i__; + ssyr2_(uplo, &i__2, &c_b151, &a[i__ + 1 + i__ * a_dim1], & + c__1, &tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * + a_dim1], lda); + + a[i__ + 1 + i__ * a_dim1] = e[i__]; + } + d__[i__] = a[i__ + i__ * a_dim1]; + tau[i__] = taui; +/* L20: */ + } + d__[*n] = a[*n + *n * a_dim1]; + } + + return 0; + +/* End of SSYTD2 */ + +} /* ssytd2_ */ + +/* Subroutine */ int ssytrd_(char *uplo, integer *n, real *a, integer *lda, + real *d__, real *e, real *tau, real *work, integer *lwork, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, nb, kk, nx, iws; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + static logical upper; + extern /* Subroutine */ int ssytd2_(char *, integer *, real *, integer *, + real *, real *, real *, integer *), ssyr2k_(char *, char * + , integer *, integer *, real *, real *, integer *, real *, + integer *, real *, real *, integer *), xerbla_( + char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slatrd_(char *, integer *, integer *, real *, + integer *, real *, real *, real *, integer *); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SSYTRD reduces a real symmetric matrix A to real symmetric + tridiagonal form T by an orthogonal similarity transformation: + Q**T * A * Q = T. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + N-by-N upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit, if UPLO = 'U', the diagonal and first superdiagonal + of A are overwritten by the corresponding elements of the + tridiagonal matrix T, and the elements above the first + superdiagonal, with the array TAU, represent the orthogonal + matrix Q as a product of elementary reflectors; if UPLO + = 'L', the diagonal and first subdiagonal of A are over- + written by the corresponding elements of the tridiagonal + matrix T, and the elements below the first subdiagonal, with + the array TAU, represent the orthogonal matrix Q as a product + of elementary reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + D (output) REAL array, dimension (N) + The diagonal elements of the tridiagonal matrix T: + D(i) = A(i,i). + + E (output) REAL array, dimension (N-1) + The off-diagonal elements of the tridiagonal matrix T: + E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. + + TAU (output) REAL array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) REAL array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= 1. + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors + + Q = H(n-1) . . . H(2) H(1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in + A(1:i-1,i+1), and tau in TAU(i). + + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors + + Q = H(1) H(2) . . . H(n-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), + and tau in TAU(i). + + The contents of A on exit are illustrated by the following examples + with n = 5: + + if UPLO = 'U': if UPLO = 'L': + + ( d e v2 v3 v4 ) ( d ) + ( d e v3 v4 ) ( e d ) + ( d e v4 ) ( v1 e d ) + ( d e ) ( v1 v2 e d ) + ( d ) ( v1 v2 v3 e d ) + + where d and e denote diagonal and off-diagonal elements of T, and vi + denotes an element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tau; + --work; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*lwork < 1 && ! lquery) { + *info = -9; + } + + if (*info == 0) { + +/* Determine the block size. */ + + nb = ilaenv_(&c__1, "SSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + lwkopt = *n * nb; + work[1] = (real) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("SSYTRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1] = 1.f; + return 0; + } + + nx = *n; + iws = 1; + if (nb > 1 && nb < *n) { + +/* + Determine when to cross over from blocked to unblocked code + (last block is always handled by unblocked code). + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "SSYTRD", uplo, n, &c_n1, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *n) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: determine the + minimum value of NB, and reduce NB or force use of + unblocked code by setting NX = N. + + Computing MAX +*/ + i__1 = *lwork / ldwork; + nb = max(i__1,1); + nbmin = ilaenv_(&c__2, "SSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, + (ftnlen)6, (ftnlen)1); + if (nb < nbmin) { + nx = *n; + } + } + } else { + nx = *n; + } + } else { + nb = 1; + } + + if (upper) { + +/* + Reduce the upper triangle of A. + Columns 1:kk are handled by the unblocked method. +*/ + + kk = *n - (*n - nx + nb - 1) / nb * nb; + i__1 = kk + 1; + i__2 = -nb; + for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { + +/* + Reduce columns i:i+nb-1 to tridiagonal form and form the + matrix W which is needed to update the unreduced part of + the matrix +*/ + + i__3 = i__ + nb - 1; + slatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & + work[1], &ldwork); + +/* + Update the unreduced submatrix A(1:i-1,1:i-1), using an + update of the form: A := A - V*W' - W*V' +*/ + + i__3 = i__ - 1; + ssyr2k_(uplo, "No transpose", &i__3, &nb, &c_b151, &a[i__ * + a_dim1 + 1], lda, &work[1], &ldwork, &c_b15, &a[a_offset], + lda); + +/* + Copy superdiagonal elements back into A, and diagonal + elements into D +*/ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j - 1 + j * a_dim1] = e[j - 1]; + d__[j] = a[j + j * a_dim1]; +/* L10: */ + } +/* L20: */ + } + +/* Use unblocked code to reduce the last or only block */ + + ssytd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo); + } else { + +/* Reduce the lower triangle of A */ + + i__2 = *n - nx; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { + +/* + Reduce columns i:i+nb-1 to tridiagonal form and form the + matrix W which is needed to update the unreduced part of + the matrix +*/ + + i__3 = *n - i__ + 1; + slatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], & + tau[i__], &work[1], &ldwork); + +/* + Update the unreduced submatrix A(i+ib:n,i+ib:n), using + an update of the form: A := A - V*W' - W*V' +*/ + + i__3 = *n - i__ - nb + 1; + ssyr2k_(uplo, "No transpose", &i__3, &nb, &c_b151, &a[i__ + nb + + i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b15, &a[ + i__ + nb + (i__ + nb) * a_dim1], lda); + +/* + Copy subdiagonal elements back into A, and diagonal + elements into D +*/ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j + 1 + j * a_dim1] = e[j]; + d__[j] = a[j + j * a_dim1]; +/* L30: */ + } +/* L40: */ + } + +/* Use unblocked code to reduce the last or only block */ + + i__1 = *n - i__ + 1; + ssytd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], + &tau[i__], &iinfo); + } + + work[1] = (real) lwkopt; + return 0; + +/* End of SSYTRD */ + +} /* ssytrd_ */ + +/* Subroutine */ int strevc_(char *side, char *howmny, logical *select, + integer *n, real *t, integer *ldt, real *vl, integer *ldvl, real *vr, + integer *ldvr, integer *mm, integer *m, real *work, integer *info) +{ + /* System generated locals */ + integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3; + real r__1, r__2, r__3, r__4; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, j, k; + static real x[4] /* was [2][2] */; + static integer j1, j2, n2, ii, ki, ip, is; + static real wi, wr, rec, ulp, beta, emax; + static logical pair, allv; + static integer ierr; + static real unfl, ovfl, smin; + extern doublereal sdot_(integer *, real *, integer *, real *, integer *); + static logical over; + static real vmax; + static integer jnxt; + static real scale; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + static real remax; + static logical leftv; + extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *); + static logical bothv; + static real vcrit; + static logical somev; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + static real xnorm; + extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, + real *, integer *), slaln2_(logical *, integer *, integer *, real + *, real *, real *, integer *, real *, real *, real *, integer *, + real *, real *, real *, integer *, real *, real *, integer *), + slabad_(real *, real *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static real bignum; + extern integer isamax_(integer *, real *, integer *); + static logical rightv; + static real smlnum; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + STREVC computes some or all of the right and/or left eigenvectors of + a real upper quasi-triangular matrix T. + + The right eigenvector x and the left eigenvector y of T corresponding + to an eigenvalue w are defined by: + + T*x = w*x, y'*T = w*y' + + where y' denotes the conjugate transpose of the vector y. + + If all eigenvectors are requested, the routine may either return the + matrices X and/or Y of right or left eigenvectors of T, or the + products Q*X and/or Q*Y, where Q is an input orthogonal + matrix. If T was obtained from the real-Schur factorization of an + original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of + right or left eigenvectors of A. + + T must be in Schur canonical form (as returned by SHSEQR), that is, + block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each + 2-by-2 diagonal block has its diagonal elements equal and its + off-diagonal elements of opposite sign. Corresponding to each 2-by-2 + diagonal block is a complex conjugate pair of eigenvalues and + eigenvectors; only one eigenvector of the pair is computed, namely + the one corresponding to the eigenvalue with positive imaginary part. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'R': compute right eigenvectors only; + = 'L': compute left eigenvectors only; + = 'B': compute both right and left eigenvectors. + + HOWMNY (input) CHARACTER*1 + = 'A': compute all right and/or left eigenvectors; + = 'B': compute all right and/or left eigenvectors, + and backtransform them using the input matrices + supplied in VR and/or VL; + = 'S': compute selected right and/or left eigenvectors, + specified by the logical array SELECT. + + SELECT (input/output) LOGICAL array, dimension (N) + If HOWMNY = 'S', SELECT specifies the eigenvectors to be + computed. + If HOWMNY = 'A' or 'B', SELECT is not referenced. + To select the real eigenvector corresponding to a real + eigenvalue w(j), SELECT(j) must be set to .TRUE.. To select + the complex eigenvector corresponding to a complex conjugate + pair w(j) and w(j+1), either SELECT(j) or SELECT(j+1) must be + set to .TRUE.; then on exit SELECT(j) is .TRUE. and + SELECT(j+1) is .FALSE.. + + N (input) INTEGER + The order of the matrix T. N >= 0. + + T (input) REAL array, dimension (LDT,N) + The upper quasi-triangular matrix T in Schur canonical form. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= max(1,N). + + VL (input/output) REAL array, dimension (LDVL,MM) + On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must + contain an N-by-N matrix Q (usually the orthogonal matrix Q + of Schur vectors returned by SHSEQR). + On exit, if SIDE = 'L' or 'B', VL contains: + if HOWMNY = 'A', the matrix Y of left eigenvectors of T; + VL has the same quasi-lower triangular form + as T'. If T(i,i) is a real eigenvalue, then + the i-th column VL(i) of VL is its + corresponding eigenvector. If T(i:i+1,i:i+1) + is a 2-by-2 block whose eigenvalues are + complex-conjugate eigenvalues of T, then + VL(i)+sqrt(-1)*VL(i+1) is the complex + eigenvector corresponding to the eigenvalue + with positive real part. + if HOWMNY = 'B', the matrix Q*Y; + if HOWMNY = 'S', the left eigenvectors of T specified by + SELECT, stored consecutively in the columns + of VL, in the same order as their + eigenvalues. + A complex eigenvector corresponding to a complex eigenvalue + is stored in two consecutive columns, the first holding the + real part, and the second the imaginary part. + If SIDE = 'R', VL is not referenced. + + LDVL (input) INTEGER + The leading dimension of the array VL. LDVL >= max(1,N) if + SIDE = 'L' or 'B'; LDVL >= 1 otherwise. + + VR (input/output) REAL array, dimension (LDVR,MM) + On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must + contain an N-by-N matrix Q (usually the orthogonal matrix Q + of Schur vectors returned by SHSEQR). + On exit, if SIDE = 'R' or 'B', VR contains: + if HOWMNY = 'A', the matrix X of right eigenvectors of T; + VR has the same quasi-upper triangular form + as T. If T(i,i) is a real eigenvalue, then + the i-th column VR(i) of VR is its + corresponding eigenvector. If T(i:i+1,i:i+1) + is a 2-by-2 block whose eigenvalues are + complex-conjugate eigenvalues of T, then + VR(i)+sqrt(-1)*VR(i+1) is the complex + eigenvector corresponding to the eigenvalue + with positive real part. + if HOWMNY = 'B', the matrix Q*X; + if HOWMNY = 'S', the right eigenvectors of T specified by + SELECT, stored consecutively in the columns + of VR, in the same order as their + eigenvalues. + A complex eigenvector corresponding to a complex eigenvalue + is stored in two consecutive columns, the first holding the + real part and the second the imaginary part. + If SIDE = 'L', VR is not referenced. + + LDVR (input) INTEGER + The leading dimension of the array VR. LDVR >= max(1,N) if + SIDE = 'R' or 'B'; LDVR >= 1 otherwise. + + MM (input) INTEGER + The number of columns in the arrays VL and/or VR. MM >= M. + + M (output) INTEGER + The number of columns in the arrays VL and/or VR actually + used to store the eigenvectors. + If HOWMNY = 'A' or 'B', M is set to N. + Each selected real eigenvector occupies one column and each + selected complex eigenvector occupies two columns. + + WORK (workspace) REAL array, dimension (3*N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The algorithm used in this program is basically backward (forward) + substitution, with scaling to make the the code robust against + possible overflow. + + Each eigenvector is normalized so that the element of largest + magnitude has magnitude 1; here the magnitude of a complex number + (x,y) is taken to be |x| + |y|. + + ===================================================================== + + + Decode and test the input parameters +*/ + + /* Parameter adjustments */ + --select; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + + /* Function Body */ + bothv = lsame_(side, "B"); + rightv = lsame_(side, "R") || bothv; + leftv = lsame_(side, "L") || bothv; + + allv = lsame_(howmny, "A"); + over = lsame_(howmny, "B"); + somev = lsame_(howmny, "S"); + + *info = 0; + if (! rightv && ! leftv) { + *info = -1; + } else if (! allv && ! over && ! somev) { + *info = -2; + } else if (*n < 0) { + *info = -4; + } else if (*ldt < max(1,*n)) { + *info = -6; + } else if (*ldvl < 1 || leftv && *ldvl < *n) { + *info = -8; + } else if (*ldvr < 1 || rightv && *ldvr < *n) { + *info = -10; + } else { + +/* + Set M to the number of columns required to store the selected + eigenvectors, standardize the array SELECT if necessary, and + test MM. +*/ + + if (somev) { + *m = 0; + pair = FALSE_; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (pair) { + pair = FALSE_; + select[j] = FALSE_; + } else { + if (j < *n) { + if (t[j + 1 + j * t_dim1] == 0.f) { + if (select[j]) { + ++(*m); + } + } else { + pair = TRUE_; + if (select[j] || select[j + 1]) { + select[j] = TRUE_; + *m += 2; + } + } + } else { + if (select[*n]) { + ++(*m); + } + } + } +/* L10: */ + } + } else { + *m = *n; + } + + if (*mm < *m) { + *info = -11; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("STREVC", &i__1); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + +/* Set the constants to control overflow. */ + + unfl = slamch_("Safe minimum"); + ovfl = 1.f / unfl; + slabad_(&unfl, &ovfl); + ulp = slamch_("Precision"); + smlnum = unfl * (*n / ulp); + bignum = (1.f - ulp) / smlnum; + +/* + Compute 1-norm of each column of strictly upper triangular + part of T to control overflow in triangular solver. +*/ + + work[1] = 0.f; + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + work[j] = 0.f; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[j] += (r__1 = t[i__ + j * t_dim1], dabs(r__1)); +/* L20: */ + } +/* L30: */ + } + +/* + Index IP is used to specify the real or complex eigenvalue: + IP = 0, real eigenvalue, + 1, first of conjugate complex pair: (wr,wi) + -1, second of conjugate complex pair: (wr,wi) +*/ + + n2 = *n << 1; + + if (rightv) { + +/* Compute right eigenvectors. */ + + ip = 0; + is = *m; + for (ki = *n; ki >= 1; --ki) { + + if (ip == 1) { + goto L130; + } + if (ki == 1) { + goto L40; + } + if (t[ki + (ki - 1) * t_dim1] == 0.f) { + goto L40; + } + ip = -1; + +L40: + if (somev) { + if (ip == 0) { + if (! select[ki]) { + goto L130; + } + } else { + if (! select[ki - 1]) { + goto L130; + } + } + } + +/* Compute the KI-th eigenvalue (WR,WI). */ + + wr = t[ki + ki * t_dim1]; + wi = 0.f; + if (ip != 0) { + wi = sqrt((r__1 = t[ki + (ki - 1) * t_dim1], dabs(r__1))) * + sqrt((r__2 = t[ki - 1 + ki * t_dim1], dabs(r__2))); + } +/* Computing MAX */ + r__1 = ulp * (dabs(wr) + dabs(wi)); + smin = dmax(r__1,smlnum); + + if (ip == 0) { + +/* Real right eigenvector */ + + work[ki + *n] = 1.f; + +/* Form right-hand side */ + + i__1 = ki - 1; + for (k = 1; k <= i__1; ++k) { + work[k + *n] = -t[k + ki * t_dim1]; +/* L50: */ + } + +/* + Solve the upper quasi-triangular system: + (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK. +*/ + + jnxt = ki - 1; + for (j = ki - 1; j >= 1; --j) { + if (j > jnxt) { + goto L60; + } + j1 = j; + j2 = j; + jnxt = j - 1; + if (j > 1) { + if (t[j + (j - 1) * t_dim1] != 0.f) { + j1 = j - 1; + jnxt = j - 2; + } + } + + if (j1 == j2) { + +/* 1-by-1 diagonal block */ + + slaln2_(&c_false, &c__1, &c__1, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &c_b29, x, &c__2, &scale, &xnorm, + &ierr); + +/* + Scale X(1,1) to avoid overflow when updating + the right-hand side. +*/ + + if (xnorm > 1.f) { + if (work[j] > bignum / xnorm) { + x[0] /= xnorm; + scale /= xnorm; + } + } + +/* Scale if necessary */ + + if (scale != 1.f) { + sscal_(&ki, &scale, &work[*n + 1], &c__1); + } + work[j + *n] = x[0]; + +/* Update right-hand side */ + + i__1 = j - 1; + r__1 = -x[0]; + saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); + + } else { + +/* 2-by-2 diagonal block */ + + slaln2_(&c_false, &c__2, &c__1, &smin, &c_b15, &t[j - + 1 + (j - 1) * t_dim1], ldt, &c_b15, &c_b15, & + work[j - 1 + *n], n, &wr, &c_b29, x, &c__2, & + scale, &xnorm, &ierr); + +/* + Scale X(1,1) and X(2,1) to avoid overflow when + updating the right-hand side. +*/ + + if (xnorm > 1.f) { +/* Computing MAX */ + r__1 = work[j - 1], r__2 = work[j]; + beta = dmax(r__1,r__2); + if (beta > bignum / xnorm) { + x[0] /= xnorm; + x[1] /= xnorm; + scale /= xnorm; + } + } + +/* Scale if necessary */ + + if (scale != 1.f) { + sscal_(&ki, &scale, &work[*n + 1], &c__1); + } + work[j - 1 + *n] = x[0]; + work[j + *n] = x[1]; + +/* Update right-hand side */ + + i__1 = j - 2; + r__1 = -x[0]; + saxpy_(&i__1, &r__1, &t[(j - 1) * t_dim1 + 1], &c__1, + &work[*n + 1], &c__1); + i__1 = j - 2; + r__1 = -x[1]; + saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); + } +L60: + ; + } + +/* Copy the vector x or Q*x to VR and normalize. */ + + if (! over) { + scopy_(&ki, &work[*n + 1], &c__1, &vr[is * vr_dim1 + 1], & + c__1); + + ii = isamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); + remax = 1.f / (r__1 = vr[ii + is * vr_dim1], dabs(r__1)); + sscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); + + i__1 = *n; + for (k = ki + 1; k <= i__1; ++k) { + vr[k + is * vr_dim1] = 0.f; +/* L70: */ + } + } else { + if (ki > 1) { + i__1 = ki - 1; + sgemv_("N", n, &i__1, &c_b15, &vr[vr_offset], ldvr, & + work[*n + 1], &c__1, &work[ki + *n], &vr[ki * + vr_dim1 + 1], &c__1); + } + + ii = isamax_(n, &vr[ki * vr_dim1 + 1], &c__1); + remax = 1.f / (r__1 = vr[ii + ki * vr_dim1], dabs(r__1)); + sscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); + } + + } else { + +/* + Complex right eigenvector. + + Initial solve + [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0. + [ (T(KI,KI-1) T(KI,KI) ) ] +*/ + + if ((r__1 = t[ki - 1 + ki * t_dim1], dabs(r__1)) >= (r__2 = t[ + ki + (ki - 1) * t_dim1], dabs(r__2))) { + work[ki - 1 + *n] = 1.f; + work[ki + n2] = wi / t[ki - 1 + ki * t_dim1]; + } else { + work[ki - 1 + *n] = -wi / t[ki + (ki - 1) * t_dim1]; + work[ki + n2] = 1.f; + } + work[ki + *n] = 0.f; + work[ki - 1 + n2] = 0.f; + +/* Form right-hand side */ + + i__1 = ki - 2; + for (k = 1; k <= i__1; ++k) { + work[k + *n] = -work[ki - 1 + *n] * t[k + (ki - 1) * + t_dim1]; + work[k + n2] = -work[ki + n2] * t[k + ki * t_dim1]; +/* L80: */ + } + +/* + Solve upper quasi-triangular system: + (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2) +*/ + + jnxt = ki - 2; + for (j = ki - 2; j >= 1; --j) { + if (j > jnxt) { + goto L90; + } + j1 = j; + j2 = j; + jnxt = j - 1; + if (j > 1) { + if (t[j + (j - 1) * t_dim1] != 0.f) { + j1 = j - 1; + jnxt = j - 2; + } + } + + if (j1 == j2) { + +/* 1-by-1 diagonal block */ + + slaln2_(&c_false, &c__1, &c__2, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &wi, x, &c__2, &scale, &xnorm, & + ierr); + +/* + Scale X(1,1) and X(1,2) to avoid overflow when + updating the right-hand side. +*/ + + if (xnorm > 1.f) { + if (work[j] > bignum / xnorm) { + x[0] /= xnorm; + x[2] /= xnorm; + scale /= xnorm; + } + } + +/* Scale if necessary */ + + if (scale != 1.f) { + sscal_(&ki, &scale, &work[*n + 1], &c__1); + sscal_(&ki, &scale, &work[n2 + 1], &c__1); + } + work[j + *n] = x[0]; + work[j + n2] = x[2]; + +/* Update the right-hand side */ + + i__1 = j - 1; + r__1 = -x[0]; + saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); + i__1 = j - 1; + r__1 = -x[2]; + saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ + n2 + 1], &c__1); + + } else { + +/* 2-by-2 diagonal block */ + + slaln2_(&c_false, &c__2, &c__2, &smin, &c_b15, &t[j - + 1 + (j - 1) * t_dim1], ldt, &c_b15, &c_b15, & + work[j - 1 + *n], n, &wr, &wi, x, &c__2, & + scale, &xnorm, &ierr); + +/* + Scale X to avoid overflow when updating + the right-hand side. +*/ + + if (xnorm > 1.f) { +/* Computing MAX */ + r__1 = work[j - 1], r__2 = work[j]; + beta = dmax(r__1,r__2); + if (beta > bignum / xnorm) { + rec = 1.f / xnorm; + x[0] *= rec; + x[2] *= rec; + x[1] *= rec; + x[3] *= rec; + scale *= rec; + } + } + +/* Scale if necessary */ + + if (scale != 1.f) { + sscal_(&ki, &scale, &work[*n + 1], &c__1); + sscal_(&ki, &scale, &work[n2 + 1], &c__1); + } + work[j - 1 + *n] = x[0]; + work[j + *n] = x[1]; + work[j - 1 + n2] = x[2]; + work[j + n2] = x[3]; + +/* Update the right-hand side */ + + i__1 = j - 2; + r__1 = -x[0]; + saxpy_(&i__1, &r__1, &t[(j - 1) * t_dim1 + 1], &c__1, + &work[*n + 1], &c__1); + i__1 = j - 2; + r__1 = -x[1]; + saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); + i__1 = j - 2; + r__1 = -x[2]; + saxpy_(&i__1, &r__1, &t[(j - 1) * t_dim1 + 1], &c__1, + &work[n2 + 1], &c__1); + i__1 = j - 2; + r__1 = -x[3]; + saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ + n2 + 1], &c__1); + } +L90: + ; + } + +/* Copy the vector x or Q*x to VR and normalize. */ + + if (! over) { + scopy_(&ki, &work[*n + 1], &c__1, &vr[(is - 1) * vr_dim1 + + 1], &c__1); + scopy_(&ki, &work[n2 + 1], &c__1, &vr[is * vr_dim1 + 1], & + c__1); + + emax = 0.f; + i__1 = ki; + for (k = 1; k <= i__1; ++k) { +/* Computing MAX */ + r__3 = emax, r__4 = (r__1 = vr[k + (is - 1) * vr_dim1] + , dabs(r__1)) + (r__2 = vr[k + is * vr_dim1], + dabs(r__2)); + emax = dmax(r__3,r__4); +/* L100: */ + } + + remax = 1.f / emax; + sscal_(&ki, &remax, &vr[(is - 1) * vr_dim1 + 1], &c__1); + sscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); + + i__1 = *n; + for (k = ki + 1; k <= i__1; ++k) { + vr[k + (is - 1) * vr_dim1] = 0.f; + vr[k + is * vr_dim1] = 0.f; +/* L110: */ + } + + } else { + + if (ki > 2) { + i__1 = ki - 2; + sgemv_("N", n, &i__1, &c_b15, &vr[vr_offset], ldvr, & + work[*n + 1], &c__1, &work[ki - 1 + *n], &vr[( + ki - 1) * vr_dim1 + 1], &c__1); + i__1 = ki - 2; + sgemv_("N", n, &i__1, &c_b15, &vr[vr_offset], ldvr, & + work[n2 + 1], &c__1, &work[ki + n2], &vr[ki * + vr_dim1 + 1], &c__1); + } else { + sscal_(n, &work[ki - 1 + *n], &vr[(ki - 1) * vr_dim1 + + 1], &c__1); + sscal_(n, &work[ki + n2], &vr[ki * vr_dim1 + 1], & + c__1); + } + + emax = 0.f; + i__1 = *n; + for (k = 1; k <= i__1; ++k) { +/* Computing MAX */ + r__3 = emax, r__4 = (r__1 = vr[k + (ki - 1) * vr_dim1] + , dabs(r__1)) + (r__2 = vr[k + ki * vr_dim1], + dabs(r__2)); + emax = dmax(r__3,r__4); +/* L120: */ + } + remax = 1.f / emax; + sscal_(n, &remax, &vr[(ki - 1) * vr_dim1 + 1], &c__1); + sscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); + } + } + + --is; + if (ip != 0) { + --is; + } +L130: + if (ip == 1) { + ip = 0; + } + if (ip == -1) { + ip = 1; + } +/* L140: */ + } + } + + if (leftv) { + +/* Compute left eigenvectors. */ + + ip = 0; + is = 1; + i__1 = *n; + for (ki = 1; ki <= i__1; ++ki) { + + if (ip == -1) { + goto L250; + } + if (ki == *n) { + goto L150; + } + if (t[ki + 1 + ki * t_dim1] == 0.f) { + goto L150; + } + ip = 1; + +L150: + if (somev) { + if (! select[ki]) { + goto L250; + } + } + +/* Compute the KI-th eigenvalue (WR,WI). */ + + wr = t[ki + ki * t_dim1]; + wi = 0.f; + if (ip != 0) { + wi = sqrt((r__1 = t[ki + (ki + 1) * t_dim1], dabs(r__1))) * + sqrt((r__2 = t[ki + 1 + ki * t_dim1], dabs(r__2))); + } +/* Computing MAX */ + r__1 = ulp * (dabs(wr) + dabs(wi)); + smin = dmax(r__1,smlnum); + + if (ip == 0) { + +/* Real left eigenvector. */ + + work[ki + *n] = 1.f; + +/* Form right-hand side */ + + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + work[k + *n] = -t[ki + k * t_dim1]; +/* L160: */ + } + +/* + Solve the quasi-triangular system: + (T(KI+1:N,KI+1:N) - WR)'*X = SCALE*WORK +*/ + + vmax = 1.f; + vcrit = bignum; + + jnxt = ki + 1; + i__2 = *n; + for (j = ki + 1; j <= i__2; ++j) { + if (j < jnxt) { + goto L170; + } + j1 = j; + j2 = j; + jnxt = j + 1; + if (j < *n) { + if (t[j + 1 + j * t_dim1] != 0.f) { + j2 = j + 1; + jnxt = j + 2; + } + } + + if (j1 == j2) { + +/* + 1-by-1 diagonal block + + Scale if necessary to avoid overflow when forming + the right-hand side. +*/ + + if (work[j] > vcrit) { + rec = 1.f / vmax; + i__3 = *n - ki + 1; + sscal_(&i__3, &rec, &work[ki + *n], &c__1); + vmax = 1.f; + vcrit = bignum; + } + + i__3 = j - ki - 1; + work[j + *n] -= sdot_(&i__3, &t[ki + 1 + j * t_dim1], + &c__1, &work[ki + 1 + *n], &c__1); + +/* Solve (T(J,J)-WR)'*X = WORK */ + + slaln2_(&c_false, &c__1, &c__1, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &c_b29, x, &c__2, &scale, &xnorm, + &ierr); + +/* Scale if necessary */ + + if (scale != 1.f) { + i__3 = *n - ki + 1; + sscal_(&i__3, &scale, &work[ki + *n], &c__1); + } + work[j + *n] = x[0]; +/* Computing MAX */ + r__2 = (r__1 = work[j + *n], dabs(r__1)); + vmax = dmax(r__2,vmax); + vcrit = bignum / vmax; + + } else { + +/* + 2-by-2 diagonal block + + Scale if necessary to avoid overflow when forming + the right-hand side. + + Computing MAX +*/ + r__1 = work[j], r__2 = work[j + 1]; + beta = dmax(r__1,r__2); + if (beta > vcrit) { + rec = 1.f / vmax; + i__3 = *n - ki + 1; + sscal_(&i__3, &rec, &work[ki + *n], &c__1); + vmax = 1.f; + vcrit = bignum; + } + + i__3 = j - ki - 1; + work[j + *n] -= sdot_(&i__3, &t[ki + 1 + j * t_dim1], + &c__1, &work[ki + 1 + *n], &c__1); + + i__3 = j - ki - 1; + work[j + 1 + *n] -= sdot_(&i__3, &t[ki + 1 + (j + 1) * + t_dim1], &c__1, &work[ki + 1 + *n], &c__1); + +/* + Solve + [T(J,J)-WR T(J,J+1) ]'* X = SCALE*( WORK1 ) + [T(J+1,J) T(J+1,J+1)-WR] ( WORK2 ) +*/ + + slaln2_(&c_true, &c__2, &c__1, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &c_b29, x, &c__2, &scale, &xnorm, + &ierr); + +/* Scale if necessary */ + + if (scale != 1.f) { + i__3 = *n - ki + 1; + sscal_(&i__3, &scale, &work[ki + *n], &c__1); + } + work[j + *n] = x[0]; + work[j + 1 + *n] = x[1]; + +/* Computing MAX */ + r__3 = (r__1 = work[j + *n], dabs(r__1)), r__4 = ( + r__2 = work[j + 1 + *n], dabs(r__2)), r__3 = + max(r__3,r__4); + vmax = dmax(r__3,vmax); + vcrit = bignum / vmax; + + } +L170: + ; + } + +/* Copy the vector x or Q*x to VL and normalize. */ + + if (! over) { + i__2 = *n - ki + 1; + scopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * + vl_dim1], &c__1); + + i__2 = *n - ki + 1; + ii = isamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - + 1; + remax = 1.f / (r__1 = vl[ii + is * vl_dim1], dabs(r__1)); + i__2 = *n - ki + 1; + sscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); + + i__2 = ki - 1; + for (k = 1; k <= i__2; ++k) { + vl[k + is * vl_dim1] = 0.f; +/* L180: */ + } + + } else { + + if (ki < *n) { + i__2 = *n - ki; + sgemv_("N", n, &i__2, &c_b15, &vl[(ki + 1) * vl_dim1 + + 1], ldvl, &work[ki + 1 + *n], &c__1, &work[ + ki + *n], &vl[ki * vl_dim1 + 1], &c__1); + } + + ii = isamax_(n, &vl[ki * vl_dim1 + 1], &c__1); + remax = 1.f / (r__1 = vl[ii + ki * vl_dim1], dabs(r__1)); + sscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + + } + + } else { + +/* + Complex left eigenvector. + + Initial solve: + ((T(KI,KI) T(KI,KI+1) )' - (WR - I* WI))*X = 0. + ((T(KI+1,KI) T(KI+1,KI+1)) ) +*/ + + if ((r__1 = t[ki + (ki + 1) * t_dim1], dabs(r__1)) >= (r__2 = + t[ki + 1 + ki * t_dim1], dabs(r__2))) { + work[ki + *n] = wi / t[ki + (ki + 1) * t_dim1]; + work[ki + 1 + n2] = 1.f; + } else { + work[ki + *n] = 1.f; + work[ki + 1 + n2] = -wi / t[ki + 1 + ki * t_dim1]; + } + work[ki + 1 + *n] = 0.f; + work[ki + n2] = 0.f; + +/* Form right-hand side */ + + i__2 = *n; + for (k = ki + 2; k <= i__2; ++k) { + work[k + *n] = -work[ki + *n] * t[ki + k * t_dim1]; + work[k + n2] = -work[ki + 1 + n2] * t[ki + 1 + k * t_dim1] + ; +/* L190: */ + } + +/* + Solve complex quasi-triangular system: + ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2 +*/ + + vmax = 1.f; + vcrit = bignum; + + jnxt = ki + 2; + i__2 = *n; + for (j = ki + 2; j <= i__2; ++j) { + if (j < jnxt) { + goto L200; + } + j1 = j; + j2 = j; + jnxt = j + 1; + if (j < *n) { + if (t[j + 1 + j * t_dim1] != 0.f) { + j2 = j + 1; + jnxt = j + 2; + } + } + + if (j1 == j2) { + +/* + 1-by-1 diagonal block + + Scale if necessary to avoid overflow when + forming the right-hand side elements. +*/ + + if (work[j] > vcrit) { + rec = 1.f / vmax; + i__3 = *n - ki + 1; + sscal_(&i__3, &rec, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + sscal_(&i__3, &rec, &work[ki + n2], &c__1); + vmax = 1.f; + vcrit = bignum; + } + + i__3 = j - ki - 2; + work[j + *n] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + *n], &c__1); + i__3 = j - ki - 2; + work[j + n2] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + n2], &c__1); + +/* Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2 */ + + r__1 = -wi; + slaln2_(&c_false, &c__1, &c__2, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &r__1, x, &c__2, &scale, &xnorm, & + ierr); + +/* Scale if necessary */ + + if (scale != 1.f) { + i__3 = *n - ki + 1; + sscal_(&i__3, &scale, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + sscal_(&i__3, &scale, &work[ki + n2], &c__1); + } + work[j + *n] = x[0]; + work[j + n2] = x[2]; +/* Computing MAX */ + r__3 = (r__1 = work[j + *n], dabs(r__1)), r__4 = ( + r__2 = work[j + n2], dabs(r__2)), r__3 = max( + r__3,r__4); + vmax = dmax(r__3,vmax); + vcrit = bignum / vmax; + + } else { + +/* + 2-by-2 diagonal block + + Scale if necessary to avoid overflow when forming + the right-hand side elements. + + Computing MAX +*/ + r__1 = work[j], r__2 = work[j + 1]; + beta = dmax(r__1,r__2); + if (beta > vcrit) { + rec = 1.f / vmax; + i__3 = *n - ki + 1; + sscal_(&i__3, &rec, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + sscal_(&i__3, &rec, &work[ki + n2], &c__1); + vmax = 1.f; + vcrit = bignum; + } + + i__3 = j - ki - 2; + work[j + *n] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + *n], &c__1); + + i__3 = j - ki - 2; + work[j + n2] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + n2], &c__1); + + i__3 = j - ki - 2; + work[j + 1 + *n] -= sdot_(&i__3, &t[ki + 2 + (j + 1) * + t_dim1], &c__1, &work[ki + 2 + *n], &c__1); + + i__3 = j - ki - 2; + work[j + 1 + n2] -= sdot_(&i__3, &t[ki + 2 + (j + 1) * + t_dim1], &c__1, &work[ki + 2 + n2], &c__1); + +/* + Solve 2-by-2 complex linear equation + ([T(j,j) T(j,j+1) ]'-(wr-i*wi)*I)*X = SCALE*B + ([T(j+1,j) T(j+1,j+1)] ) +*/ + + r__1 = -wi; + slaln2_(&c_true, &c__2, &c__2, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &r__1, x, &c__2, &scale, &xnorm, & + ierr); + +/* Scale if necessary */ + + if (scale != 1.f) { + i__3 = *n - ki + 1; + sscal_(&i__3, &scale, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + sscal_(&i__3, &scale, &work[ki + n2], &c__1); + } + work[j + *n] = x[0]; + work[j + n2] = x[2]; + work[j + 1 + *n] = x[1]; + work[j + 1 + n2] = x[3]; +/* Computing MAX */ + r__1 = dabs(x[0]), r__2 = dabs(x[2]), r__1 = max(r__1, + r__2), r__2 = dabs(x[1]), r__1 = max(r__1, + r__2), r__2 = dabs(x[3]), r__1 = max(r__1, + r__2); + vmax = dmax(r__1,vmax); + vcrit = bignum / vmax; + + } +L200: + ; + } + +/* + Copy the vector x or Q*x to VL and normalize. + + L210: +*/ + if (! over) { + i__2 = *n - ki + 1; + scopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * + vl_dim1], &c__1); + i__2 = *n - ki + 1; + scopy_(&i__2, &work[ki + n2], &c__1, &vl[ki + (is + 1) * + vl_dim1], &c__1); + + emax = 0.f; + i__2 = *n; + for (k = ki; k <= i__2; ++k) { +/* Computing MAX */ + r__3 = emax, r__4 = (r__1 = vl[k + is * vl_dim1], + dabs(r__1)) + (r__2 = vl[k + (is + 1) * + vl_dim1], dabs(r__2)); + emax = dmax(r__3,r__4); +/* L220: */ + } + remax = 1.f / emax; + i__2 = *n - ki + 1; + sscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); + i__2 = *n - ki + 1; + sscal_(&i__2, &remax, &vl[ki + (is + 1) * vl_dim1], &c__1) + ; + + i__2 = ki - 1; + for (k = 1; k <= i__2; ++k) { + vl[k + is * vl_dim1] = 0.f; + vl[k + (is + 1) * vl_dim1] = 0.f; +/* L230: */ + } + } else { + if (ki < *n - 1) { + i__2 = *n - ki - 1; + sgemv_("N", n, &i__2, &c_b15, &vl[(ki + 2) * vl_dim1 + + 1], ldvl, &work[ki + 2 + *n], &c__1, &work[ + ki + *n], &vl[ki * vl_dim1 + 1], &c__1); + i__2 = *n - ki - 1; + sgemv_("N", n, &i__2, &c_b15, &vl[(ki + 2) * vl_dim1 + + 1], ldvl, &work[ki + 2 + n2], &c__1, &work[ + ki + 1 + n2], &vl[(ki + 1) * vl_dim1 + 1], & + c__1); + } else { + sscal_(n, &work[ki + *n], &vl[ki * vl_dim1 + 1], & + c__1); + sscal_(n, &work[ki + 1 + n2], &vl[(ki + 1) * vl_dim1 + + 1], &c__1); + } + + emax = 0.f; + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing MAX */ + r__3 = emax, r__4 = (r__1 = vl[k + ki * vl_dim1], + dabs(r__1)) + (r__2 = vl[k + (ki + 1) * + vl_dim1], dabs(r__2)); + emax = dmax(r__3,r__4); +/* L240: */ + } + remax = 1.f / emax; + sscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + sscal_(n, &remax, &vl[(ki + 1) * vl_dim1 + 1], &c__1); + + } + + } + + ++is; + if (ip != 0) { + ++is; + } +L250: + if (ip == -1) { + ip = 0; + } + if (ip == 1) { + ip = -1; + } + +/* L260: */ + } + + } + + return 0; + +/* End of STREVC */ + +} /* strevc_ */ + +/* Subroutine */ int strti2_(char *uplo, char *diag, integer *n, real *a, + integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer j; + static real ajj; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + static logical upper; + extern /* Subroutine */ int strmv_(char *, char *, char *, integer *, + real *, integer *, real *, integer *), + xerbla_(char *, integer *); + static logical nounit; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + STRTI2 computes the inverse of a real upper or lower triangular + matrix. + + This is the Level 2 BLAS version of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the matrix A is upper or lower triangular. + = 'U': Upper triangular + = 'L': Lower triangular + + DIAG (input) CHARACTER*1 + Specifies whether or not the matrix A is unit triangular. + = 'N': Non-unit triangular + = 'U': Unit triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the triangular matrix A. If UPLO = 'U', the + leading n by n upper triangular part of the array A contains + the upper triangular matrix, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n by n lower triangular part of the array A contains + the lower triangular matrix, and the strictly upper + triangular part of A is not referenced. If DIAG = 'U', the + diagonal elements of A are also not referenced and are + assumed to be 1. + + On exit, the (triangular) inverse of the original matrix, in + the same storage format. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + nounit = lsame_(diag, "N"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("STRTI2", &i__1); + return 0; + } + + if (upper) { + +/* Compute inverse of upper triangular matrix. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (nounit) { + a[j + j * a_dim1] = 1.f / a[j + j * a_dim1]; + ajj = -a[j + j * a_dim1]; + } else { + ajj = -1.f; + } + +/* Compute elements 1:j-1 of j-th column. */ + + i__2 = j - 1; + strmv_("Upper", "No transpose", diag, &i__2, &a[a_offset], lda, & + a[j * a_dim1 + 1], &c__1); + i__2 = j - 1; + sscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1); +/* L10: */ + } + } else { + +/* Compute inverse of lower triangular matrix. */ + + for (j = *n; j >= 1; --j) { + if (nounit) { + a[j + j * a_dim1] = 1.f / a[j + j * a_dim1]; + ajj = -a[j + j * a_dim1]; + } else { + ajj = -1.f; + } + if (j < *n) { + +/* Compute elements j+1:n of j-th column. */ + + i__1 = *n - j; + strmv_("Lower", "No transpose", diag, &i__1, &a[j + 1 + (j + + 1) * a_dim1], lda, &a[j + 1 + j * a_dim1], &c__1); + i__1 = *n - j; + sscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1); + } +/* L20: */ + } + } + + return 0; + +/* End of STRTI2 */ + +} /* strti2_ */ + +/* Subroutine */ int strtri_(char *uplo, char *diag, integer *n, real *a, + integer *lda, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, i__1, i__2[2], i__3, i__4, i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer j, jb, nb, nn; + extern logical lsame_(char *, char *); + static logical upper; + extern /* Subroutine */ int strmm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ), strsm_(char *, char *, char *, + char *, integer *, integer *, real *, real *, integer *, real *, + integer *), strti2_(char *, char * + , integer *, real *, integer *, integer *), + xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical nounit; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + STRTRI computes the inverse of a real upper or lower triangular + matrix A. + + This is the Level 3 BLAS version of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': A is upper triangular; + = 'L': A is lower triangular. + + DIAG (input) CHARACTER*1 + = 'N': A is non-unit triangular; + = 'U': A is unit triangular. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the triangular matrix A. If UPLO = 'U', the + leading N-by-N upper triangular part of the array A contains + the upper triangular matrix, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of the array A contains + the lower triangular matrix, and the strictly upper + triangular part of A is not referenced. If DIAG = 'U', the + diagonal elements of A are also not referenced and are + assumed to be 1. + On exit, the (triangular) inverse of the original matrix, in + the same storage format. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, A(i,i) is exactly zero. The triangular + matrix is singular and its inverse can not be computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + nounit = lsame_(diag, "N"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("STRTRI", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Check for singularity if non-unit. */ + + if (nounit) { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + if (a[*info + *info * a_dim1] == 0.f) { + return 0; + } +/* L10: */ + } + *info = 0; + } + +/* + Determine the block size for this environment. + + Writing concatenation +*/ + i__2[0] = 1, a__1[0] = uplo; + i__2[1] = 1, a__1[1] = diag; + s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "STRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)2); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code */ + + strti2_(uplo, diag, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code */ + + if (upper) { + +/* Compute inverse of upper triangular matrix */ + + i__1 = *n; + i__3 = nb; + for (j = 1; i__3 < 0 ? j >= i__1 : j <= i__1; j += i__3) { +/* Computing MIN */ + i__4 = nb, i__5 = *n - j + 1; + jb = min(i__4,i__5); + +/* Compute rows 1:j-1 of current block column */ + + i__4 = j - 1; + strmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, & + c_b15, &a[a_offset], lda, &a[j * a_dim1 + 1], lda); + i__4 = j - 1; + strsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, & + c_b151, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], + lda); + +/* Compute inverse of current diagonal block */ + + strti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info); +/* L20: */ + } + } else { + +/* Compute inverse of lower triangular matrix */ + + nn = (*n - 1) / nb * nb + 1; + i__3 = -nb; + for (j = nn; i__3 < 0 ? j >= 1 : j <= 1; j += i__3) { +/* Computing MIN */ + i__1 = nb, i__4 = *n - j + 1; + jb = min(i__1,i__4); + if (j + jb <= *n) { + +/* Compute rows j+jb:n of current block column */ + + i__1 = *n - j - jb + 1; + strmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, + &c_b15, &a[j + jb + (j + jb) * a_dim1], lda, &a[j + + jb + j * a_dim1], lda); + i__1 = *n - j - jb + 1; + strsm_("Right", "Lower", "No transpose", diag, &i__1, &jb, + &c_b151, &a[j + j * a_dim1], lda, &a[j + jb + j * + a_dim1], lda); + } + +/* Compute inverse of current diagonal block */ + + strti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info); +/* L30: */ + } + } + } + + return 0; + +/* End of STRTRI */ + +} /* strtri_ */ + diff --git a/numpy/linalg/lapack_lite/f2c_z_lapack.c b/numpy/linalg/lapack_lite/f2c_z_lapack.c new file mode 100644 index 000000000..143b7254f --- /dev/null +++ b/numpy/linalg/lapack_lite/f2c_z_lapack.c @@ -0,0 +1,26995 @@ +/* +NOTE: This is generated code. Look in Misc/lapack_lite for information on + remaking this file. +*/ +#include "f2c.h" + +#ifdef HAVE_CONFIG +#include "config.h" +#else +extern doublereal dlamch_(char *); +#define EPSILON dlamch_("Epsilon") +#define SAFEMINIMUM dlamch_("Safe minimum") +#define PRECISION dlamch_("Precision") +#define BASE dlamch_("Base") +#endif + +extern doublereal dlapy2_(doublereal *x, doublereal *y); + +/* +f2c knows the exact rules for precedence, and so omits parentheses where not +strictly necessary. Since this is generated code, we don't really care if +it's readable, and we know what is written is correct. So don't warn about +them. +*/ +#if defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wparentheses" +#endif + + +/* Table of constant values */ + +static integer c__1 = 1; +static doublecomplex c_b59 = {0.,0.}; +static doublecomplex c_b60 = {1.,0.}; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; +static integer c__0 = 0; +static integer c__8 = 8; +static integer c__4 = 4; +static integer c__65 = 65; +static integer c__6 = 6; +static integer c__9 = 9; +static doublereal c_b324 = 0.; +static doublereal c_b1015 = 1.; +static integer c__15 = 15; +static logical c_false = FALSE_; +static doublereal c_b1294 = -1.; +static doublereal c_b2210 = .5; + +/* Subroutine */ int zdrot_(integer *n, doublecomplex *cx, integer *incx, + doublecomplex *cy, integer *incy, doublereal *c__, doublereal *s) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4; + doublecomplex z__1, z__2, z__3; + + /* Local variables */ + static integer i__, ix, iy; + static doublecomplex ctemp; + + +/* + applies a plane rotation, where the cos and sin (c and s) are real + and the vectors cx and cy are complex. + jack dongarra, linpack, 3/11/78. + + + ===================================================================== +*/ + + /* Parameter adjustments */ + --cy; + --cx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments not equal + to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ix; + z__2.r = *c__ * cx[i__2].r, z__2.i = *c__ * cx[i__2].i; + i__3 = iy; + z__3.r = *s * cy[i__3].r, z__3.i = *s * cy[i__3].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + ctemp.r = z__1.r, ctemp.i = z__1.i; + i__2 = iy; + i__3 = iy; + z__2.r = *c__ * cy[i__3].r, z__2.i = *c__ * cy[i__3].i; + i__4 = ix; + z__3.r = *s * cx[i__4].r, z__3.i = *s * cx[i__4].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i; + cy[i__2].r = z__1.r, cy[i__2].i = z__1.i; + i__2 = ix; + cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + z__2.r = *c__ * cx[i__2].r, z__2.i = *c__ * cx[i__2].i; + i__3 = i__; + z__3.r = *s * cy[i__3].r, z__3.i = *s * cy[i__3].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + ctemp.r = z__1.r, ctemp.i = z__1.i; + i__2 = i__; + i__3 = i__; + z__2.r = *c__ * cy[i__3].r, z__2.i = *c__ * cy[i__3].i; + i__4 = i__; + z__3.r = *s * cx[i__4].r, z__3.i = *s * cx[i__4].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i; + cy[i__2].r = z__1.r, cy[i__2].i = z__1.i; + i__2 = i__; + cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i; +/* L30: */ + } + return 0; +} /* zdrot_ */ + +/* Subroutine */ int zgebak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, doublereal *scale, integer *m, doublecomplex *v, + integer *ldv, integer *info) +{ + /* System generated locals */ + integer v_dim1, v_offset, i__1; + + /* Local variables */ + static integer i__, k; + static doublereal s; + static integer ii; + extern logical lsame_(char *, char *); + static logical leftv; + extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), xerbla_(char *, integer *), + zdscal_(integer *, doublereal *, doublecomplex *, integer *); + static logical rightv; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZGEBAK forms the right or left eigenvectors of a complex general + matrix by backward transformation on the computed eigenvectors of the + balanced matrix output by ZGEBAL. + + Arguments + ========= + + JOB (input) CHARACTER*1 + Specifies the type of backward transformation required: + = 'N', do nothing, return immediately; + = 'P', do backward transformation for permutation only; + = 'S', do backward transformation for scaling only; + = 'B', do backward transformations for both permutation and + scaling. + JOB must be the same as the argument JOB supplied to ZGEBAL. + + SIDE (input) CHARACTER*1 + = 'R': V contains right eigenvectors; + = 'L': V contains left eigenvectors. + + N (input) INTEGER + The number of rows of the matrix V. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + The integers ILO and IHI determined by ZGEBAL. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + SCALE (input) DOUBLE PRECISION array, dimension (N) + Details of the permutation and scaling factors, as returned + by ZGEBAL. + + M (input) INTEGER + The number of columns of the matrix V. M >= 0. + + V (input/output) COMPLEX*16 array, dimension (LDV,M) + On entry, the matrix of right or left eigenvectors to be + transformed, as returned by ZHSEIN or ZTREVC. + On exit, V is overwritten by the transformed eigenvectors. + + LDV (input) INTEGER + The leading dimension of the array V. LDV >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + ===================================================================== + + + Decode and Test the input parameters +*/ + + /* Parameter adjustments */ + --scale; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + + /* Function Body */ + rightv = lsame_(side, "R"); + leftv = lsame_(side, "L"); + + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (! rightv && ! leftv) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -4; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -5; + } else if (*m < 0) { + *info = -7; + } else if (*ldv < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGEBAK", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*m == 0) { + return 0; + } + if (lsame_(job, "N")) { + return 0; + } + + if (*ilo == *ihi) { + goto L30; + } + +/* Backward balance */ + + if (lsame_(job, "S") || lsame_(job, "B")) { + + if (rightv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = scale[i__]; + zdscal_(m, &s, &v[i__ + v_dim1], ldv); +/* L10: */ + } + } + + if (leftv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = 1. / scale[i__]; + zdscal_(m, &s, &v[i__ + v_dim1], ldv); +/* L20: */ + } + } + + } + +/* + Backward permutation + + For I = ILO-1 step -1 until 1, + IHI+1 step 1 until N do -- +*/ + +L30: + if (lsame_(job, "P") || lsame_(job, "B")) { + if (rightv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L40; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = (integer) scale[i__]; + if (k == i__) { + goto L40; + } + zswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L40: + ; + } + } + + if (leftv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L50; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = (integer) scale[i__]; + if (k == i__) { + goto L50; + } + zswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L50: + ; + } + } + } + + return 0; + +/* End of ZGEBAK */ + +} /* zgebak_ */ + +/* Subroutine */ int zgebal_(char *job, integer *n, doublecomplex *a, integer + *lda, integer *ilo, integer *ihi, doublereal *scale, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1, d__2; + + /* Builtin functions */ + double d_imag(doublecomplex *), z_abs(doublecomplex *); + + /* Local variables */ + static doublereal c__, f, g; + static integer i__, j, k, l, m; + static doublereal r__, s, ca, ra; + static integer ica, ira, iexc; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + static doublereal sfmin1, sfmin2, sfmax1, sfmax2; + + extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_( + integer *, doublereal *, doublecomplex *, integer *); + extern integer izamax_(integer *, doublecomplex *, integer *); + static logical noconv; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZGEBAL balances a general complex matrix A. This involves, first, + permuting A by a similarity transformation to isolate eigenvalues + in the first 1 to ILO-1 and last IHI+1 to N elements on the + diagonal; and second, applying a diagonal similarity transformation + to rows and columns ILO to IHI to make the rows and columns as + close in norm as possible. Both steps are optional. + + Balancing may reduce the 1-norm of the matrix, and improve the + accuracy of the computed eigenvalues and/or eigenvectors. + + Arguments + ========= + + JOB (input) CHARACTER*1 + Specifies the operations to be performed on A: + = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 + for i = 1,...,N; + = 'P': permute only; + = 'S': scale only; + = 'B': both permute and scale. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the input matrix A. + On exit, A is overwritten by the balanced matrix. + If JOB = 'N', A is not referenced. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + ILO (output) INTEGER + IHI (output) INTEGER + ILO and IHI are set to integers such that on exit + A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. + If JOB = 'N' or 'S', ILO = 1 and IHI = N. + + SCALE (output) DOUBLE PRECISION array, dimension (N) + Details of the permutations and scaling factors applied to + A. If P(j) is the index of the row and column interchanged + with row and column j and D(j) is the scaling factor + applied to row and column j, then + SCALE(j) = P(j) for j = 1,...,ILO-1 + = D(j) for j = ILO,...,IHI + = P(j) for j = IHI+1,...,N. + The order in which the interchanges are made is N to IHI+1, + then 1 to ILO-1. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The permutations consist of row and column interchanges which put + the matrix in the form + + ( T1 X Y ) + P A P = ( 0 B Z ) + ( 0 0 T2 ) + + where T1 and T2 are upper triangular matrices whose eigenvalues lie + along the diagonal. The column indices ILO and IHI mark the starting + and ending columns of the submatrix B. Balancing consists of applying + a diagonal similarity transformation inv(D) * B * D to make the + 1-norms of each row of B and its corresponding column nearly equal. + The output matrix is + + ( T1 X*D Y ) + ( 0 inv(D)*B*D inv(D)*Z ). + ( 0 0 T2 ) + + Information about the permutations P and the diagonal matrix D is + returned in the vector SCALE. + + This subroutine is based on the EISPACK routine CBAL. + + Modified by Tzu-Yi Chen, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --scale; + + /* Function Body */ + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGEBAL", &i__1); + return 0; + } + + k = 1; + l = *n; + + if (*n == 0) { + goto L210; + } + + if (lsame_(job, "N")) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + scale[i__] = 1.; +/* L10: */ + } + goto L210; + } + + if (lsame_(job, "S")) { + goto L120; + } + +/* Permutation to isolate eigenvalues if possible */ + + goto L50; + +/* Row and column exchange. */ + +L20: + scale[m] = (doublereal) j; + if (j == m) { + goto L30; + } + + zswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); + i__1 = *n - k + 1; + zswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); + +L30: + switch (iexc) { + case 1: goto L40; + case 2: goto L80; + } + +/* Search for rows isolating an eigenvalue and push them down. */ + +L40: + if (l == 1) { + goto L210; + } + --l; + +L50: + for (j = l; j >= 1; --j) { + + i__1 = l; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ == j) { + goto L60; + } + i__2 = j + i__ * a_dim1; + if (a[i__2].r != 0. || d_imag(&a[j + i__ * a_dim1]) != 0.) { + goto L70; + } +L60: + ; + } + + m = l; + iexc = 1; + goto L20; +L70: + ; + } + + goto L90; + +/* Search for columns isolating an eigenvalue and push them left. */ + +L80: + ++k; + +L90: + i__1 = l; + for (j = k; j <= i__1; ++j) { + + i__2 = l; + for (i__ = k; i__ <= i__2; ++i__) { + if (i__ == j) { + goto L100; + } + i__3 = i__ + j * a_dim1; + if (a[i__3].r != 0. || d_imag(&a[i__ + j * a_dim1]) != 0.) { + goto L110; + } +L100: + ; + } + + m = k; + iexc = 2; + goto L20; +L110: + ; + } + +L120: + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + scale[i__] = 1.; +/* L130: */ + } + + if (lsame_(job, "P")) { + goto L210; + } + +/* + Balance the submatrix in rows K to L. + + Iterative loop for norm reduction +*/ + + sfmin1 = SAFEMINIMUM / PRECISION; + sfmax1 = 1. / sfmin1; + sfmin2 = sfmin1 * 8.; + sfmax2 = 1. / sfmin2; +L140: + noconv = FALSE_; + + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + c__ = 0.; + r__ = 0.; + + i__2 = l; + for (j = k; j <= i__2; ++j) { + if (j == i__) { + goto L150; + } + i__3 = j + i__ * a_dim1; + c__ += (d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[j + i__ * + a_dim1]), abs(d__2)); + i__3 = i__ + j * a_dim1; + r__ += (d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[i__ + j * + a_dim1]), abs(d__2)); +L150: + ; + } + ica = izamax_(&l, &a[i__ * a_dim1 + 1], &c__1); + ca = z_abs(&a[ica + i__ * a_dim1]); + i__2 = *n - k + 1; + ira = izamax_(&i__2, &a[i__ + k * a_dim1], lda); + ra = z_abs(&a[i__ + (ira + k - 1) * a_dim1]); + +/* Guard against zero C or R due to underflow. */ + + if (c__ == 0. || r__ == 0.) { + goto L200; + } + g = r__ / 8.; + f = 1.; + s = c__ + r__; +L160: +/* Computing MAX */ + d__1 = max(f,c__); +/* Computing MIN */ + d__2 = min(r__,g); + if (c__ >= g || max(d__1,ca) >= sfmax2 || min(d__2,ra) <= sfmin2) { + goto L170; + } + f *= 8.; + c__ *= 8.; + ca *= 8.; + r__ /= 8.; + g /= 8.; + ra /= 8.; + goto L160; + +L170: + g = c__ / 8.; +L180: +/* Computing MIN */ + d__1 = min(f,c__), d__1 = min(d__1,g); + if (g < r__ || max(r__,ra) >= sfmax2 || min(d__1,ca) <= sfmin2) { + goto L190; + } + f /= 8.; + c__ /= 8.; + g /= 8.; + ca /= 8.; + r__ *= 8.; + ra *= 8.; + goto L180; + +/* Now balance. */ + +L190: + if (c__ + r__ >= s * .95) { + goto L200; + } + if (f < 1. && scale[i__] < 1.) { + if (f * scale[i__] <= sfmin1) { + goto L200; + } + } + if (f > 1. && scale[i__] > 1.) { + if (scale[i__] >= sfmax1 / f) { + goto L200; + } + } + g = 1. / f; + scale[i__] *= f; + noconv = TRUE_; + + i__2 = *n - k + 1; + zdscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); + zdscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); + +L200: + ; + } + + if (noconv) { + goto L140; + } + +L210: + *ilo = k; + *ihi = l; + + return 0; + +/* End of ZGEBAL */ + +} /* zgebal_ */ + +/* Subroutine */ int zgebd2_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublereal *d__, doublereal *e, doublecomplex *tauq, + doublecomplex *taup, doublecomplex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + doublecomplex z__1; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__; + static doublecomplex alpha; + extern /* Subroutine */ int zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), xerbla_(char *, integer *), zlarfg_(integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), zlacgv_(integer *, doublecomplex *, + integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZGEBD2 reduces a complex general m by n matrix A to upper or lower + real bidiagonal form B by a unitary transformation: Q' * A * P = B. + + If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. M >= 0. + + N (input) INTEGER + The number of columns in the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the m by n general matrix to be reduced. + On exit, + if m >= n, the diagonal and the first superdiagonal are + overwritten with the upper bidiagonal matrix B; the + elements below the diagonal, with the array TAUQ, represent + the unitary matrix Q as a product of elementary + reflectors, and the elements above the first superdiagonal, + with the array TAUP, represent the unitary matrix P as + a product of elementary reflectors; + if m < n, the diagonal and the first subdiagonal are + overwritten with the lower bidiagonal matrix B; the + elements below the first subdiagonal, with the array TAUQ, + represent the unitary matrix Q as a product of + elementary reflectors, and the elements above the diagonal, + with the array TAUP, represent the unitary matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) DOUBLE PRECISION array, dimension (min(M,N)) + The diagonal elements of the bidiagonal matrix B: + D(i) = A(i,i). + + E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) + The off-diagonal elements of the bidiagonal matrix B: + if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; + if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. + + TAUQ (output) COMPLEX*16 array dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the unitary matrix Q. See Further Details. + + TAUP (output) COMPLEX*16 array, dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the unitary matrix P. See Further Details. + + WORK (workspace) COMPLEX*16 array, dimension (max(M,N)) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + If m >= n, + + Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are complex scalars, and v and u are complex + vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in + A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in + A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, + + Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are complex scalars, v and u are complex vectors; + v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); + u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). + + The contents of A on exit are illustrated by the following examples: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) + ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) + ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) + ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) + ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) + ( v1 v2 v3 v4 v5 ) + + where d and e denote diagonal and off-diagonal elements of B, vi + denotes an element of the vector defining H(i), and ui an element of + the vector defining G(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info < 0) { + i__1 = -(*info); + xerbla_("ZGEBD2", &i__1); + return 0; + } + + if (*m >= *n) { + +/* Reduce to upper bidiagonal form */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + zlarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, & + tauq[i__]); + i__2 = i__; + d__[i__2] = alpha.r; + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Apply H(i)' to A(i:m,i+1:n) from the left */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__; + d_cnjg(&z__1, &tauq[i__]); + zlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &z__1, + &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + i__2 = i__ + i__ * a_dim1; + i__3 = i__; + a[i__2].r = d__[i__3], a[i__2].i = 0.; + + if (i__ < *n) { + +/* + Generate elementary reflector G(i) to annihilate + A(i,i+2:n) +*/ + + i__2 = *n - i__; + zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ + (i__ + 1) * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + zlarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & + taup[i__]); + i__2 = i__; + e[i__2] = alpha.r; + i__2 = i__ + (i__ + 1) * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Apply G(i) to A(i+1:m,i+1:n) from the right */ + + i__2 = *m - i__; + i__3 = *n - i__; + zlarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], + lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &work[1]); + i__2 = *n - i__; + zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ + (i__ + 1) * a_dim1; + i__3 = i__; + a[i__2].r = e[i__3], a[i__2].i = 0.; + } else { + i__2 = i__; + taup[i__2].r = 0., taup[i__2].i = 0.; + } +/* L10: */ + } + } else { + +/* Reduce to lower bidiagonal form */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector G(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; + zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + zlarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & + taup[i__]); + i__2 = i__; + d__[i__2] = alpha.r; + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Apply G(i) to A(i+1:m,i:n) from the right */ + + i__2 = *m - i__; + i__3 = *n - i__ + 1; +/* Computing MIN */ + i__4 = i__ + 1; + zlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[ + i__], &a[min(i__4,*m) + i__ * a_dim1], lda, &work[1]); + i__2 = *n - i__ + 1; + zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + i__3 = i__; + a[i__2].r = d__[i__3], a[i__2].i = 0.; + + if (i__ < *m) { + +/* + Generate elementary reflector H(i) to annihilate + A(i+2:m,i) +*/ + + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + zlarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, + &tauq[i__]); + i__2 = i__; + e[i__2] = alpha.r; + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Apply H(i)' to A(i+1:m,i+1:n) from the left */ + + i__2 = *m - i__; + i__3 = *n - i__; + d_cnjg(&z__1, &tauq[i__]); + zlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], & + c__1, &z__1, &a[i__ + 1 + (i__ + 1) * a_dim1], lda, & + work[1]); + i__2 = i__ + 1 + i__ * a_dim1; + i__3 = i__; + a[i__2].r = e[i__3], a[i__2].i = 0.; + } else { + i__2 = i__; + tauq[i__2].r = 0., tauq[i__2].i = 0.; + } +/* L20: */ + } + } + return 0; + +/* End of ZGEBD2 */ + +} /* zgebd2_ */ + +/* Subroutine */ int zgebrd_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublereal *d__, doublereal *e, doublecomplex *tauq, + doublecomplex *taup, doublecomplex *work, integer *lwork, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1; + doublecomplex z__1; + + /* Local variables */ + static integer i__, j, nb, nx; + static doublereal ws; + static integer nbmin, iinfo, minmn; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), zgebd2_(integer *, integer *, + doublecomplex *, integer *, doublereal *, doublereal *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *), + xerbla_(char *, integer *), zlabrd_(integer *, integer *, + integer *, doublecomplex *, integer *, doublereal *, doublereal *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwrkx, ldwrky, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZGEBRD reduces a general complex M-by-N matrix A to upper or lower + bidiagonal form B by a unitary transformation: Q**H * A * P = B. + + If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. M >= 0. + + N (input) INTEGER + The number of columns in the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the M-by-N general matrix to be reduced. + On exit, + if m >= n, the diagonal and the first superdiagonal are + overwritten with the upper bidiagonal matrix B; the + elements below the diagonal, with the array TAUQ, represent + the unitary matrix Q as a product of elementary + reflectors, and the elements above the first superdiagonal, + with the array TAUP, represent the unitary matrix P as + a product of elementary reflectors; + if m < n, the diagonal and the first subdiagonal are + overwritten with the lower bidiagonal matrix B; the + elements below the first subdiagonal, with the array TAUQ, + represent the unitary matrix Q as a product of + elementary reflectors, and the elements above the diagonal, + with the array TAUP, represent the unitary matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) DOUBLE PRECISION array, dimension (min(M,N)) + The diagonal elements of the bidiagonal matrix B: + D(i) = A(i,i). + + E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) + The off-diagonal elements of the bidiagonal matrix B: + if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; + if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. + + TAUQ (output) COMPLEX*16 array dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the unitary matrix Q. See Further Details. + + TAUP (output) COMPLEX*16 array, dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the unitary matrix P. See Further Details. + + WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The length of the array WORK. LWORK >= max(1,M,N). + For optimum performance LWORK >= (M+N)*NB, where NB + is the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + If m >= n, + + Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are complex scalars, and v and u are complex + vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in + A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in + A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, + + Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are complex scalars, and v and u are complex + vectors; v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in + A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in + A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + The contents of A on exit are illustrated by the following examples: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) + ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) + ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) + ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) + ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) + ( v1 v2 v3 v4 v5 ) + + where d and e denote diagonal and off-diagonal elements of B, vi + denotes an element of the vector defining H(i), and ui an element of + the vector defining G(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; +/* Computing MAX */ + i__1 = 1, i__2 = ilaenv_(&c__1, "ZGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = max(i__1,i__2); + lwkopt = (*m + *n) * nb; + d__1 = (doublereal) lwkopt; + work[1].r = d__1, work[1].i = 0.; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = max(1,*m); + if (*lwork < max(i__1,*n) && ! lquery) { + *info = -10; + } + } + if (*info < 0) { + i__1 = -(*info); + xerbla_("ZGEBRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + minmn = min(*m,*n); + if (minmn == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + ws = (doublereal) max(*m,*n); + ldwrkx = *m; + ldwrky = *n; + + if (nb > 1 && nb < minmn) { + +/* + Set the crossover point NX. + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "ZGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + +/* Determine when to switch from blocked to unblocked code. */ + + if (nx < minmn) { + ws = (doublereal) ((*m + *n) * nb); + if ((doublereal) (*lwork) < ws) { + +/* + Not enough work space for the optimal NB, consider using + a smaller block size. +*/ + + nbmin = ilaenv_(&c__2, "ZGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + if (*lwork >= (*m + *n) * nbmin) { + nb = *lwork / (*m + *n); + } else { + nb = 1; + nx = minmn; + } + } + } + } else { + nx = minmn; + } + + i__1 = minmn - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + +/* + Reduce rows and columns i:i+ib-1 to bidiagonal form and return + the matrices X and Y which are needed to update the unreduced + part of the matrix +*/ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ + 1; + zlabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[ + i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx + * nb + 1], &ldwrky); + +/* + Update the trailing submatrix A(i+ib:m,i+ib:n), using + an update of the form A := A - V*Y' - X*U' +*/ + + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + z__1.r = -1., z__1.i = -0.; + zgemm_("No transpose", "Conjugate transpose", &i__3, &i__4, &nb, & + z__1, &a[i__ + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + + nb + 1], &ldwrky, &c_b60, &a[i__ + nb + (i__ + nb) * a_dim1], + lda); + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + z__1.r = -1., z__1.i = -0.; + zgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &z__1, & + work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, & + c_b60, &a[i__ + nb + (i__ + nb) * a_dim1], lda); + +/* Copy diagonal and off-diagonal elements of B back into A */ + + if (*m >= *n) { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + i__4 = j + j * a_dim1; + i__5 = j; + a[i__4].r = d__[i__5], a[i__4].i = 0.; + i__4 = j + (j + 1) * a_dim1; + i__5 = j; + a[i__4].r = e[i__5], a[i__4].i = 0.; +/* L10: */ + } + } else { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + i__4 = j + j * a_dim1; + i__5 = j; + a[i__4].r = d__[i__5], a[i__4].i = 0.; + i__4 = j + 1 + j * a_dim1; + i__5 = j; + a[i__4].r = e[i__5], a[i__4].i = 0.; +/* L20: */ + } + } +/* L30: */ + } + +/* Use unblocked code to reduce the remainder of the matrix */ + + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + zgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], & + tauq[i__], &taup[i__], &work[1], &iinfo); + work[1].r = ws, work[1].i = 0.; + return 0; + +/* End of ZGEBRD */ + +} /* zgebrd_ */ + +/* Subroutine */ int zgeev_(char *jobvl, char *jobvr, integer *n, + doublecomplex *a, integer *lda, doublecomplex *w, doublecomplex *vl, + integer *ldvl, doublecomplex *vr, integer *ldvr, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3, i__4; + doublereal d__1, d__2; + doublecomplex z__1, z__2; + + /* Builtin functions */ + double sqrt(doublereal), d_imag(doublecomplex *); + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__, k, ihi; + static doublereal scl; + static integer ilo; + static doublereal dum[1], eps; + static doublecomplex tmp; + static integer ibal; + static char side[1]; + static integer maxb; + static doublereal anrm; + static integer ierr, itau, iwrk, nout; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *), dlabad_(doublereal *, doublereal *); + extern doublereal dznrm2_(integer *, doublecomplex *, integer *); + static logical scalea; + + static doublereal cscale; + extern /* Subroutine */ int zgebak_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublecomplex *, integer *, + integer *), zgebal_(char *, integer *, + doublecomplex *, integer *, integer *, integer *, doublereal *, + integer *); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical select[1]; + extern /* Subroutine */ int zdscal_(integer *, doublereal *, + doublecomplex *, integer *); + static doublereal bignum; + extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, + integer *, doublereal *); + extern /* Subroutine */ int zgehrd_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *), zlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublecomplex *, + integer *, integer *), zlacpy_(char *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *); + static integer minwrk, maxwrk; + static logical wantvl; + static doublereal smlnum; + static integer hswork, irwork; + extern /* Subroutine */ int zhseqr_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *), ztrevc_(char *, char *, logical *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *, integer *, doublecomplex *, + doublereal *, integer *); + static logical lquery, wantvr; + extern /* Subroutine */ int zunghr_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *); + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZGEEV computes for an N-by-N complex nonsymmetric matrix A, the + eigenvalues and, optionally, the left and/or right eigenvectors. + + The right eigenvector v(j) of A satisfies + A * v(j) = lambda(j) * v(j) + where lambda(j) is its eigenvalue. + The left eigenvector u(j) of A satisfies + u(j)**H * A = lambda(j) * u(j)**H + where u(j)**H denotes the conjugate transpose of u(j). + + The computed eigenvectors are normalized to have Euclidean norm + equal to 1 and largest component real. + + Arguments + ========= + + JOBVL (input) CHARACTER*1 + = 'N': left eigenvectors of A are not computed; + = 'V': left eigenvectors of are computed. + + JOBVR (input) CHARACTER*1 + = 'N': right eigenvectors of A are not computed; + = 'V': right eigenvectors of A are computed. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the N-by-N matrix A. + On exit, A has been overwritten. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + W (output) COMPLEX*16 array, dimension (N) + W contains the computed eigenvalues. + + VL (output) COMPLEX*16 array, dimension (LDVL,N) + If JOBVL = 'V', the left eigenvectors u(j) are stored one + after another in the columns of VL, in the same order + as their eigenvalues. + If JOBVL = 'N', VL is not referenced. + u(j) = VL(:,j), the j-th column of VL. + + LDVL (input) INTEGER + The leading dimension of the array VL. LDVL >= 1; if + JOBVL = 'V', LDVL >= N. + + VR (output) COMPLEX*16 array, dimension (LDVR,N) + If JOBVR = 'V', the right eigenvectors v(j) are stored one + after another in the columns of VR, in the same order + as their eigenvalues. + If JOBVR = 'N', VR is not referenced. + v(j) = VR(:,j), the j-th column of VR. + + LDVR (input) INTEGER + The leading dimension of the array VR. LDVR >= 1; if + JOBVR = 'V', LDVR >= N. + + WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,2*N). + For good performance, LWORK must generally be larger. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + RWORK (workspace) DOUBLE PRECISION array, dimension (2*N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = i, the QR algorithm failed to compute all the + eigenvalues, and no eigenvectors have been computed; + elements and i+1:N of W contain eigenvalues which have + converged. + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --w; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + --rwork; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + wantvl = lsame_(jobvl, "V"); + wantvr = lsame_(jobvr, "V"); + if (! wantvl && ! lsame_(jobvl, "N")) { + *info = -1; + } else if (! wantvr && ! lsame_(jobvr, "N")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldvl < 1 || wantvl && *ldvl < *n) { + *info = -8; + } else if (*ldvr < 1 || wantvr && *ldvr < *n) { + *info = -10; + } + +/* + Compute workspace + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + CWorkspace refers to complex workspace, and RWorkspace to real + workspace. NB refers to the optimal block size for the + immediately following subroutine, as returned by ILAENV. + HSWORK refers to the workspace preferred by ZHSEQR, as + calculated below. HSWORK is computed assuming ILO=1 and IHI=N, + the worst case.) +*/ + + minwrk = 1; + if (*info == 0 && (*lwork >= 1 || lquery)) { + maxwrk = *n + *n * ilaenv_(&c__1, "ZGEHRD", " ", n, &c__1, n, &c__0, ( + ftnlen)6, (ftnlen)1); + if (! wantvl && ! wantvr) { +/* Computing MAX */ + i__1 = 1, i__2 = *n << 1; + minwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = ilaenv_(&c__8, "ZHSEQR", "EN", n, &c__1, n, &c_n1, (ftnlen) + 6, (ftnlen)2); + maxb = max(i__1,2); +/* + Computing MIN + Computing MAX +*/ + i__3 = 2, i__4 = ilaenv_(&c__4, "ZHSEQR", "EN", n, &c__1, n, & + c_n1, (ftnlen)6, (ftnlen)2); + i__1 = min(maxb,*n), i__2 = max(i__3,i__4); + k = min(i__1,i__2); +/* Computing MAX */ + i__1 = k * (k + 2), i__2 = *n << 1; + hswork = max(i__1,i__2); + maxwrk = max(maxwrk,hswork); + } else { +/* Computing MAX */ + i__1 = 1, i__2 = *n << 1; + minwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + (*n - 1) * ilaenv_(&c__1, "ZUNGHR", + " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = ilaenv_(&c__8, "ZHSEQR", "SV", n, &c__1, n, &c_n1, (ftnlen) + 6, (ftnlen)2); + maxb = max(i__1,2); +/* + Computing MIN + Computing MAX +*/ + i__3 = 2, i__4 = ilaenv_(&c__4, "ZHSEQR", "SV", n, &c__1, n, & + c_n1, (ftnlen)6, (ftnlen)2); + i__1 = min(maxb,*n), i__2 = max(i__3,i__4); + k = min(i__1,i__2); +/* Computing MAX */ + i__1 = k * (k + 2), i__2 = *n << 1; + hswork = max(i__1,i__2); +/* Computing MAX */ + i__1 = max(maxwrk,hswork), i__2 = *n << 1; + maxwrk = max(i__1,i__2); + } + work[1].r = (doublereal) maxwrk, work[1].i = 0.; + } + if (*lwork < minwrk && ! lquery) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGEEV ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = PRECISION; + smlnum = SAFEMINIMUM; + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1. / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = zlange_("M", n, n, &a[a_offset], lda, dum); + scalea = FALSE_; + if (anrm > 0. && anrm < smlnum) { + scalea = TRUE_; + cscale = smlnum; + } else if (anrm > bignum) { + scalea = TRUE_; + cscale = bignum; + } + if (scalea) { + zlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & + ierr); + } + +/* + Balance the matrix + (CWorkspace: none) + (RWorkspace: need N) +*/ + + ibal = 1; + zgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &rwork[ibal], &ierr); + +/* + Reduce to upper Hessenberg form + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: none) +*/ + + itau = 1; + iwrk = itau + *n; + i__1 = *lwork - iwrk + 1; + zgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, + &ierr); + + if (wantvl) { + +/* + Want left eigenvectors + Copy Householder vectors to VL +*/ + + *(unsigned char *)side = 'L'; + zlacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) + ; + +/* + Generate unitary matrix in VL + (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) + (RWorkspace: none) +*/ + + i__1 = *lwork - iwrk + 1; + zunghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], + &i__1, &ierr); + +/* + Perform QR iteration, accumulating Schur vectors in VL + (CWorkspace: need 1, prefer HSWORK (see comments) ) + (RWorkspace: none) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + zhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vl[ + vl_offset], ldvl, &work[iwrk], &i__1, info); + + if (wantvr) { + +/* + Want left and right eigenvectors + Copy Schur vectors to VR +*/ + + *(unsigned char *)side = 'B'; + zlacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); + } + + } else if (wantvr) { + +/* + Want right eigenvectors + Copy Householder vectors to VR +*/ + + *(unsigned char *)side = 'R'; + zlacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) + ; + +/* + Generate unitary matrix in VR + (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) + (RWorkspace: none) +*/ + + i__1 = *lwork - iwrk + 1; + zunghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], + &i__1, &ierr); + +/* + Perform QR iteration, accumulating Schur vectors in VR + (CWorkspace: need 1, prefer HSWORK (see comments) ) + (RWorkspace: none) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + zhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vr[ + vr_offset], ldvr, &work[iwrk], &i__1, info); + + } else { + +/* + Compute eigenvalues only + (CWorkspace: need 1, prefer HSWORK (see comments) ) + (RWorkspace: none) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + zhseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vr[ + vr_offset], ldvr, &work[iwrk], &i__1, info); + } + +/* If INFO > 0 from ZHSEQR, then quit */ + + if (*info > 0) { + goto L50; + } + + if (wantvl || wantvr) { + +/* + Compute left and/or right eigenvectors + (CWorkspace: need 2*N) + (RWorkspace: need 2*N) +*/ + + irwork = ibal + *n; + ztrevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, + &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &rwork[irwork], + &ierr); + } + + if (wantvl) { + +/* + Undo balancing of left eigenvectors + (CWorkspace: none) + (RWorkspace: need N) +*/ + + zgebak_("B", "L", n, &ilo, &ihi, &rwork[ibal], n, &vl[vl_offset], + ldvl, &ierr); + +/* Normalize left eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + scl = 1. / dznrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + zdscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + i__3 = k + i__ * vl_dim1; +/* Computing 2nd power */ + d__1 = vl[i__3].r; +/* Computing 2nd power */ + d__2 = d_imag(&vl[k + i__ * vl_dim1]); + rwork[irwork + k - 1] = d__1 * d__1 + d__2 * d__2; +/* L10: */ + } + k = idamax_(n, &rwork[irwork], &c__1); + d_cnjg(&z__2, &vl[k + i__ * vl_dim1]); + d__1 = sqrt(rwork[irwork + k - 1]); + z__1.r = z__2.r / d__1, z__1.i = z__2.i / d__1; + tmp.r = z__1.r, tmp.i = z__1.i; + zscal_(n, &tmp, &vl[i__ * vl_dim1 + 1], &c__1); + i__2 = k + i__ * vl_dim1; + i__3 = k + i__ * vl_dim1; + d__1 = vl[i__3].r; + z__1.r = d__1, z__1.i = 0.; + vl[i__2].r = z__1.r, vl[i__2].i = z__1.i; +/* L20: */ + } + } + + if (wantvr) { + +/* + Undo balancing of right eigenvectors + (CWorkspace: none) + (RWorkspace: need N) +*/ + + zgebak_("B", "R", n, &ilo, &ihi, &rwork[ibal], n, &vr[vr_offset], + ldvr, &ierr); + +/* Normalize right eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + scl = 1. / dznrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + zdscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + i__3 = k + i__ * vr_dim1; +/* Computing 2nd power */ + d__1 = vr[i__3].r; +/* Computing 2nd power */ + d__2 = d_imag(&vr[k + i__ * vr_dim1]); + rwork[irwork + k - 1] = d__1 * d__1 + d__2 * d__2; +/* L30: */ + } + k = idamax_(n, &rwork[irwork], &c__1); + d_cnjg(&z__2, &vr[k + i__ * vr_dim1]); + d__1 = sqrt(rwork[irwork + k - 1]); + z__1.r = z__2.r / d__1, z__1.i = z__2.i / d__1; + tmp.r = z__1.r, tmp.i = z__1.i; + zscal_(n, &tmp, &vr[i__ * vr_dim1 + 1], &c__1); + i__2 = k + i__ * vr_dim1; + i__3 = k + i__ * vr_dim1; + d__1 = vr[i__3].r; + z__1.r = d__1, z__1.i = 0.; + vr[i__2].r = z__1.r, vr[i__2].i = z__1.i; +/* L40: */ + } + } + +/* Undo scaling if necessary */ + +L50: + if (scalea) { + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = max(i__3,1); + zlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &w[*info + 1] + , &i__2, &ierr); + if (*info > 0) { + i__1 = ilo - 1; + zlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &w[1], n, + &ierr); + } + } + + work[1].r = (doublereal) maxwrk, work[1].i = 0.; + return 0; + +/* End of ZGEEV */ + +} /* zgeev_ */ + +/* Subroutine */ int zgehd2_(integer *n, integer *ilo, integer *ihi, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublecomplex z__1; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__; + static doublecomplex alpha; + extern /* Subroutine */ int zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), xerbla_(char *, integer *), zlarfg_(integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZGEHD2 reduces a complex general matrix A to upper Hessenberg form H + by a unitary similarity transformation: Q' * A * Q = H . + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that A is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to ZGEBAL; otherwise they should be + set to 1 and N respectively. See Further Details. + 1 <= ILO <= IHI <= max(1,N). + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the n by n general matrix to be reduced. + On exit, the upper triangle and the first subdiagonal of A + are overwritten with the upper Hessenberg matrix H, and the + elements below the first subdiagonal, with the array TAU, + represent the unitary matrix Q as a product of elementary + reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) COMPLEX*16 array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) COMPLEX*16 array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrix Q is represented as a product of (ihi-ilo) elementary + reflectors + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on + exit in A(i+2:ihi,i), and tau in TAU(i). + + The contents of A are illustrated by the following example, with + n = 7, ilo = 2 and ihi = 6: + + on entry, on exit, + + ( a a a a a a a ) ( a a h h h h a ) + ( a a a a a a ) ( a h h h h a ) + ( a a a a a a ) ( h h h h h h ) + ( a a a a a a ) ( v2 h h h h h ) + ( a a a a a a ) ( v2 v3 h h h h ) + ( a a a a a a ) ( v2 v3 v4 h h h ) + ( a ) ( a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGEHD2", &i__1); + return 0; + } + + i__1 = *ihi - 1; + for (i__ = *ilo; i__ <= i__1; ++i__) { + +/* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */ + + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *ihi - i__; +/* Computing MIN */ + i__3 = i__ + 2; + zlarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, &tau[ + i__]); + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Apply H(i) to A(1:ihi,i+1:ihi) from the right */ + + i__2 = *ihi - i__; + zlarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]); + +/* Apply H(i)' to A(i+1:ihi,i+1:n) from the left */ + + i__2 = *ihi - i__; + i__3 = *n - i__; + d_cnjg(&z__1, &tau[i__]); + zlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &z__1, + &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]); + + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = alpha.r, a[i__2].i = alpha.i; +/* L10: */ + } + + return 0; + +/* End of ZGEHD2 */ + +} /* zgehd2_ */ + +/* Subroutine */ int zgehrd_(integer *n, integer *ilo, integer *ihi, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + doublecomplex z__1; + + /* Local variables */ + static integer i__; + static doublecomplex t[4160] /* was [65][64] */; + static integer ib; + static doublecomplex ei; + static integer nb, nh, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), zgehd2_(integer *, integer *, integer + *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *), + zlahrd_(integer *, integer *, integer *, doublecomplex *, integer + *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZGEHRD reduces a complex general matrix A to upper Hessenberg form H + by a unitary similarity transformation: Q' * A * Q = H . + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that A is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to ZGEBAL; otherwise they should be + set to 1 and N respectively. See Further Details. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the N-by-N general matrix to be reduced. + On exit, the upper triangle and the first subdiagonal of A + are overwritten with the upper Hessenberg matrix H, and the + elements below the first subdiagonal, with the array TAU, + represent the unitary matrix Q as a product of elementary + reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) COMPLEX*16 array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to + zero. + + WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The length of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrix Q is represented as a product of (ihi-ilo) elementary + reflectors + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on + exit in A(i+2:ihi,i), and tau in TAU(i). + + The contents of A are illustrated by the following example, with + n = 7, ilo = 2 and ihi = 6: + + on entry, on exit, + + ( a a a a a a a ) ( a a h h h h a ) + ( a a a a a a ) ( a h h h h a ) + ( a a a a a a ) ( h h h h h h ) + ( a a a a a a ) ( v2 h h h h h ) + ( a a a a a a ) ( v2 v3 h h h h ) + ( a a a a a a ) ( v2 v3 v4 h h h ) + ( a ) ( a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; +/* Computing MIN */ + i__1 = 64, i__2 = ilaenv_(&c__1, "ZGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = min(i__1,i__2); + lwkopt = *n * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGEHRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */ + + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + tau[i__2].r = 0., tau[i__2].i = 0.; +/* L10: */ + } + i__1 = *n - 1; + for (i__ = max(1,*ihi); i__ <= i__1; ++i__) { + i__2 = i__; + tau[i__2].r = 0., tau[i__2].i = 0.; +/* L20: */ + } + +/* Quick return if possible */ + + nh = *ihi - *ilo + 1; + if (nh <= 1) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + nbmin = 2; + iws = 1; + if (nb > 1 && nb < nh) { + +/* + Determine when to cross over from blocked to unblocked code + (last block is always handled by unblocked code). + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "ZGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < nh) { + +/* Determine if workspace is large enough for blocked code. */ + + iws = *n * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: determine the + minimum value of NB, and reduce NB or force use of + unblocked code. + + Computing MAX +*/ + i__1 = 2, i__2 = ilaenv_(&c__2, "ZGEHRD", " ", n, ilo, ihi, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + if (*lwork >= *n * nbmin) { + nb = *lwork / *n; + } else { + nb = 1; + } + } + } + } + ldwork = *n; + + if (nb < nbmin || nb >= nh) { + +/* Use unblocked code below */ + + i__ = *ilo; + + } else { + +/* Use blocked code */ + + i__1 = *ihi - 1 - nx; + i__2 = nb; + for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *ihi - i__; + ib = min(i__3,i__4); + +/* + Reduce columns i:i+ib-1 to Hessenberg form, returning the + matrices V and T of the block reflector H = I - V*T*V' + which performs the reduction, and also the matrix Y = A*V*T +*/ + + zlahrd_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & + c__65, &work[1], &ldwork); + +/* + Apply the block reflector H to A(1:ihi,i+ib:ihi) from the + right, computing A := A - Y * V'. V(i+ib,ib-1) must be set + to 1. +*/ + + i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; + ei.r = a[i__3].r, ei.i = a[i__3].i; + i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; + a[i__3].r = 1., a[i__3].i = 0.; + i__3 = *ihi - i__ - ib + 1; + z__1.r = -1., z__1.i = -0.; + zgemm_("No transpose", "Conjugate transpose", ihi, &i__3, &ib, & + z__1, &work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, + &c_b60, &a[(i__ + ib) * a_dim1 + 1], lda); + i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; + a[i__3].r = ei.r, a[i__3].i = ei.i; + +/* + Apply the block reflector H to A(i+1:ihi,i+ib:n) from the + left +*/ + + i__3 = *ihi - i__; + i__4 = *n - i__ - ib + 1; + zlarfb_("Left", "Conjugate transpose", "Forward", "Columnwise", & + i__3, &i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, & + c__65, &a[i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], & + ldwork); +/* L30: */ + } + } + +/* Use unblocked code to reduce the rest of the matrix */ + + zgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); + work[1].r = (doublereal) iws, work[1].i = 0.; + + return 0; + +/* End of ZGEHRD */ + +} /* zgehrd_ */ + +/* Subroutine */ int zgelq2_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, k; + static doublecomplex alpha; + extern /* Subroutine */ int zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), xerbla_(char *, integer *), zlarfg_(integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), zlacgv_(integer *, doublecomplex *, + integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZGELQ2 computes an LQ factorization of a complex m by n matrix A: + A = L * Q. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the m by n matrix A. + On exit, the elements on and below the diagonal of the array + contain the m by min(m,n) lower trapezoidal matrix L (L is + lower triangular if m <= n); the elements above the diagonal, + with the array TAU, represent the unitary matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) COMPLEX*16 array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) COMPLEX*16 array, dimension (M) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(k)' . . . H(2)' H(1)', where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in + A(i,i+1:n), and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGELQ2", &i__1); + return 0; + } + + k = min(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; + zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + zlarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, &tau[i__] + ); + if (i__ < *m) { + +/* Apply H(i) to A(i+1:m,i:n) from the right */ + + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + i__2 = *m - i__; + i__3 = *n - i__ + 1; + zlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[ + i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + } + i__2 = i__ + i__ * a_dim1; + a[i__2].r = alpha.r, a[i__2].i = alpha.i; + i__2 = *n - i__ + 1; + zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); +/* L10: */ + } + return 0; + +/* End of ZGELQ2 */ + +} /* zgelq2_ */ + +/* Subroutine */ int zgelqf_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int zgelq2_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *), xerbla_( + char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + static integer ldwork; + extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZGELQF computes an LQ factorization of a complex M-by-N matrix A: + A = L * Q. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, the elements on and below the diagonal of the array + contain the m-by-min(m,n) lower trapezoidal matrix L (L is + lower triangular if m <= n); the elements above the diagonal, + with the array TAU, represent the unitary matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) COMPLEX*16 array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,M). + For optimum performance LWORK >= M*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(k)' . . . H(2)' H(1)', where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in + A(i,i+1:n), and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "ZGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *m * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (*lwork < max(1,*m) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGELQF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = min(*m,*n); + if (k == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "ZGELQF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "ZGELQF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* + Compute the LQ factorization of the current block + A(i:i+ib-1,i:n) +*/ + + i__3 = *n - i__ + 1; + zgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *m) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__3 = *n - i__ + 1; + zlarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(i+ib:m,i:n) from the right */ + + i__3 = *m - i__ - ib + 1; + i__4 = *n - i__ + 1; + zlarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, + &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. */ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + zgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); + } + + work[1].r = (doublereal) iws, work[1].i = 0.; + return 0; + +/* End of ZGELQF */ + +} /* zgelqf_ */ + +/* Subroutine */ int zgelsd_(integer *m, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + doublereal *s, doublereal *rcond, integer *rank, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + doublereal d__1; + doublecomplex z__1; + + /* Local variables */ + static integer ie, il, mm; + static doublereal eps, anrm, bnrm; + static integer itau, iascl, ibscl; + static doublereal sfmin; + static integer minmn, maxmn, itaup, itauq, mnthr, nwork; + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); + + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlaset_(char *, integer *, integer + *, doublereal *, doublereal *, doublereal *, integer *), + xerbla_(char *, integer *), zgebrd_(integer *, integer *, + doublecomplex *, integer *, doublereal *, doublereal *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, + integer *, doublereal *); + static doublereal bignum; + extern /* Subroutine */ int zgelqf_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, integer * + ), zlalsd_(char *, integer *, integer *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublereal *, integer *, + doublecomplex *, doublereal *, integer *, integer *), + zlascl_(char *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublecomplex *, integer *, integer *), zgeqrf_(integer *, integer *, doublecomplex *, integer *, + doublecomplex *, doublecomplex *, integer *, integer *); + static integer ldwork; + extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *), + zlaset_(char *, integer *, integer *, doublecomplex *, + doublecomplex *, doublecomplex *, integer *); + static integer minwrk, maxwrk; + static doublereal smlnum; + extern /* Subroutine */ int zunmbr_(char *, char *, char *, integer *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer * + ); + static logical lquery; + static integer nrwork, smlsiz; + extern /* Subroutine */ int zunmlq_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *), zunmqr_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + ZGELSD computes the minimum-norm solution to a real linear least + squares problem: + minimize 2-norm(| b - A*x |) + using the singular value decomposition (SVD) of A. A is an M-by-N + matrix which may be rank-deficient. + + Several right hand side vectors b and solution vectors x can be + handled in a single call; they are stored as the columns of the + M-by-NRHS right hand side matrix B and the N-by-NRHS solution + matrix X. + + The problem is solved in three steps: + (1) Reduce the coefficient matrix A to bidiagonal form with + Householder tranformations, reducing the original problem + into a "bidiagonal least squares problem" (BLS) + (2) Solve the BLS using a divide and conquer approach. + (3) Apply back all the Householder tranformations to solve + the original least squares problem. + + The effective rank of A is determined by treating as zero those + singular values which are less than RCOND times the largest singular + value. + + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrices B and X. NRHS >= 0. + + A (input) COMPLEX*16 array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, A has been destroyed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) + On entry, the M-by-NRHS right hand side matrix B. + On exit, B is overwritten by the N-by-NRHS solution matrix X. + If m >= n and RANK = n, the residual sum-of-squares for + the solution in the i-th column is given by the sum of + squares of elements n+1:m in that column. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,M,N). + + S (output) DOUBLE PRECISION array, dimension (min(M,N)) + The singular values of A in decreasing order. + The condition number of A in the 2-norm = S(1)/S(min(m,n)). + + RCOND (input) DOUBLE PRECISION + RCOND is used to determine the effective rank of A. + Singular values S(i) <= RCOND*S(1) are treated as zero. + If RCOND < 0, machine precision is used instead. + + RANK (output) INTEGER + The effective rank of A, i.e., the number of singular values + which are greater than RCOND*S(1). + + WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK must be at least 1. + The exact minimum amount of workspace needed depends on M, + N and NRHS. As long as LWORK is at least + 2 * N + N * NRHS + if M is greater than or equal to N or + 2 * M + M * NRHS + if M is less than N, the code will execute correctly. + For good performance, LWORK should generally be larger. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + RWORK (workspace) DOUBLE PRECISION array, dimension at least + 10*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + + (SMLSIZ+1)**2 + if M is greater than or equal to N or + 10*M + 2*M*SMLSIZ + 8*M*NLVL + 3*SMLSIZ*NRHS + + (SMLSIZ+1)**2 + if M is less than N, the code will execute correctly. + SMLSIZ is returned by ILAENV and is equal to the maximum + size of the subproblems at the bottom of the computation + tree (usually about 25), and + NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) + + IWORK (workspace) INTEGER array, dimension (LIWORK) + LIWORK >= 3 * MINMN * NLVL + 11 * MINMN, + where MINMN = MIN( M,N ). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: the algorithm for computing the SVD failed to converge; + if INFO = i, i off-diagonal elements of an intermediate + bidiagonal form did not converge to zero. + + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input arguments. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --s; + --work; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + minmn = min(*m,*n); + maxmn = max(*m,*n); + mnthr = ilaenv_(&c__6, "ZGELSD", " ", m, n, nrhs, &c_n1, (ftnlen)6, ( + ftnlen)1); + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*ldb < max(1,maxmn)) { + *info = -7; + } + + smlsiz = ilaenv_(&c__9, "ZGELSD", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + +/* + Compute workspace. + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + NB refers to the optimal block size for the immediately + following subroutine, as returned by ILAENV.) +*/ + + minwrk = 1; + if (*info == 0) { + maxwrk = 0; + mm = *m; + if (*m >= *n && *m >= mnthr) { + +/* Path 1a - overdetermined, with many more rows than columns. */ + + mm = *n; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * ilaenv_(&c__1, "ZGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *nrhs * ilaenv_(&c__1, "ZUNMQR", "LC", m, + nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2); + maxwrk = max(i__1,i__2); + } + if (*m >= *n) { + +/* + Path 1 - overdetermined or exactly determined. + + Computing MAX +*/ + i__1 = maxwrk, i__2 = (*n << 1) + (mm + *n) * ilaenv_(&c__1, + "ZGEBRD", " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1) + ; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *nrhs * ilaenv_(&c__1, "ZUNMBR", + "QLC", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, "ZUN" + "MBR", "PLN", n, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * *nrhs; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = (*n << 1) + mm, i__2 = (*n << 1) + *n * *nrhs; + minwrk = max(i__1,i__2); + } + if (*n > *m) { + if (*n >= mnthr) { + +/* + Path 2a - underdetermined, with many more columns + than rows. +*/ + + maxwrk = *m + *m * ilaenv_(&c__1, "ZGELQF", " ", m, n, &c_n1, + &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * + ilaenv_(&c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * ilaenv_(& + c__1, "ZUNMBR", "QLC", m, nrhs, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * + ilaenv_(&c__1, "ZUNMLQ", "LC", n, nrhs, m, &c_n1, ( + ftnlen)6, (ftnlen)2); + maxwrk = max(i__1,i__2); + if (*nrhs > 1) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs; + maxwrk = max(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 1); + maxwrk = max(i__1,i__2); + } +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *m * *nrhs; + maxwrk = max(i__1,i__2); + } else { + +/* Path 2 - underdetermined. */ + + maxwrk = (*m << 1) + (*n + *m) * ilaenv_(&c__1, "ZGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *nrhs * ilaenv_(&c__1, + "ZUNMBR", "QLC", m, nrhs, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, "ZUNMBR" + , "PLN", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * *nrhs; + maxwrk = max(i__1,i__2); + } +/* Computing MAX */ + i__1 = (*m << 1) + *n, i__2 = (*m << 1) + *m * *nrhs; + minwrk = max(i__1,i__2); + } + minwrk = min(minwrk,maxwrk); + d__1 = (doublereal) maxwrk; + z__1.r = d__1, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + if (*lwork < minwrk && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGELSD", &i__1); + return 0; + } else if (lquery) { + goto L10; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + *rank = 0; + return 0; + } + +/* Get machine parameters. */ + + eps = PRECISION; + sfmin = SAFEMINIMUM; + smlnum = sfmin / eps; + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + +/* Scale A if max entry outside range [SMLNUM,BIGNUM]. */ + + anrm = zlange_("M", m, n, &a[a_offset], lda, &rwork[1]); + iascl = 0; + if (anrm > 0. && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + zlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM. */ + + zlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = max(*m,*n); + zlaset_("F", &i__1, nrhs, &c_b59, &c_b59, &b[b_offset], ldb); + dlaset_("F", &minmn, &c__1, &c_b324, &c_b324, &s[1], &c__1) + ; + *rank = 0; + goto L10; + } + +/* Scale B if max entry outside range [SMLNUM,BIGNUM]. */ + + bnrm = zlange_("M", m, nrhs, &b[b_offset], ldb, &rwork[1]); + ibscl = 0; + if (bnrm > 0. && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM. */ + + zlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM. */ + + zlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 2; + } + +/* If M < N make sure B(M+1:N,:) = 0 */ + + if (*m < *n) { + i__1 = *n - *m; + zlaset_("F", &i__1, nrhs, &c_b59, &c_b59, &b[*m + 1 + b_dim1], ldb); + } + +/* Overdetermined case. */ + + if (*m >= *n) { + +/* Path 1 - overdetermined or exactly determined. */ + + mm = *m; + if (*m >= mnthr) { + +/* Path 1a - overdetermined, with many more rows than columns */ + + mm = *n; + itau = 1; + nwork = itau + *n; + +/* + Compute A=Q*R. + (RWorkspace: need N) + (CWorkspace: need N, prefer N*NB) +*/ + + i__1 = *lwork - nwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, + info); + +/* + Multiply B by transpose(Q). + (RWorkspace: need N) + (CWorkspace: need NRHS, prefer NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + zunmqr_("L", "C", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[nwork], &i__1, info); + +/* Zero out below R. */ + + if (*n > 1) { + i__1 = *n - 1; + i__2 = *n - 1; + zlaset_("L", &i__1, &i__2, &c_b59, &c_b59, &a[a_dim1 + 2], + lda); + } + } + + itauq = 1; + itaup = itauq + *n; + nwork = itaup + *n; + ie = 1; + nrwork = ie + *n; + +/* + Bidiagonalize R in A. + (RWorkspace: need N) + (CWorkspace: need 2*N+MM, prefer 2*N+(MM+N)*NB) +*/ + + i__1 = *lwork - nwork + 1; + zgebrd_(&mm, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], & + work[itaup], &work[nwork], &i__1, info); + +/* + Multiply B by transpose of left bidiagonalizing vectors of R. + (CWorkspace: need 2*N+NRHS, prefer 2*N+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + zunmbr_("Q", "L", "C", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], + &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Solve the bidiagonal least squares problem. */ + + zlalsd_("U", &smlsiz, n, nrhs, &s[1], &rwork[ie], &b[b_offset], ldb, + rcond, rank, &work[nwork], &rwork[nrwork], &iwork[1], info); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of R. */ + + i__1 = *lwork - nwork + 1; + zunmbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], & + b[b_offset], ldb, &work[nwork], &i__1, info); + + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *m, i__2 = (*m << 1) - 4, i__1 = max(i__1,i__2), i__1 = max( + i__1,*nrhs), i__2 = *n - *m * 3; + if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__1,i__2)) { + +/* + Path 2a - underdetermined, with many more columns than rows + and sufficient workspace for an efficient algorithm. +*/ + + ldwork = *m; +/* + Computing MAX + Computing MAX +*/ + i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = + max(i__3,*nrhs), i__4 = *n - *m * 3; + i__1 = (*m << 2) + *m * *lda + max(i__3,i__4), i__2 = *m * *lda + + *m + *m * *nrhs; + if (*lwork >= max(i__1,i__2)) { + ldwork = *lda; + } + itau = 1; + nwork = *m + 1; + +/* + Compute A=L*Q. + (CWorkspace: need 2*M, prefer M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, + info); + il = nwork; + +/* Copy L to WORK(IL), zeroing out above its diagonal. */ + + zlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork); + i__1 = *m - 1; + i__2 = *m - 1; + zlaset_("U", &i__1, &i__2, &c_b59, &c_b59, &work[il + ldwork], & + ldwork); + itauq = il + ldwork * *m; + itaup = itauq + *m; + nwork = itaup + *m; + ie = 1; + nrwork = ie + *m; + +/* + Bidiagonalize L in WORK(IL). + (RWorkspace: need M) + (CWorkspace: need M*M+4*M, prefer M*M+4*M+2*M*NB) +*/ + + i__1 = *lwork - nwork + 1; + zgebrd_(m, m, &work[il], &ldwork, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__1, info); + +/* + Multiply B by transpose of left bidiagonalizing vectors of L. + (CWorkspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + zunmbr_("Q", "L", "C", m, nrhs, m, &work[il], &ldwork, &work[ + itauq], &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Solve the bidiagonal least squares problem. */ + + zlalsd_("U", &smlsiz, m, nrhs, &s[1], &rwork[ie], &b[b_offset], + ldb, rcond, rank, &work[nwork], &rwork[nrwork], &iwork[1], + info); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of L. */ + + i__1 = *lwork - nwork + 1; + zunmbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[ + itaup], &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Zero out below first M rows of B. */ + + i__1 = *n - *m; + zlaset_("F", &i__1, nrhs, &c_b59, &c_b59, &b[*m + 1 + b_dim1], + ldb); + nwork = itau + *m; + +/* + Multiply transpose(Q) by B. + (CWorkspace: need NRHS, prefer NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + zunmlq_("L", "C", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[nwork], &i__1, info); + + } else { + +/* Path 2 - remaining underdetermined cases. */ + + itauq = 1; + itaup = itauq + *m; + nwork = itaup + *m; + ie = 1; + nrwork = ie + *m; + +/* + Bidiagonalize A. + (RWorkspace: need M) + (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) +*/ + + i__1 = *lwork - nwork + 1; + zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__1, info); + +/* + Multiply B by transpose of left bidiagonalizing vectors. + (CWorkspace: need 2*M+NRHS, prefer 2*M+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + zunmbr_("Q", "L", "C", m, nrhs, n, &a[a_offset], lda, &work[itauq] + , &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Solve the bidiagonal least squares problem. */ + + zlalsd_("L", &smlsiz, m, nrhs, &s[1], &rwork[ie], &b[b_offset], + ldb, rcond, rank, &work[nwork], &rwork[nrwork], &iwork[1], + info); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of A. */ + + i__1 = *lwork - nwork + 1; + zunmbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup] + , &b[b_offset], ldb, &work[nwork], &i__1, info); + + } + } + +/* Undo scaling. */ + + if (iascl == 1) { + zlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, + info); + dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } else if (iascl == 2) { + zlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, + info); + dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } + if (ibscl == 1) { + zlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } else if (ibscl == 2) { + zlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } + +L10: + d__1 = (doublereal) maxwrk; + z__1.r = d__1, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + return 0; + +/* End of ZGELSD */ + +} /* zgelsd_ */ + +/* Subroutine */ int zgeqr2_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublecomplex z__1; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__, k; + static doublecomplex alpha; + extern /* Subroutine */ int zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), xerbla_(char *, integer *), zlarfg_(integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZGEQR2 computes a QR factorization of a complex m by n matrix A: + A = Q * R. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the m by n matrix A. + On exit, the elements on and above the diagonal of the array + contain the min(m,n) by n upper trapezoidal matrix R (R is + upper triangular if m >= n); the elements below the diagonal, + with the array TAU, represent the unitary matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) COMPLEX*16 array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) COMPLEX*16 array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(1) H(2) . . . H(k), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), + and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGEQR2", &i__1); + return 0; + } + + k = min(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + zlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1] + , &c__1, &tau[i__]); + if (i__ < *n) { + +/* Apply H(i)' to A(i:m,i+1:n) from the left */ + + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + i__2 = *m - i__ + 1; + i__3 = *n - i__; + d_cnjg(&z__1, &tau[i__]); + zlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &z__1, + &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + i__2 = i__ + i__ * a_dim1; + a[i__2].r = alpha.r, a[i__2].i = alpha.i; + } +/* L10: */ + } + return 0; + +/* End of ZGEQR2 */ + +} /* zgeqr2_ */ + +/* Subroutine */ int zgeqrf_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int zgeqr2_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *), xerbla_( + char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + static integer ldwork; + extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZGEQRF computes a QR factorization of a complex M-by-N matrix A: + A = Q * R. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, the elements on and above the diagonal of the array + contain the min(M,N)-by-N upper trapezoidal matrix R (R is + upper triangular if m >= n); the elements below the diagonal, + with the array TAU, represent the unitary matrix Q as a + product of min(m,n) elementary reflectors (see Further + Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) COMPLEX*16 array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is + the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(1) H(2) . . . H(k), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), + and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "ZGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *n * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGEQRF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = min(*m,*n); + if (k == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "ZGEQRF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "ZGEQRF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* + Compute the QR factorization of the current block + A(i:m,i:i+ib-1) +*/ + + i__3 = *m - i__ + 1; + zgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *n) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__3 = *m - i__ + 1; + zlarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H' to A(i:m,i+ib:n) from the left */ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ - ib + 1; + zlarfb_("Left", "Conjugate transpose", "Forward", "Columnwise" + , &i__3, &i__4, &ib, &a[i__ + i__ * a_dim1], lda, & + work[1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, + &work[ib + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. */ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + zgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); + } + + work[1].r = (doublereal) iws, work[1].i = 0.; + return 0; + +/* End of ZGEQRF */ + +} /* zgeqrf_ */ + +/* Subroutine */ int zgesdd_(char *jobz, integer *m, integer *n, + doublecomplex *a, integer *lda, doublereal *s, doublecomplex *u, + integer *ldu, doublecomplex *vt, integer *ldvt, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2, i__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, ie, il, ir, iu, blk; + static doublereal dum[1], eps; + static integer iru, ivt, iscl; + static doublereal anrm; + static integer idum[1], ierr, itau, irvt; + extern logical lsame_(char *, char *); + static integer chunk, minmn; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + static integer wrkbl, itaup, itauq; + static logical wntqa; + static integer nwork; + static logical wntqn, wntqo, wntqs; + extern /* Subroutine */ int zlacp2_(char *, integer *, integer *, + doublereal *, integer *, doublecomplex *, integer *); + static integer mnthr1, mnthr2; + extern /* Subroutine */ int dbdsdc_(char *, char *, integer *, doublereal + *, doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *); + + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), xerbla_(char *, integer *), + zgebrd_(integer *, integer *, doublecomplex *, integer *, + doublereal *, doublereal *, doublecomplex *, doublecomplex *, + doublecomplex *, integer *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static doublereal bignum; + extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, + integer *, doublereal *); + extern /* Subroutine */ int zgelqf_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, integer * + ), zlacrm_(integer *, integer *, doublecomplex *, integer *, + doublereal *, integer *, doublecomplex *, integer *, doublereal *) + , zlarcm_(integer *, integer *, doublereal *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublereal *), zlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublecomplex *, integer *, + integer *), zgeqrf_(integer *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, integer * + ); + static integer ldwrkl; + extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *), + zlaset_(char *, integer *, integer *, doublecomplex *, + doublecomplex *, doublecomplex *, integer *); + static integer ldwrkr, minwrk, ldwrku, maxwrk; + extern /* Subroutine */ int zungbr_(char *, integer *, integer *, integer + *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *); + static integer ldwkvt; + static doublereal smlnum; + static logical wntqas; + extern /* Subroutine */ int zunmbr_(char *, char *, char *, integer *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer * + ), zunglq_(integer *, integer *, integer * + , doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *); + static logical lquery; + static integer nrwork; + extern /* Subroutine */ int zungqr_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *); + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + ZGESDD computes the singular value decomposition (SVD) of a complex + M-by-N matrix A, optionally computing the left and/or right singular + vectors, by using divide-and-conquer method. The SVD is written + + A = U * SIGMA * conjugate-transpose(V) + + where SIGMA is an M-by-N matrix which is zero except for its + min(m,n) diagonal elements, U is an M-by-M unitary matrix, and + V is an N-by-N unitary matrix. The diagonal elements of SIGMA + are the singular values of A; they are real and non-negative, and + are returned in descending order. The first min(m,n) columns of + U and V are the left and right singular vectors of A. + + Note that the routine returns VT = V**H, not V. + + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + JOBZ (input) CHARACTER*1 + Specifies options for computing all or part of the matrix U: + = 'A': all M columns of U and all N rows of V**H are + returned in the arrays U and VT; + = 'S': the first min(M,N) columns of U and the first + min(M,N) rows of V**H are returned in the arrays U + and VT; + = 'O': If M >= N, the first N columns of U are overwritten + on the array A and all rows of V**H are returned in + the array VT; + otherwise, all columns of U are returned in the + array U and the first M rows of V**H are overwritten + in the array VT; + = 'N': no columns of U or rows of V**H are computed. + + M (input) INTEGER + The number of rows of the input matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the input matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, + if JOBZ = 'O', A is overwritten with the first N columns + of U (the left singular vectors, stored + columnwise) if M >= N; + A is overwritten with the first M rows + of V**H (the right singular vectors, stored + rowwise) otherwise. + if JOBZ .ne. 'O', the contents of A are destroyed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + S (output) DOUBLE PRECISION array, dimension (min(M,N)) + The singular values of A, sorted so that S(i) >= S(i+1). + + U (output) COMPLEX*16 array, dimension (LDU,UCOL) + UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; + UCOL = min(M,N) if JOBZ = 'S'. + If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M + unitary matrix U; + if JOBZ = 'S', U contains the first min(M,N) columns of U + (the left singular vectors, stored columnwise); + if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= 1; if + JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. + + VT (output) COMPLEX*16 array, dimension (LDVT,N) + If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the + N-by-N unitary matrix V**H; + if JOBZ = 'S', VT contains the first min(M,N) rows of + V**H (the right singular vectors, stored rowwise); + if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= 1; if + JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; + if JOBZ = 'S', LDVT >= min(M,N). + + WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= 1. + if JOBZ = 'N', LWORK >= 2*min(M,N)+max(M,N). + if JOBZ = 'O', + LWORK >= 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N). + if JOBZ = 'S' or 'A', + LWORK >= min(M,N)*min(M,N)+2*min(M,N)+max(M,N). + For good performance, LWORK should generally be larger. + If LWORK < 0 but other input arguments are legal, WORK(1) + returns the optimal LWORK. + + RWORK (workspace) DOUBLE PRECISION array, dimension (LRWORK) + If JOBZ = 'N', LRWORK >= 7*min(M,N). + Otherwise, LRWORK >= 5*min(M,N)*min(M,N) + 5*min(M,N) + + IWORK (workspace) INTEGER array, dimension (8*min(M,N)) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The updating process of DBDSDC did not converge. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --work; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + minmn = min(*m,*n); + mnthr1 = (integer) (minmn * 17. / 9.); + mnthr2 = (integer) (minmn * 5. / 3.); + wntqa = lsame_(jobz, "A"); + wntqs = lsame_(jobz, "S"); + wntqas = wntqa || wntqs; + wntqo = lsame_(jobz, "O"); + wntqn = lsame_(jobz, "N"); + minwrk = 1; + maxwrk = 1; + lquery = *lwork == -1; + + if (! (wntqa || wntqs || wntqo || wntqn)) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < * + m) { + *info = -8; + } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || + wntqo && *m >= *n && *ldvt < *n) { + *info = -10; + } + +/* + Compute workspace + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + CWorkspace refers to complex workspace, and RWorkspace to + real workspace. NB refers to the optimal block size for the + immediately following subroutine, as returned by ILAENV.) +*/ + + if (*info == 0 && *m > 0 && *n > 0) { + if (*m >= *n) { + +/* + There is no complex work space needed for bidiagonal SVD + The real work space needed for bidiagonal SVD is BDSPAC, + BDSPAC = 3*N*N + 4*N +*/ + + if (*m >= mnthr1) { + if (wntqn) { + +/* Path 1 (M much larger than N, JOBZ='N') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "ZGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& + c__1, "ZGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); + maxwrk = wrkbl; + minwrk = *n * 3; + } else if (wntqo) { + +/* Path 2 (M much larger than N, JOBZ='O') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "ZGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "ZUNGQR", + " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& + c__1, "ZGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *m * *n + *n * *n + wrkbl; + minwrk = (*n << 1) * *n + *n * 3; + } else if (wntqs) { + +/* Path 3 (M much larger than N, JOBZ='S') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "ZGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "ZUNGQR", + " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& + c__1, "ZGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *n * *n + wrkbl; + minwrk = *n * *n + *n * 3; + } else if (wntqa) { + +/* Path 4 (M much larger than N, JOBZ='A') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "ZGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *m * ilaenv_(&c__1, "ZUNGQR", + " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& + c__1, "ZGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *n * *n + wrkbl; + minwrk = *n * *n + (*n << 1) + *m; + } + } else if (*m >= mnthr2) { + +/* Path 5 (M much larger than N, but not as much as MNTHR1) */ + + maxwrk = (*n << 1) + (*m + *n) * ilaenv_(&c__1, "ZGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + minwrk = (*n << 1) + *m; + if (wntqo) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNGBR", "Q", m, n, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + maxwrk += *m * *n; + minwrk += *n * *n; + } else if (wntqs) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNGBR", "Q", m, n, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + } else if (wntqa) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *m * ilaenv_(&c__1, + "ZUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + } + } else { + +/* Path 6 (M at least N, but not much larger) */ + + maxwrk = (*n << 1) + (*m + *n) * ilaenv_(&c__1, "ZGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + minwrk = (*n << 1) + *m; + if (wntqo) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNMBR", "QLN", m, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + maxwrk += *m * *n; + minwrk += *n * *n; + } else if (wntqs) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNMBR", "QLN", m, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + } else if (wntqa) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "ZUNGBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *m * ilaenv_(&c__1, + "ZUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + } + } + } else { + +/* + There is no complex work space needed for bidiagonal SVD + The real work space needed for bidiagonal SVD is BDSPAC, + BDSPAC = 3*M*M + 4*M +*/ + + if (*n >= mnthr1) { + if (wntqn) { + +/* Path 1t (N much larger than M, JOBZ='N') */ + + maxwrk = *m + *m * ilaenv_(&c__1, "ZGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& + c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + maxwrk = max(i__1,i__2); + minwrk = *m * 3; + } else if (wntqo) { + +/* Path 2t (N much larger than M, JOBZ='O') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "ZGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "ZUNGLQ", + " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& + c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *m * *n + *m * *m + wrkbl; + minwrk = (*m << 1) * *m + *m * 3; + } else if (wntqs) { + +/* Path 3t (N much larger than M, JOBZ='S') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "ZGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "ZUNGLQ", + " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& + c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *m * *m + wrkbl; + minwrk = *m * *m + *m * 3; + } else if (wntqa) { + +/* Path 4t (N much larger than M, JOBZ='A') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "ZGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *n * ilaenv_(&c__1, "ZUNGLQ", + " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& + c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *m * *m + wrkbl; + minwrk = *m * *m + (*m << 1) + *n; + } + } else if (*n >= mnthr2) { + +/* Path 5t (N much larger than M, but not as much as MNTHR1) */ + + maxwrk = (*m << 1) + (*m + *n) * ilaenv_(&c__1, "ZGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + minwrk = (*m << 1) + *n; + if (wntqo) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNGBR", "P", m, n, m, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + maxwrk += *m * *n; + minwrk += *m * *m; + } else if (wntqs) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNGBR", "P", m, n, m, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + } else if (wntqa) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *n * ilaenv_(&c__1, + "ZUNGBR", "P", n, n, m, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + } + } else { + +/* Path 6t (N greater than M, but not much larger) */ + + maxwrk = (*m << 1) + (*m + *n) * ilaenv_(&c__1, "ZGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + minwrk = (*m << 1) + *n; + if (wntqo) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNMBR", "PRC", m, n, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNMBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + maxwrk += *m * *n; + minwrk += *m * *m; + } else if (wntqs) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNGBR", "PRC", m, n, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + } else if (wntqa) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *n * ilaenv_(&c__1, + "ZUNGBR", "PRC", n, n, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + } + } + } + maxwrk = max(maxwrk,minwrk); + work[1].r = (doublereal) maxwrk, work[1].i = 0.; + } + + if (*lwork < minwrk && ! lquery) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGESDD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + if (*lwork >= 1) { + work[1].r = 1., work[1].i = 0.; + } + return 0; + } + +/* Get machine constants */ + + eps = PRECISION; + smlnum = sqrt(SAFEMINIMUM) / eps; + bignum = 1. / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = zlange_("M", m, n, &a[a_offset], lda, dum); + iscl = 0; + if (anrm > 0. && anrm < smlnum) { + iscl = 1; + zlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & + ierr); + } else if (anrm > bignum) { + iscl = 1; + zlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & + ierr); + } + + if (*m >= *n) { + +/* + A has at least as many rows as columns. If A has sufficiently + more rows than columns, first reduce using the QR + decomposition (if sufficient workspace available) +*/ + + if (*m >= mnthr1) { + + if (wntqn) { + +/* + Path 1 (M much larger than N, JOBZ='N') + No singular vectors to be computed +*/ + + itau = 1; + nwork = itau + *n; + +/* + Compute A=Q*R + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: need 0) +*/ + + i__1 = *lwork - nwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Zero out below R */ + + i__1 = *n - 1; + i__2 = *n - 1; + zlaset_("L", &i__1, &i__2, &c_b59, &c_b59, &a[a_dim1 + 2], + lda); + ie = 1; + itauq = 1; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in A + (CWorkspace: need 3*N, prefer 2*N+2*N*NB) + (RWorkspace: need N) +*/ + + i__1 = *lwork - nwork + 1; + zgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + nrwork = ie + *n; + +/* + Perform bidiagonal SVD, compute singular values only + (CWorkspace: 0) + (RWorkspace: need BDSPAC) +*/ + + dbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + + } else if (wntqo) { + +/* + Path 2 (M much larger than N, JOBZ='O') + N left singular vectors to be overwritten on A and + N right singular vectors to be computed in VT +*/ + + iu = 1; + +/* WORK(IU) is N by N */ + + ldwrku = *n; + ir = iu + ldwrku * *n; + if (*lwork >= *m * *n + *n * *n + *n * 3) { + +/* WORK(IR) is M by N */ + + ldwrkr = *m; + } else { + ldwrkr = (*lwork - *n * *n - *n * 3) / *n; + } + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* + Compute A=Q*R + (CWorkspace: need N*N+2*N, prefer M*N+N+N*NB) + (RWorkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy R to WORK( IR ), zeroing out below it */ + + zlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__1 = *n - 1; + i__2 = *n - 1; + zlaset_("L", &i__1, &i__2, &c_b59, &c_b59, &work[ir + 1], & + ldwrkr); + +/* + Generate Q in A + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + zungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in WORK(IR) + (CWorkspace: need N*N+3*N, prefer M*N+2*N+2*N*NB) + (RWorkspace: need N) +*/ + + i__1 = *lwork - nwork + 1; + zgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of R in WORK(IRU) and computing right singular vectors + of R in WORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = ie + *n; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix WORK(IU) + Overwrite WORK(IU) by the left singular vectors of R + (CWorkspace: need 2*N*N+3*N, prefer M*N+N*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); + i__1 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__1, & + ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by the right singular vectors of R + (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + +/* + Multiply Q in A by left singular vectors of R in + WORK(IU), storing result in WORK(IR) and copying to A + (CWorkspace: need 2*N*N, prefer N*N+M*N) + (RWorkspace: 0) +*/ + + i__1 = *m; + i__2 = ldwrkr; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrkr); + zgemm_("N", "N", &chunk, n, n, &c_b60, &a[i__ + a_dim1], + lda, &work[iu], &ldwrku, &c_b59, &work[ir], & + ldwrkr); + zlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + + a_dim1], lda); +/* L10: */ + } + + } else if (wntqs) { + +/* + Path 3 (M much larger than N, JOBZ='S') + N left singular vectors to be computed in U and + N right singular vectors to be computed in VT +*/ + + ir = 1; + +/* WORK(IR) is N by N */ + + ldwrkr = *n; + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* + Compute A=Q*R + (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + zlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__2 = *n - 1; + i__1 = *n - 1; + zlaset_("L", &i__2, &i__1, &c_b59, &c_b59, &work[ir + 1], & + ldwrkr); + +/* + Generate Q in A + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + zungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in WORK(IR) + (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) + (RWorkspace: need N) +*/ + + i__2 = *lwork - nwork + 1; + zgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = ie + *n; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of R + (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of R + (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply Q in A by left singular vectors of R in + WORK(IR), storing result in U + (CWorkspace: need N*N) + (RWorkspace: 0) +*/ + + zlacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr); + zgemm_("N", "N", m, n, n, &c_b60, &a[a_offset], lda, &work[ir] + , &ldwrkr, &c_b59, &u[u_offset], ldu); + + } else if (wntqa) { + +/* + Path 4 (M much larger than N, JOBZ='A') + M left singular vectors to be computed in U and + N right singular vectors to be computed in VT +*/ + + iu = 1; + +/* WORK(IU) is N by N */ + + ldwrku = *n; + itau = iu + ldwrku * *n; + nwork = itau + *n; + +/* + Compute A=Q*R, copying result to U + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + zlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + +/* + Generate Q in U + (CWorkspace: need N+M, prefer N+M*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + zungqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork], + &i__2, &ierr); + +/* Produce R in A, zeroing out below it */ + + i__2 = *n - 1; + i__1 = *n - 1; + zlaset_("L", &i__2, &i__1, &c_b59, &c_b59, &a[a_dim1 + 2], + lda); + ie = 1; + itauq = itau; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in A + (CWorkspace: need 3*N, prefer 2*N+2*N*NB) + (RWorkspace: need N) +*/ + + i__2 = *lwork - nwork + 1; + zgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + iru = ie + *n; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix WORK(IU) + Overwrite WORK(IU) by left singular vectors of R + (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); + i__2 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & + ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of R + (CWorkspace: need 3*N, prefer 2*N+N*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply Q in U by left singular vectors of R in + WORK(IU), storing result in A + (CWorkspace: need N*N) + (RWorkspace: 0) +*/ + + zgemm_("N", "N", m, n, n, &c_b60, &u[u_offset], ldu, &work[iu] + , &ldwrku, &c_b59, &a[a_offset], lda); + +/* Copy left singular vectors of A from A to U */ + + zlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + + } + + } else if (*m >= mnthr2) { + +/* + MNTHR2 <= M < MNTHR1 + + Path 5 (M much larger than N, but not as much as MNTHR1) + Reduce to bidiagonal form without QR decomposition, use + ZUNGBR and matrix multiplication to compute singular vectors +*/ + + ie = 1; + nrwork = ie + *n; + itauq = 1; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize A + (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) + (RWorkspace: need N) +*/ + + i__2 = *lwork - nwork + 1; + zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* + Compute singular values only + (Cworkspace: 0) + (Rworkspace: need BDSPAC) +*/ + + dbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + } else if (wntqo) { + iu = nwork; + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + +/* + Copy A to VT, generate P**H + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: 0) +*/ + + zlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + zungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* + Generate Q in A + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + zungbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[ + nwork], &i__2, &ierr); + + if (*lwork >= *m * *n + *n * 3) { + +/* WORK( IU ) is M by N */ + + ldwrku = *m; + } else { + +/* WORK(IU) is LDWRKU by N */ + + ldwrku = (*lwork - *n * 3) / *n; + } + nwork = iu + ldwrku * *n; + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Multiply real matrix RWORK(IRVT) by P**H in VT, + storing the result in WORK(IU), copying to VT + (Cworkspace: need 0) + (Rworkspace: need 3*N*N) +*/ + + zlarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &work[iu] + , &ldwrku, &rwork[nrwork]); + zlacpy_("F", n, n, &work[iu], &ldwrku, &vt[vt_offset], ldvt); + +/* + Multiply Q in A by real matrix RWORK(IRU), storing the + result in WORK(IU), copying to A + (CWorkspace: need N*N, prefer M*N) + (Rworkspace: need 3*N*N, prefer N*N+2*M*N) +*/ + + nrwork = irvt; + i__2 = *m; + i__1 = ldwrku; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrku); + zlacrm_(&chunk, n, &a[i__ + a_dim1], lda, &rwork[iru], n, + &work[iu], &ldwrku, &rwork[nrwork]); + zlacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + + a_dim1], lda); +/* L20: */ + } + + } else if (wntqs) { + +/* + Copy A to VT, generate P**H + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: 0) +*/ + + zlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + zungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__1, &ierr); + +/* + Copy A to U, generate Q + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: 0) +*/ + + zlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + zungbr_("Q", m, n, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__1, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Multiply real matrix RWORK(IRVT) by P**H in VT, + storing the result in A, copying to VT + (Cworkspace: need 0) + (Rworkspace: need 3*N*N) +*/ + + zlarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &a[ + a_offset], lda, &rwork[nrwork]); + zlacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + +/* + Multiply Q in U by real matrix RWORK(IRU), storing the + result in A, copying to U + (CWorkspace: need 0) + (Rworkspace: need N*N+2*M*N) +*/ + + nrwork = irvt; + zlacrm_(m, n, &u[u_offset], ldu, &rwork[iru], n, &a[a_offset], + lda, &rwork[nrwork]); + zlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + } else { + +/* + Copy A to VT, generate P**H + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: 0) +*/ + + zlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + zungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__1, &ierr); + +/* + Copy A to U, generate Q + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: 0) +*/ + + zlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + zungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__1, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Multiply real matrix RWORK(IRVT) by P**H in VT, + storing the result in A, copying to VT + (Cworkspace: need 0) + (Rworkspace: need 3*N*N) +*/ + + zlarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &a[ + a_offset], lda, &rwork[nrwork]); + zlacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + +/* + Multiply Q in U by real matrix RWORK(IRU), storing the + result in A, copying to U + (CWorkspace: 0) + (Rworkspace: need 3*N*N) +*/ + + nrwork = irvt; + zlacrm_(m, n, &u[u_offset], ldu, &rwork[iru], n, &a[a_offset], + lda, &rwork[nrwork]); + zlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + } + + } else { + +/* + M .LT. MNTHR2 + + Path 6 (M at least N, but not much larger) + Reduce to bidiagonal form without QR decomposition + Use ZUNMBR to compute singular vectors +*/ + + ie = 1; + nrwork = ie + *n; + itauq = 1; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize A + (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) + (RWorkspace: need N) +*/ + + i__1 = *lwork - nwork + 1; + zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__1, &ierr); + if (wntqn) { + +/* + Compute singular values only + (Cworkspace: 0) + (Rworkspace: need BDSPAC) +*/ + + dbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + } else if (wntqo) { + iu = nwork; + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + if (*lwork >= *m * *n + *n * 3) { + +/* WORK( IU ) is M by N */ + + ldwrku = *m; + } else { + +/* WORK( IU ) is LDWRKU by N */ + + ldwrku = (*lwork - *n * 3) / *n; + } + nwork = iu + ldwrku * *n; + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of A + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: need 0) +*/ + + zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + + if (*lwork >= *m * *n + *n * 3) { + +/* + Copy real matrix RWORK(IRU) to complex matrix WORK(IU) + Overwrite WORK(IU) by left singular vectors of A, copying + to A + (Cworkspace: need M*N+2*N, prefer M*N+N+N*NB) + (Rworkspace: need 0) +*/ + + zlaset_("F", m, n, &c_b59, &c_b59, &work[iu], &ldwrku); + zlacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); + i__1 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__1, & + ierr); + zlacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda); + } else { + +/* + Generate Q in A + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: need 0) +*/ + + i__1 = *lwork - nwork + 1; + zungbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & + work[nwork], &i__1, &ierr); + +/* + Multiply Q in A by real matrix RWORK(IRU), storing the + result in WORK(IU), copying to A + (CWorkspace: need N*N, prefer M*N) + (Rworkspace: need 3*N*N, prefer N*N+2*M*N) +*/ + + nrwork = irvt; + i__1 = *m; + i__2 = ldwrku; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrku); + zlacrm_(&chunk, n, &a[i__ + a_dim1], lda, &rwork[iru], + n, &work[iu], &ldwrku, &rwork[nrwork]); + zlacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + + a_dim1], lda); +/* L30: */ + } + } + + } else if (wntqs) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of A + (CWorkspace: need 3*N, prefer 2*N+N*NB) + (RWorkspace: 0) +*/ + + zlaset_("F", m, n, &c_b59, &c_b59, &u[u_offset], ldu); + zlacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of A + (CWorkspace: need 3*N, prefer 2*N+N*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + } else { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* Set the right corner of U to identity matrix */ + + zlaset_("F", m, m, &c_b59, &c_b59, &u[u_offset], ldu); + i__2 = *m - *n; + i__1 = *m - *n; + zlaset_("F", &i__2, &i__1, &c_b59, &c_b60, &u[*n + 1 + (*n + + 1) * u_dim1], ldu); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of A + (CWorkspace: need 2*N+M, prefer 2*N+M*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of A + (CWorkspace: need 3*N, prefer 2*N+N*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + } + + } + + } else { + +/* + A has more columns than rows. If A has sufficiently more + columns than rows, first reduce using the LQ decomposition + (if sufficient workspace available) +*/ + + if (*n >= mnthr1) { + + if (wntqn) { + +/* + Path 1t (N much larger than M, JOBZ='N') + No singular vectors to be computed +*/ + + itau = 1; + nwork = itau + *m; + +/* + Compute A=L*Q + (CWorkspace: need 2*M, prefer M+M*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Zero out above L */ + + i__2 = *m - 1; + i__1 = *m - 1; + zlaset_("U", &i__2, &i__1, &c_b59, &c_b59, &a[(a_dim1 << 1) + + 1], lda); + ie = 1; + itauq = 1; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in A + (CWorkspace: need 3*M, prefer 2*M+2*M*NB) + (RWorkspace: need M) +*/ + + i__2 = *lwork - nwork + 1; + zgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + nrwork = ie + *m; + +/* + Perform bidiagonal SVD, compute singular values only + (CWorkspace: 0) + (RWorkspace: need BDSPAC) +*/ + + dbdsdc_("U", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + + } else if (wntqo) { + +/* + Path 2t (N much larger than M, JOBZ='O') + M right singular vectors to be overwritten on A and + M left singular vectors to be computed in U +*/ + + ivt = 1; + ldwkvt = *m; + +/* WORK(IVT) is M by M */ + + il = ivt + ldwkvt * *m; + if (*lwork >= *m * *n + *m * *m + *m * 3) { + +/* WORK(IL) M by N */ + + ldwrkl = *m; + chunk = *n; + } else { + +/* WORK(IL) is M by CHUNK */ + + ldwrkl = *m; + chunk = (*lwork - *m * *m - *m * 3) / *m; + } + itau = il + ldwrkl * chunk; + nwork = itau + *m; + +/* + Compute A=L*Q + (CWorkspace: need 2*M, prefer M+M*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy L to WORK(IL), zeroing about above it */ + + zlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__2 = *m - 1; + i__1 = *m - 1; + zlaset_("U", &i__2, &i__1, &c_b59, &c_b59, &work[il + ldwrkl], + &ldwrkl); + +/* + Generate Q in A + (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) + (RWorkspace: 0) +*/ + + i__2 = *lwork - nwork + 1; + zunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in WORK(IL) + (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) + (RWorkspace: need M) +*/ + + i__2 = *lwork - nwork + 1; + zgebrd_(m, m, &work[il], &ldwrkl, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = ie + *m; + irvt = iru + *m * *m; + nrwork = irvt + *m * *m; + dbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix WORK(IU) + Overwrite WORK(IU) by the left singular vectors of L + (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) + Overwrite WORK(IVT) by the right singular vectors of L + (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt); + i__2 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply right singular vectors of L in WORK(IL) by Q + in A, storing result in WORK(IL) and copying to A + (CWorkspace: need 2*M*M, prefer M*M+M*N)) + (RWorkspace: 0) +*/ + + i__2 = *n; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + zgemm_("N", "N", m, &blk, m, &c_b60, &work[ivt], m, &a[ + i__ * a_dim1 + 1], lda, &c_b59, &work[il], & + ldwrkl); + zlacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 + + 1], lda); +/* L40: */ + } + + } else if (wntqs) { + +/* + Path 3t (N much larger than M, JOBZ='S') + M right singular vectors to be computed in VT and + M left singular vectors to be computed in U +*/ + + il = 1; + +/* WORK(IL) is M by M */ + + ldwrkl = *m; + itau = il + ldwrkl * *m; + nwork = itau + *m; + +/* + Compute A=L*Q + (CWorkspace: need 2*M, prefer M+M*NB) + (RWorkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy L to WORK(IL), zeroing out above it */ + + zlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__1 = *m - 1; + i__2 = *m - 1; + zlaset_("U", &i__1, &i__2, &c_b59, &c_b59, &work[il + ldwrkl], + &ldwrkl); + +/* + Generate Q in A + (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) + (RWorkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + zunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in WORK(IL) + (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) + (RWorkspace: need M) +*/ + + i__1 = *lwork - nwork + 1; + zgebrd_(m, m, &work[il], &ldwrkl, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = ie + *m; + irvt = iru + *m * *m; + nrwork = irvt + *m * *m; + dbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of L + (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by left singular vectors of L + (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + +/* + Copy VT to WORK(IL), multiply right singular vectors of L + in WORK(IL) by Q in A, storing result in VT + (CWorkspace: need M*M) + (RWorkspace: 0) +*/ + + zlacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl); + zgemm_("N", "N", m, n, m, &c_b60, &work[il], &ldwrkl, &a[ + a_offset], lda, &c_b59, &vt[vt_offset], ldvt); + + } else if (wntqa) { + +/* + Path 9t (N much larger than M, JOBZ='A') + N right singular vectors to be computed in VT and + M left singular vectors to be computed in U +*/ + + ivt = 1; + +/* WORK(IVT) is M by M */ + + ldwkvt = *m; + itau = ivt + ldwkvt * *m; + nwork = itau + *m; + +/* + Compute A=L*Q, copying result to VT + (CWorkspace: need 2*M, prefer M+M*NB) + (RWorkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + zlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + +/* + Generate Q in VT + (CWorkspace: need M+N, prefer M+N*NB) + (RWorkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + zunglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[ + nwork], &i__1, &ierr); + +/* Produce L in A, zeroing out above it */ + + i__1 = *m - 1; + i__2 = *m - 1; + zlaset_("U", &i__1, &i__2, &c_b59, &c_b59, &a[(a_dim1 << 1) + + 1], lda); + ie = 1; + itauq = itau; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in A + (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) + (RWorkspace: need M) +*/ + + i__1 = *lwork - nwork + 1; + zgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + iru = ie + *m; + irvt = iru + *m * *m; + nrwork = irvt + *m * *m; + dbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of L + (CWorkspace: need 3*M, prefer 2*M+M*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) + Overwrite WORK(IVT) by right singular vectors of L + (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) + (RWorkspace: 0) +*/ + + zlacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt); + i__1 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", m, m, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__1, & + ierr); + +/* + Multiply right singular vectors of L in WORK(IVT) by + Q in VT, storing result in A + (CWorkspace: need M*M) + (RWorkspace: 0) +*/ + + zgemm_("N", "N", m, n, m, &c_b60, &work[ivt], &ldwkvt, &vt[ + vt_offset], ldvt, &c_b59, &a[a_offset], lda); + +/* Copy right singular vectors of A from A to VT */ + + zlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + + } + + } else if (*n >= mnthr2) { + +/* + MNTHR2 <= N < MNTHR1 + + Path 5t (N much larger than M, but not as much as MNTHR1) + Reduce to bidiagonal form without QR decomposition, use + ZUNGBR and matrix multiplication to compute singular vectors +*/ + + + ie = 1; + nrwork = ie + *m; + itauq = 1; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize A + (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) + (RWorkspace: M) +*/ + + i__1 = *lwork - nwork + 1; + zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__1, &ierr); + + if (wntqn) { + +/* + Compute singular values only + (Cworkspace: 0) + (Rworkspace: need BDSPAC) +*/ + + dbdsdc_("L", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + } else if (wntqo) { + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + ivt = nwork; + +/* + Copy A to U, generate Q + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ + + zlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + zungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__1, &ierr); + +/* + Generate P**H in A + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + zungbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[ + nwork], &i__1, &ierr); + + ldwkvt = *m; + if (*lwork >= *m * *n + *m * 3) { + +/* WORK( IVT ) is M by N */ + + nwork = ivt + ldwkvt * *n; + chunk = *n; + } else { + +/* WORK( IVT ) is M by CHUNK */ + + chunk = (*lwork - *m * 3) / *m; + nwork = ivt + ldwkvt * chunk; + } + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Multiply Q in U by real matrix RWORK(IRVT) + storing the result in WORK(IVT), copying to U + (Cworkspace: need 0) + (Rworkspace: need 2*M*M) +*/ + + zlacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &work[ivt], & + ldwkvt, &rwork[nrwork]); + zlacpy_("F", m, m, &work[ivt], &ldwkvt, &u[u_offset], ldu); + +/* + Multiply RWORK(IRVT) by P**H in A, storing the + result in WORK(IVT), copying to A + (CWorkspace: need M*M, prefer M*N) + (Rworkspace: need 2*M*M, prefer 2*M*N) +*/ + + nrwork = iru; + i__1 = *n; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + zlarcm_(m, &blk, &rwork[irvt], m, &a[i__ * a_dim1 + 1], + lda, &work[ivt], &ldwkvt, &rwork[nrwork]); + zlacpy_("F", m, &blk, &work[ivt], &ldwkvt, &a[i__ * + a_dim1 + 1], lda); +/* L50: */ + } + } else if (wntqs) { + +/* + Copy A to U, generate Q + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ + + zlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + zungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__2, &ierr); + +/* + Copy A to VT, generate P**H + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ + + zlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + zungbr_("P", m, n, m, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Multiply Q in U by real matrix RWORK(IRU), storing the + result in A, copying to U + (CWorkspace: need 0) + (Rworkspace: need 3*M*M) +*/ + + zlacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &a[a_offset], + lda, &rwork[nrwork]); + zlacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu); + +/* + Multiply real matrix RWORK(IRVT) by P**H in VT, + storing the result in A, copying to VT + (Cworkspace: need 0) + (Rworkspace: need M*M+2*M*N) +*/ + + nrwork = iru; + zlarcm_(m, n, &rwork[irvt], m, &vt[vt_offset], ldvt, &a[ + a_offset], lda, &rwork[nrwork]); + zlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + } else { + +/* + Copy A to U, generate Q + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ + + zlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + zungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__2, &ierr); + +/* + Copy A to VT, generate P**H + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ + + zlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + zungbr_("P", n, n, m, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Multiply Q in U by real matrix RWORK(IRU), storing the + result in A, copying to U + (CWorkspace: need 0) + (Rworkspace: need 3*M*M) +*/ + + zlacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &a[a_offset], + lda, &rwork[nrwork]); + zlacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu); + +/* + Multiply real matrix RWORK(IRVT) by P**H in VT, + storing the result in A, copying to VT + (Cworkspace: need 0) + (Rworkspace: need M*M+2*M*N) +*/ + + zlarcm_(m, n, &rwork[irvt], m, &vt[vt_offset], ldvt, &a[ + a_offset], lda, &rwork[nrwork]); + zlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + } + + } else { + +/* + N .LT. MNTHR2 + + Path 6t (N greater than M, but not much larger) + Reduce to bidiagonal form without LQ decomposition + Use ZUNMBR to compute singular vectors +*/ + + ie = 1; + nrwork = ie + *m; + itauq = 1; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize A + (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) + (RWorkspace: M) +*/ + + i__2 = *lwork - nwork + 1; + zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* + Compute singular values only + (Cworkspace: 0) + (Rworkspace: need BDSPAC) +*/ + + dbdsdc_("L", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + } else if (wntqo) { + ldwkvt = *m; + ivt = nwork; + if (*lwork >= *m * *n + *m * 3) { + +/* WORK( IVT ) is M by N */ + + zlaset_("F", m, n, &c_b59, &c_b59, &work[ivt], &ldwkvt); + nwork = ivt + ldwkvt * *n; + } else { + +/* WORK( IVT ) is M by CHUNK */ + + chunk = (*lwork - *m * 3) / *m; + nwork = ivt + ldwkvt * chunk; + } + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of A + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: need 0) +*/ + + zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + + if (*lwork >= *m * *n + *m * 3) { + +/* + Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) + Overwrite WORK(IVT) by right singular vectors of A, + copying to A + (Cworkspace: need M*N+2*M, prefer M*N+M+M*NB) + (Rworkspace: need 0) +*/ + + zlacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt); + i__2 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", m, n, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, + &ierr); + zlacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda); + } else { + +/* + Generate P**H in A + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: need 0) +*/ + + i__2 = *lwork - nwork + 1; + zungbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* + Multiply Q in A by real matrix RWORK(IRU), storing the + result in WORK(IU), copying to A + (CWorkspace: need M*M, prefer M*N) + (Rworkspace: need 3*M*M, prefer M*M+2*M*N) +*/ + + nrwork = iru; + i__2 = *n; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + zlarcm_(m, &blk, &rwork[irvt], m, &a[i__ * a_dim1 + 1] + , lda, &work[ivt], &ldwkvt, &rwork[nrwork]); + zlacpy_("F", m, &blk, &work[ivt], &ldwkvt, &a[i__ * + a_dim1 + 1], lda); +/* L60: */ + } + } + } else if (wntqs) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of A + (CWorkspace: need 3*M, prefer 2*M+M*NB) + (RWorkspace: M*M) +*/ + + zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of A + (CWorkspace: need 3*M, prefer 2*M+M*NB) + (RWorkspace: M*M) +*/ + + zlaset_("F", m, n, &c_b59, &c_b59, &vt[vt_offset], ldvt); + zlacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", m, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } else { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + + dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of A + (CWorkspace: need 3*M, prefer 2*M+M*NB) + (RWorkspace: M*M) +*/ + + zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + zunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + +/* Set the right corner of VT to identity matrix */ + + i__1 = *n - *m; + i__2 = *n - *m; + zlaset_("F", &i__1, &i__2, &c_b59, &c_b60, &vt[*m + 1 + (*m + + 1) * vt_dim1], ldvt); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of A + (CWorkspace: need 2*M+N, prefer 2*M+N*NB) + (RWorkspace: M*M) +*/ + + zlaset_("F", n, n, &c_b59, &c_b59, &vt[vt_offset], ldvt); + zlacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + zunmbr_("P", "R", "C", n, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } + + } + + } + +/* Undo scaling if necessary */ + + if (iscl == 1) { + if (anrm > bignum) { + dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + if (anrm < smlnum) { + dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + } + +/* Return optimal workspace in WORK(1) */ + + work[1].r = (doublereal) maxwrk, work[1].i = 0.; + + return 0; + +/* End of ZGESDD */ + +} /* zgesdd_ */ + +/* Subroutine */ int zgesv_(integer *n, integer *nrhs, doublecomplex *a, + integer *lda, integer *ipiv, doublecomplex *b, integer *ldb, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int xerbla_(char *, integer *), zgetrf_( + integer *, integer *, doublecomplex *, integer *, integer *, + integer *), zgetrs_(char *, integer *, integer *, doublecomplex *, + integer *, integer *, doublecomplex *, integer *, integer *); + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + ZGESV computes the solution to a complex system of linear equations + A * X = B, + where A is an N-by-N matrix and X and B are N-by-NRHS matrices. + + The LU decomposition with partial pivoting and row interchanges is + used to factor A as + A = P * L * U, + where P is a permutation matrix, L is unit lower triangular, and U is + upper triangular. The factored form of A is then used to solve the + system of equations A * X = B. + + Arguments + ========= + + N (input) INTEGER + The number of linear equations, i.e., the order of the + matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the N-by-N coefficient matrix A. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + IPIV (output) INTEGER array, dimension (N) + The pivot indices that define the permutation matrix P; + row i of the matrix was interchanged with row IPIV(i). + + B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) + On entry, the N-by-NRHS matrix of right hand side matrix B. + On exit, if INFO = 0, the N-by-NRHS solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, U(i,i) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, so the solution could not be computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*nrhs < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGESV ", &i__1); + return 0; + } + +/* Compute the LU factorization of A. */ + + zgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + + zgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[ + b_offset], ldb, info); + } + return 0; + +/* End of ZGESV */ + +} /* zgesv_ */ + +/* Subroutine */ int zgetf2_(integer *m, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublecomplex z__1; + + /* Builtin functions */ + void z_div(doublecomplex *, doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer j, jp; + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *), zgeru_(integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *), zswap_(integer *, + doublecomplex *, integer *, doublecomplex *, integer *), xerbla_( + char *, integer *); + extern integer izamax_(integer *, doublecomplex *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZGETF2 computes an LU factorization of a general m-by-n matrix A + using partial pivoting with row interchanges. + + The factorization has the form + A = P * L * U + where P is a permutation matrix, L is lower triangular with unit + diagonal elements (lower trapezoidal if m > n), and U is upper + triangular (upper trapezoidal if m < n). + + This is the right-looking Level 2 BLAS version of the algorithm. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the m by n matrix to be factored. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + IPIV (output) INTEGER array, dimension (min(M,N)) + The pivot indices; for 1 <= i <= min(M,N), row i of the + matrix was interchanged with row IPIV(i). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + > 0: if INFO = k, U(k,k) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, and division by zero will occur if it is used + to solve a system of equations. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGETF2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { + +/* Find pivot and test for singularity. */ + + i__2 = *m - j + 1; + jp = j - 1 + izamax_(&i__2, &a[j + j * a_dim1], &c__1); + ipiv[j] = jp; + i__2 = jp + j * a_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0.) { + +/* Apply the interchange to columns 1:N. */ + + if (jp != j) { + zswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda); + } + +/* Compute elements J+1:M of J-th column. */ + + if (j < *m) { + i__2 = *m - j; + z_div(&z__1, &c_b60, &a[j + j * a_dim1]); + zscal_(&i__2, &z__1, &a[j + 1 + j * a_dim1], &c__1); + } + + } else if (*info == 0) { + + *info = j; + } + + if (j < min(*m,*n)) { + +/* Update trailing submatrix. */ + + i__2 = *m - j; + i__3 = *n - j; + z__1.r = -1., z__1.i = -0.; + zgeru_(&i__2, &i__3, &z__1, &a[j + 1 + j * a_dim1], &c__1, &a[j + + (j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], lda) + ; + } +/* L10: */ + } + return 0; + +/* End of ZGETF2 */ + +} /* zgetf2_ */ + +/* Subroutine */ int zgetrf_(integer *m, integer *n, doublecomplex *a, + integer *lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1; + + /* Local variables */ + static integer i__, j, jb, nb, iinfo; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), ztrsm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer * + , doublecomplex *, integer *), + zgetf2_(integer *, integer *, doublecomplex *, integer *, integer + *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlaswp_(integer *, doublecomplex *, integer *, + integer *, integer *, integer *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZGETRF computes an LU factorization of a general M-by-N matrix A + using partial pivoting with row interchanges. + + The factorization has the form + A = P * L * U + where P is a permutation matrix, L is lower triangular with unit + diagonal elements (lower trapezoidal if m > n), and U is upper + triangular (upper trapezoidal if m < n). + + This is the right-looking Level 3 BLAS version of the algorithm. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the M-by-N matrix to be factored. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + IPIV (output) INTEGER array, dimension (min(M,N)) + The pivot indices; for 1 <= i <= min(M,N), row i of the + matrix was interchanged with row IPIV(i). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, U(i,i) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, and division by zero will occur if it is used + to solve a system of equations. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGETRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "ZGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + if (nb <= 1 || nb >= min(*m,*n)) { + +/* Use unblocked code. */ + + zgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); + } else { + +/* Use blocked code. */ + + i__1 = min(*m,*n); + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = min(*m,*n) - j + 1; + jb = min(i__3,nb); + +/* + Factor diagonal and subdiagonal blocks and test for exact + singularity. +*/ + + i__3 = *m - j + 1; + zgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); + +/* Adjust INFO and the pivot indices. */ + + if (*info == 0 && iinfo > 0) { + *info = iinfo + j - 1; + } +/* Computing MIN */ + i__4 = *m, i__5 = j + jb - 1; + i__3 = min(i__4,i__5); + for (i__ = j; i__ <= i__3; ++i__) { + ipiv[i__] = j - 1 + ipiv[i__]; +/* L10: */ + } + +/* Apply interchanges to columns 1:J-1. */ + + i__3 = j - 1; + i__4 = j + jb - 1; + zlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); + + if (j + jb <= *n) { + +/* Apply interchanges to columns J+JB:N. */ + + i__3 = *n - j - jb + 1; + i__4 = j + jb - 1; + zlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & + ipiv[1], &c__1); + +/* Compute block row of U. */ + + i__3 = *n - j - jb + 1; + ztrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & + c_b60, &a[j + j * a_dim1], lda, &a[j + (j + jb) * + a_dim1], lda); + if (j + jb <= *m) { + +/* Update trailing submatrix. */ + + i__3 = *m - j - jb + 1; + i__4 = *n - j - jb + 1; + z__1.r = -1., z__1.i = -0.; + zgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, + &z__1, &a[j + jb + j * a_dim1], lda, &a[j + (j + + jb) * a_dim1], lda, &c_b60, &a[j + jb + (j + jb) * + a_dim1], lda); + } + } +/* L20: */ + } + } + return 0; + +/* End of ZGETRF */ + +} /* zgetrf_ */ + +/* Subroutine */ int zgetrs_(char *trans, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, + integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int ztrsm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), + xerbla_(char *, integer *); + static logical notran; + extern /* Subroutine */ int zlaswp_(integer *, doublecomplex *, integer *, + integer *, integer *, integer *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZGETRS solves a system of linear equations + A * X = B, A**T * X = B, or A**H * X = B + with a general N-by-N matrix A using the LU factorization computed + by ZGETRF. + + Arguments + ========= + + TRANS (input) CHARACTER*1 + Specifies the form of the system of equations: + = 'N': A * X = B (No transpose) + = 'T': A**T * X = B (Transpose) + = 'C': A**H * X = B (Conjugate transpose) + + N (input) INTEGER + The order of the matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input) COMPLEX*16 array, dimension (LDA,N) + The factors L and U from the factorization A = P*L*U + as computed by ZGETRF. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + IPIV (input) INTEGER array, dimension (N) + The pivot indices from ZGETRF; for 1<=i<=N, row i of the + matrix was interchanged with row IPIV(i). + + B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) + On entry, the right hand side matrix B. + On exit, the solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + notran = lsame_(trans, "N"); + if (! notran && ! lsame_(trans, "T") && ! lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZGETRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (notran) { + +/* + Solve A * X = B. + + Apply row interchanges to the right hand sides. +*/ + + zlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); + +/* Solve L*X = B, overwriting B with X. */ + + ztrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b60, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. */ + + ztrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b60, & + a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* + Solve A**T * X = B or A**H * X = B. + + Solve U'*X = B, overwriting B with X. +*/ + + ztrsm_("Left", "Upper", trans, "Non-unit", n, nrhs, &c_b60, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve L'*X = B, overwriting B with X. */ + + ztrsm_("Left", "Lower", trans, "Unit", n, nrhs, &c_b60, &a[a_offset], + lda, &b[b_offset], ldb); + +/* Apply row interchanges to the solution vectors. */ + + zlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); + } + + return 0; + +/* End of ZGETRS */ + +} /* zgetrs_ */ + +/* Subroutine */ int zheevd_(char *jobz, char *uplo, integer *n, + doublecomplex *a, integer *lda, doublereal *w, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork, + integer *liwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal eps; + static integer inde; + static doublereal anrm; + static integer imax; + static doublereal rmin, rmax; + static integer lopt; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + static doublereal sigma; + extern logical lsame_(char *, char *); + static integer iinfo, lwmin, liopt; + static logical lower; + static integer llrwk, lropt; + static logical wantz; + static integer indwk2, llwrk2; + + static integer iscale; + static doublereal safmin; + extern /* Subroutine */ int xerbla_(char *, integer *); + static doublereal bignum; + extern doublereal zlanhe_(char *, char *, integer *, doublecomplex *, + integer *, doublereal *); + static integer indtau; + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *), zlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublecomplex *, integer *, + integer *), zstedc_(char *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublecomplex *, + integer *, doublereal *, integer *, integer *, integer *, integer + *); + static integer indrwk, indwrk, liwmin; + extern /* Subroutine */ int zhetrd_(char *, integer *, doublecomplex *, + integer *, doublereal *, doublereal *, doublecomplex *, + doublecomplex *, integer *, integer *), zlacpy_(char *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *); + static integer lrwmin, llwork; + static doublereal smlnum; + static logical lquery; + extern /* Subroutine */ int zunmtr_(char *, char *, char *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZHEEVD computes all eigenvalues and, optionally, eigenvectors of a + complex Hermitian matrix A. If eigenvectors are desired, it uses a + divide and conquer algorithm. + + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + JOBZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only; + = 'V': Compute eigenvalues and eigenvectors. + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA, N) + On entry, the Hermitian matrix A. If UPLO = 'U', the + leading N-by-N upper triangular part of A contains the + upper triangular part of the matrix A. If UPLO = 'L', + the leading N-by-N lower triangular part of A contains + the lower triangular part of the matrix A. + On exit, if JOBZ = 'V', then if INFO = 0, A contains the + orthonormal eigenvectors of the matrix A. + If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') + or the upper triangle (if UPLO='U') of A, including the + diagonal, is destroyed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + W (output) DOUBLE PRECISION array, dimension (N) + If INFO = 0, the eigenvalues in ascending order. + + WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The length of the array WORK. + If N <= 1, LWORK must be at least 1. + If JOBZ = 'N' and N > 1, LWORK must be at least N + 1. + If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + RWORK (workspace/output) DOUBLE PRECISION array, + dimension (LRWORK) + On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. + + LRWORK (input) INTEGER + The dimension of the array RWORK. + If N <= 1, LRWORK must be at least 1. + If JOBZ = 'N' and N > 1, LRWORK must be at least N. + If JOBZ = 'V' and N > 1, LRWORK must be at least + 1 + 5*N + 2*N**2. + + If LRWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal size of the RWORK array, + returns this value as the first entry of the RWORK array, and + no error message related to LRWORK is issued by XERBLA. + + IWORK (workspace/output) INTEGER array, dimension (LIWORK) + On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. + + LIWORK (input) INTEGER + The dimension of the array IWORK. + If N <= 1, LIWORK must be at least 1. + If JOBZ = 'N' and N > 1, LIWORK must be at least 1. + If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. + + If LIWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal size of the IWORK array, + returns this value as the first entry of the IWORK array, and + no error message related to LIWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the algorithm failed to converge; i + off-diagonal elements of an intermediate tridiagonal + form did not converge to zero. + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --w; + --work; + --rwork; + --iwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + lower = lsame_(uplo, "L"); + lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; + + *info = 0; + if (*n <= 1) { + lwmin = 1; + lrwmin = 1; + liwmin = 1; + lopt = lwmin; + lropt = lrwmin; + liopt = liwmin; + } else { + if (wantz) { + lwmin = (*n << 1) + *n * *n; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1); + liwmin = *n * 5 + 3; + } else { + lwmin = *n + 1; + lrwmin = *n; + liwmin = 1; + } + lopt = lwmin; + lropt = lrwmin; + liopt = liwmin; + } + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -10; + } else if (*liwork < liwmin && ! lquery) { + *info = -12; + } + + if (*info == 0) { + work[1].r = (doublereal) lopt, work[1].i = 0.; + rwork[1] = (doublereal) lropt; + iwork[1] = liopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHEEVD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + if (wantz) { + i__1 = a_dim1 + 1; + a[i__1].r = 1., a[i__1].i = 0.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = SAFEMINIMUM; + eps = PRECISION; + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = zlanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]); + iscale = 0; + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + zlascl_(uplo, &c__0, &c__0, &c_b1015, &sigma, n, n, &a[a_offset], lda, + info); + } + +/* Call ZHETRD to reduce Hermitian matrix to tridiagonal form. */ + + inde = 1; + indtau = 1; + indwrk = indtau + *n; + indrwk = inde + *n; + indwk2 = indwrk + *n * *n; + llwork = *lwork - indwrk + 1; + llwrk2 = *lwork - indwk2 + 1; + llrwk = *lrwork - indrwk + 1; + zhetrd_(uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], &work[indtau], & + work[indwrk], &llwork, &iinfo); +/* Computing MAX */ + i__1 = indwrk; + d__1 = (doublereal) lopt, d__2 = (doublereal) (*n) + work[i__1].r; + lopt = (integer) max(d__1,d__2); + +/* + For eigenvalues only, call DSTERF. For eigenvectors, first call + ZSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the + tridiagonal matrix, then call ZUNMTR to multiply it to the + Householder transformations represented as Householder vectors in + A. +*/ + + if (! wantz) { + dsterf_(n, &w[1], &rwork[inde], info); + } else { + zstedc_("I", n, &w[1], &rwork[inde], &work[indwrk], n, &work[indwk2], + &llwrk2, &rwork[indrwk], &llrwk, &iwork[1], liwork, info); + zunmtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ + indwrk], n, &work[indwk2], &llwrk2, &iinfo); + zlacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); +/* + Computing MAX + Computing 2nd power +*/ + i__3 = *n; + i__4 = indwk2; + i__1 = lopt, i__2 = *n + i__3 * i__3 + (integer) work[i__4].r; + lopt = max(i__1,i__2); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + d__1 = 1. / sigma; + dscal_(&imax, &d__1, &w[1], &c__1); + } + + work[1].r = (doublereal) lopt, work[1].i = 0.; + rwork[1] = (doublereal) lropt; + iwork[1] = liopt; + + return 0; + +/* End of ZHEEVD */ + +} /* zheevd_ */ + +/* Subroutine */ int zhetd2_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *d__, doublereal *e, doublecomplex *tau, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + doublecomplex z__1, z__2, z__3, z__4; + + /* Local variables */ + static integer i__; + static doublecomplex taui; + extern /* Subroutine */ int zher2_(char *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + static doublecomplex alpha; + extern logical lsame_(char *, char *); + extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + extern /* Subroutine */ int zhemv_(char *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, doublecomplex *, integer *); + static logical upper; + extern /* Subroutine */ int zaxpy_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *), xerbla_( + char *, integer *), zlarfg_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + ZHETD2 reduces a complex Hermitian matrix A to real symmetric + tridiagonal form T by a unitary similarity transformation: + Q' * A * Q = T. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + Hermitian matrix A is stored: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the Hermitian matrix A. If UPLO = 'U', the leading + n-by-n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n-by-n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit, if UPLO = 'U', the diagonal and first superdiagonal + of A are overwritten by the corresponding elements of the + tridiagonal matrix T, and the elements above the first + superdiagonal, with the array TAU, represent the unitary + matrix Q as a product of elementary reflectors; if UPLO + = 'L', the diagonal and first subdiagonal of A are over- + written by the corresponding elements of the tridiagonal + matrix T, and the elements below the first subdiagonal, with + the array TAU, represent the unitary matrix Q as a product + of elementary reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + D (output) DOUBLE PRECISION array, dimension (N) + The diagonal elements of the tridiagonal matrix T: + D(i) = A(i,i). + + E (output) DOUBLE PRECISION array, dimension (N-1) + The off-diagonal elements of the tridiagonal matrix T: + E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. + + TAU (output) COMPLEX*16 array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors + + Q = H(n-1) . . . H(2) H(1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in + A(1:i-1,i+1), and tau in TAU(i). + + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors + + Q = H(1) H(2) . . . H(n-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), + and tau in TAU(i). + + The contents of A on exit are illustrated by the following examples + with n = 5: + + if UPLO = 'U': if UPLO = 'L': + + ( d e v2 v3 v4 ) ( d ) + ( d e v3 v4 ) ( e d ) + ( d e v4 ) ( v1 e d ) + ( d e ) ( v1 v2 e d ) + ( d ) ( v1 v2 v3 e d ) + + where d and e denote diagonal and off-diagonal elements of T, and vi + denotes an element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tau; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHETD2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + + if (upper) { + +/* Reduce the upper triangle of A */ + + i__1 = *n + *n * a_dim1; + i__2 = *n + *n * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + for (i__ = *n - 1; i__ >= 1; --i__) { + +/* + Generate elementary reflector H(i) = I - tau * v * v' + to annihilate A(1:i-1,i+1) +*/ + + i__1 = i__ + (i__ + 1) * a_dim1; + alpha.r = a[i__1].r, alpha.i = a[i__1].i; + zlarfg_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &taui); + i__1 = i__; + e[i__1] = alpha.r; + + if (taui.r != 0. || taui.i != 0.) { + +/* Apply H(i) from both sides to A(1:i,1:i) */ + + i__1 = i__ + (i__ + 1) * a_dim1; + a[i__1].r = 1., a[i__1].i = 0.; + +/* Compute x := tau * A * v storing x in TAU(1:i) */ + + zhemv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * + a_dim1 + 1], &c__1, &c_b59, &tau[1], &c__1) + ; + +/* Compute w := x - 1/2 * tau * (x'*v) * v */ + + z__3.r = -.5, z__3.i = -0.; + z__2.r = z__3.r * taui.r - z__3.i * taui.i, z__2.i = z__3.r * + taui.i + z__3.i * taui.r; + zdotc_(&z__4, &i__, &tau[1], &c__1, &a[(i__ + 1) * a_dim1 + 1] + , &c__1); + z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r * + z__4.i + z__2.i * z__4.r; + alpha.r = z__1.r, alpha.i = z__1.i; + zaxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[ + 1], &c__1); + +/* + Apply the transformation as a rank-2 update: + A := A - v * w' - w * v' +*/ + + z__1.r = -1., z__1.i = -0.; + zher2_(uplo, &i__, &z__1, &a[(i__ + 1) * a_dim1 + 1], &c__1, & + tau[1], &c__1, &a[a_offset], lda); + + } else { + i__1 = i__ + i__ * a_dim1; + i__2 = i__ + i__ * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } + i__1 = i__ + (i__ + 1) * a_dim1; + i__2 = i__; + a[i__1].r = e[i__2], a[i__1].i = 0.; + i__1 = i__ + 1; + i__2 = i__ + 1 + (i__ + 1) * a_dim1; + d__[i__1] = a[i__2].r; + i__1 = i__; + tau[i__1].r = taui.r, tau[i__1].i = taui.i; +/* L10: */ + } + i__1 = a_dim1 + 1; + d__[1] = a[i__1].r; + } else { + +/* Reduce the lower triangle of A */ + + i__1 = a_dim1 + 1; + i__2 = a_dim1 + 1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* + Generate elementary reflector H(i) = I - tau * v * v' + to annihilate A(i+2:n,i) +*/ + + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + zlarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, & + taui); + i__2 = i__; + e[i__2] = alpha.r; + + if (taui.r != 0. || taui.i != 0.) { + +/* Apply H(i) from both sides to A(i+1:n,i+1:n) */ + + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Compute x := tau * A * v storing y in TAU(i:n-1) */ + + i__2 = *n - i__; + zhemv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b59, &tau[ + i__], &c__1); + +/* Compute w := x - 1/2 * tau * (x'*v) * v */ + + z__3.r = -.5, z__3.i = -0.; + z__2.r = z__3.r * taui.r - z__3.i * taui.i, z__2.i = z__3.r * + taui.i + z__3.i * taui.r; + i__2 = *n - i__; + zdotc_(&z__4, &i__2, &tau[i__], &c__1, &a[i__ + 1 + i__ * + a_dim1], &c__1); + z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r * + z__4.i + z__2.i * z__4.r; + alpha.r = z__1.r, alpha.i = z__1.i; + i__2 = *n - i__; + zaxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &c__1); + +/* + Apply the transformation as a rank-2 update: + A := A - v * w' - w * v' +*/ + + i__2 = *n - i__; + z__1.r = -1., z__1.i = -0.; + zher2_(uplo, &i__2, &z__1, &a[i__ + 1 + i__ * a_dim1], &c__1, + &tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda); + + } else { + i__2 = i__ + 1 + (i__ + 1) * a_dim1; + i__3 = i__ + 1 + (i__ + 1) * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } + i__2 = i__ + 1 + i__ * a_dim1; + i__3 = i__; + a[i__2].r = e[i__3], a[i__2].i = 0.; + i__2 = i__; + i__3 = i__ + i__ * a_dim1; + d__[i__2] = a[i__3].r; + i__2 = i__; + tau[i__2].r = taui.r, tau[i__2].i = taui.i; +/* L20: */ + } + i__1 = *n; + i__2 = *n + *n * a_dim1; + d__[i__1] = a[i__2].r; + } + + return 0; + +/* End of ZHETD2 */ + +} /* zhetd2_ */ + +/* Subroutine */ int zhetrd_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *d__, doublereal *e, doublecomplex *tau, + doublecomplex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1; + + /* Local variables */ + static integer i__, j, nb, kk, nx, iws; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + static logical upper; + extern /* Subroutine */ int zhetd2_(char *, integer *, doublecomplex *, + integer *, doublereal *, doublereal *, doublecomplex *, integer *), zher2k_(char *, char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublereal *, doublecomplex *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlatrd_(char *, integer *, integer *, + doublecomplex *, integer *, doublereal *, doublecomplex *, + doublecomplex *, integer *); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZHETRD reduces a complex Hermitian matrix A to real symmetric + tridiagonal form T by a unitary similarity transformation: + Q**H * A * Q = T. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the Hermitian matrix A. If UPLO = 'U', the leading + N-by-N upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit, if UPLO = 'U', the diagonal and first superdiagonal + of A are overwritten by the corresponding elements of the + tridiagonal matrix T, and the elements above the first + superdiagonal, with the array TAU, represent the unitary + matrix Q as a product of elementary reflectors; if UPLO + = 'L', the diagonal and first subdiagonal of A are over- + written by the corresponding elements of the tridiagonal + matrix T, and the elements below the first subdiagonal, with + the array TAU, represent the unitary matrix Q as a product + of elementary reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + D (output) DOUBLE PRECISION array, dimension (N) + The diagonal elements of the tridiagonal matrix T: + D(i) = A(i,i). + + E (output) DOUBLE PRECISION array, dimension (N-1) + The off-diagonal elements of the tridiagonal matrix T: + E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. + + TAU (output) COMPLEX*16 array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= 1. + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors + + Q = H(n-1) . . . H(2) H(1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in + A(1:i-1,i+1), and tau in TAU(i). + + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors + + Q = H(1) H(2) . . . H(n-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), + and tau in TAU(i). + + The contents of A on exit are illustrated by the following examples + with n = 5: + + if UPLO = 'U': if UPLO = 'L': + + ( d e v2 v3 v4 ) ( d ) + ( d e v3 v4 ) ( e d ) + ( d e v4 ) ( v1 e d ) + ( d e ) ( v1 v2 e d ) + ( d ) ( v1 v2 v3 e d ) + + where d and e denote diagonal and off-diagonal elements of T, and vi + denotes an element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tau; + --work; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*lwork < 1 && ! lquery) { + *info = -9; + } + + if (*info == 0) { + +/* Determine the block size. */ + + nb = ilaenv_(&c__1, "ZHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + lwkopt = *n * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHETRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + nx = *n; + iws = 1; + if (nb > 1 && nb < *n) { + +/* + Determine when to cross over from blocked to unblocked code + (last block is always handled by unblocked code). + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "ZHETRD", uplo, n, &c_n1, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *n) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: determine the + minimum value of NB, and reduce NB or force use of + unblocked code by setting NX = N. + + Computing MAX +*/ + i__1 = *lwork / ldwork; + nb = max(i__1,1); + nbmin = ilaenv_(&c__2, "ZHETRD", uplo, n, &c_n1, &c_n1, &c_n1, + (ftnlen)6, (ftnlen)1); + if (nb < nbmin) { + nx = *n; + } + } + } else { + nx = *n; + } + } else { + nb = 1; + } + + if (upper) { + +/* + Reduce the upper triangle of A. + Columns 1:kk are handled by the unblocked method. +*/ + + kk = *n - (*n - nx + nb - 1) / nb * nb; + i__1 = kk + 1; + i__2 = -nb; + for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { + +/* + Reduce columns i:i+nb-1 to tridiagonal form and form the + matrix W which is needed to update the unreduced part of + the matrix +*/ + + i__3 = i__ + nb - 1; + zlatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & + work[1], &ldwork); + +/* + Update the unreduced submatrix A(1:i-1,1:i-1), using an + update of the form: A := A - V*W' - W*V' +*/ + + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zher2k_(uplo, "No transpose", &i__3, &nb, &z__1, &a[i__ * a_dim1 + + 1], lda, &work[1], &ldwork, &c_b1015, &a[a_offset], lda); + +/* + Copy superdiagonal elements back into A, and diagonal + elements into D +*/ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + i__4 = j - 1 + j * a_dim1; + i__5 = j - 1; + a[i__4].r = e[i__5], a[i__4].i = 0.; + i__4 = j; + i__5 = j + j * a_dim1; + d__[i__4] = a[i__5].r; +/* L10: */ + } +/* L20: */ + } + +/* Use unblocked code to reduce the last or only block */ + + zhetd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo); + } else { + +/* Reduce the lower triangle of A */ + + i__2 = *n - nx; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { + +/* + Reduce columns i:i+nb-1 to tridiagonal form and form the + matrix W which is needed to update the unreduced part of + the matrix +*/ + + i__3 = *n - i__ + 1; + zlatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], & + tau[i__], &work[1], &ldwork); + +/* + Update the unreduced submatrix A(i+nb:n,i+nb:n), using + an update of the form: A := A - V*W' - W*V' +*/ + + i__3 = *n - i__ - nb + 1; + z__1.r = -1., z__1.i = -0.; + zher2k_(uplo, "No transpose", &i__3, &nb, &z__1, &a[i__ + nb + + i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b1015, &a[ + i__ + nb + (i__ + nb) * a_dim1], lda); + +/* + Copy subdiagonal elements back into A, and diagonal + elements into D +*/ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + i__4 = j + 1 + j * a_dim1; + i__5 = j; + a[i__4].r = e[i__5], a[i__4].i = 0.; + i__4 = j; + i__5 = j + j * a_dim1; + d__[i__4] = a[i__5].r; +/* L30: */ + } +/* L40: */ + } + +/* Use unblocked code to reduce the last or only block */ + + i__1 = *n - i__ + 1; + zhetd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], + &tau[i__], &iinfo); + } + + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + return 0; + +/* End of ZHETRD */ + +} /* zhetrd_ */ + +/* Subroutine */ int zhseqr_(char *job, char *compz, integer *n, integer *ilo, + integer *ihi, doublecomplex *h__, integer *ldh, doublecomplex *w, + doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4[2], + i__5, i__6; + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1; + char ch__1[2]; + + /* Builtin functions */ + double d_imag(doublecomplex *); + void d_cnjg(doublecomplex *, doublecomplex *); + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i__, j, k, l; + static doublecomplex s[225] /* was [15][15] */, v[16]; + static integer i1, i2, ii, nh, nr, ns, nv; + static doublecomplex vv[16]; + static integer itn; + static doublecomplex tau; + static integer its; + static doublereal ulp, tst1; + static integer maxb, ierr; + static doublereal unfl; + static doublecomplex temp; + static doublereal ovfl; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *); + static integer itemp; + static doublereal rtemp; + extern /* Subroutine */ int zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *); + static logical initz, wantt, wantz; + static doublereal rwork[1]; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + extern doublereal dlapy2_(doublereal *, doublereal *); + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); + + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zdscal_(integer *, doublereal *, + doublecomplex *, integer *), zlarfg_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *); + extern integer izamax_(integer *, doublecomplex *, integer *); + extern doublereal zlanhs_(char *, integer *, doublecomplex *, integer *, + doublereal *); + extern /* Subroutine */ int zlahqr_(logical *, logical *, integer *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *, integer *, doublecomplex *, integer *, integer *), + zlacpy_(char *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zlaset_(char *, integer *, + integer *, doublecomplex *, doublecomplex *, doublecomplex *, + integer *), zlarfx_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *); + static doublereal smlnum; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZHSEQR computes the eigenvalues of a complex upper Hessenberg + matrix H, and, optionally, the matrices T and Z from the Schur + decomposition H = Z T Z**H, where T is an upper triangular matrix + (the Schur form), and Z is the unitary matrix of Schur vectors. + + Optionally Z may be postmultiplied into an input unitary matrix Q, + so that this routine can give the Schur factorization of a matrix A + which has been reduced to the Hessenberg form H by the unitary + matrix Q: A = Q*H*Q**H = (QZ)*T*(QZ)**H. + + Arguments + ========= + + JOB (input) CHARACTER*1 + = 'E': compute eigenvalues only; + = 'S': compute eigenvalues and the Schur form T. + + COMPZ (input) CHARACTER*1 + = 'N': no Schur vectors are computed; + = 'I': Z is initialized to the unit matrix and the matrix Z + of Schur vectors of H is returned; + = 'V': Z must contain an unitary matrix Q on entry, and + the product Q*Z is returned. + + N (input) INTEGER + The order of the matrix H. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to ZGEBAL, and then passed to CGEHRD + when the matrix output by ZGEBAL is reduced to Hessenberg + form. Otherwise ILO and IHI should be set to 1 and N + respectively. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + H (input/output) COMPLEX*16 array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if JOB = 'S', H contains the upper triangular matrix + T from the Schur decomposition (the Schur form). If + JOB = 'E', the contents of H are unspecified on exit. + + LDH (input) INTEGER + The leading dimension of the array H. LDH >= max(1,N). + + W (output) COMPLEX*16 array, dimension (N) + The computed eigenvalues. If JOB = 'S', the eigenvalues are + stored in the same order as on the diagonal of the Schur form + returned in H, with W(i) = H(i,i). + + Z (input/output) COMPLEX*16 array, dimension (LDZ,N) + If COMPZ = 'N': Z is not referenced. + If COMPZ = 'I': on entry, Z need not be set, and on exit, Z + contains the unitary matrix Z of the Schur vectors of H. + If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q, + which is assumed to be equal to the unit matrix except for + the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z. + Normally Q is the unitary matrix generated by ZUNGHR after + the call to ZGEHRD which formed the Hessenberg matrix H. + + LDZ (input) INTEGER + The leading dimension of the array Z. + LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise. + + WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,N). + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, ZHSEQR failed to compute all the + eigenvalues in a total of 30*(IHI-ILO+1) iterations; + elements 1:ilo-1 and i+1:n of W contain those + eigenvalues which have been successfully computed. + + ===================================================================== + + + Decode and test the input parameters +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + wantt = lsame_(job, "S"); + initz = lsame_(compz, "I"); + wantz = initz || lsame_(compz, "V"); + + *info = 0; + i__1 = max(1,*n); + work[1].r = (doublereal) i__1, work[1].i = 0.; + lquery = *lwork == -1; + if (! lsame_(job, "E") && ! wantt) { + *info = -1; + } else if (! lsame_(compz, "N") && ! wantz) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -4; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -5; + } else if (*ldh < max(1,*n)) { + *info = -7; + } else if (*ldz < 1 || wantz && *ldz < max(1,*n)) { + *info = -10; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZHSEQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Initialize Z, if necessary */ + + if (initz) { + zlaset_("Full", n, n, &c_b59, &c_b60, &z__[z_offset], ldz); + } + +/* Store the eigenvalues isolated by ZGEBAL. */ + + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__ + i__ * h_dim1; + w[i__2].r = h__[i__3].r, w[i__2].i = h__[i__3].i; +/* L10: */ + } + i__1 = *n; + for (i__ = *ihi + 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__ + i__ * h_dim1; + w[i__2].r = h__[i__3].r, w[i__2].i = h__[i__3].i; +/* L20: */ + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + if (*ilo == *ihi) { + i__1 = *ilo; + i__2 = *ilo + *ilo * h_dim1; + w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; + return 0; + } + +/* + Set rows and columns ILO to IHI to zero below the first + subdiagonal. +*/ + + i__1 = *ihi - 2; + for (j = *ilo; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j + 2; i__ <= i__2; ++i__) { + i__3 = i__ + j * h_dim1; + h__[i__3].r = 0., h__[i__3].i = 0.; +/* L30: */ + } +/* L40: */ + } + nh = *ihi - *ilo + 1; + +/* + I1 and I2 are the indices of the first row and last column of H + to which transformations must be applied. If eigenvalues only are + being computed, I1 and I2 are re-set inside the main loop. +*/ + + if (wantt) { + i1 = 1; + i2 = *n; + } else { + i1 = *ilo; + i2 = *ihi; + } + +/* Ensure that the subdiagonal elements are real. */ + + i__1 = *ihi; + for (i__ = *ilo + 1; i__ <= i__1; ++i__) { + i__2 = i__ + (i__ - 1) * h_dim1; + temp.r = h__[i__2].r, temp.i = h__[i__2].i; + if (d_imag(&temp) != 0.) { + d__1 = temp.r; + d__2 = d_imag(&temp); + rtemp = dlapy2_(&d__1, &d__2); + i__2 = i__ + (i__ - 1) * h_dim1; + h__[i__2].r = rtemp, h__[i__2].i = 0.; + z__1.r = temp.r / rtemp, z__1.i = temp.i / rtemp; + temp.r = z__1.r, temp.i = z__1.i; + if (i2 > i__) { + i__2 = i2 - i__; + d_cnjg(&z__1, &temp); + zscal_(&i__2, &z__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); + } + i__2 = i__ - i1; + zscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1); + if (i__ < *ihi) { + i__2 = i__ + 1 + i__ * h_dim1; + i__3 = i__ + 1 + i__ * h_dim1; + z__1.r = temp.r * h__[i__3].r - temp.i * h__[i__3].i, z__1.i = + temp.r * h__[i__3].i + temp.i * h__[i__3].r; + h__[i__2].r = z__1.r, h__[i__2].i = z__1.i; + } + if (wantz) { + zscal_(&nh, &temp, &z__[*ilo + i__ * z_dim1], &c__1); + } + } +/* L50: */ + } + +/* + Determine the order of the multi-shift QR algorithm to be used. + + Writing concatenation +*/ + i__4[0] = 1, a__1[0] = job; + i__4[1] = 1, a__1[1] = compz; + s_cat(ch__1, a__1, i__4, &c__2, (ftnlen)2); + ns = ilaenv_(&c__4, "ZHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( + ftnlen)2); +/* Writing concatenation */ + i__4[0] = 1, a__1[0] = job; + i__4[1] = 1, a__1[1] = compz; + s_cat(ch__1, a__1, i__4, &c__2, (ftnlen)2); + maxb = ilaenv_(&c__8, "ZHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( + ftnlen)2); + if (ns <= 1 || ns > nh || maxb >= nh) { + +/* Use the standard double-shift algorithm */ + + zlahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], ilo, + ihi, &z__[z_offset], ldz, info); + return 0; + } + maxb = max(2,maxb); +/* Computing MIN */ + i__1 = min(ns,maxb); + ns = min(i__1,15); + +/* + Now 1 < NS <= MAXB < NH. + + Set machine-dependent constants for the stopping criterion. + If norm(H) <= sqrt(OVFL), overflow should not occur. +*/ + + unfl = SAFEMINIMUM; + ovfl = 1. / unfl; + dlabad_(&unfl, &ovfl); + ulp = PRECISION; + smlnum = unfl * (nh / ulp); + +/* ITN is the total number of multiple-shift QR iterations allowed. */ + + itn = nh * 30; + +/* + The main loop begins here. I is the loop index and decreases from + IHI to ILO in steps of at most MAXB. Each iteration of the loop + works with the active submatrix in rows and columns L to I. + Eigenvalues I+1 to IHI have already converged. Either L = ILO, or + H(L,L-1) is negligible so that the matrix splits. +*/ + + i__ = *ihi; +L60: + if (i__ < *ilo) { + goto L180; + } + +/* + Perform multiple-shift QR iterations on rows and columns ILO to I + until a submatrix of order at most MAXB splits off at the bottom + because a subdiagonal element has become negligible. +*/ + + l = *ilo; + i__1 = itn; + for (its = 0; its <= i__1; ++its) { + +/* Look for a single small subdiagonal element. */ + + i__2 = l + 1; + for (k = i__; k >= i__2; --k) { + i__3 = k - 1 + (k - 1) * h_dim1; + i__5 = k + k * h_dim1; + tst1 = (d__1 = h__[i__3].r, abs(d__1)) + (d__2 = d_imag(&h__[k - + 1 + (k - 1) * h_dim1]), abs(d__2)) + ((d__3 = h__[i__5].r, + abs(d__3)) + (d__4 = d_imag(&h__[k + k * h_dim1]), abs( + d__4))); + if (tst1 == 0.) { + i__3 = i__ - l + 1; + tst1 = zlanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, rwork); + } + i__3 = k + (k - 1) * h_dim1; +/* Computing MAX */ + d__2 = ulp * tst1; + if ((d__1 = h__[i__3].r, abs(d__1)) <= max(d__2,smlnum)) { + goto L80; + } +/* L70: */ + } +L80: + l = k; + if (l > *ilo) { + +/* H(L,L-1) is negligible. */ + + i__2 = l + (l - 1) * h_dim1; + h__[i__2].r = 0., h__[i__2].i = 0.; + } + +/* Exit from loop if a submatrix of order <= MAXB has split off. */ + + if (l >= i__ - maxb + 1) { + goto L170; + } + +/* + Now the active submatrix is in rows and columns L to I. If + eigenvalues only are being computed, only the active submatrix + need be transformed. +*/ + + if (! wantt) { + i1 = l; + i2 = i__; + } + + if (its == 20 || its == 30) { + +/* Exceptional shifts. */ + + i__2 = i__; + for (ii = i__ - ns + 1; ii <= i__2; ++ii) { + i__3 = ii; + i__5 = ii + (ii - 1) * h_dim1; + i__6 = ii + ii * h_dim1; + d__3 = ((d__1 = h__[i__5].r, abs(d__1)) + (d__2 = h__[i__6].r, + abs(d__2))) * 1.5; + w[i__3].r = d__3, w[i__3].i = 0.; +/* L90: */ + } + } else { + +/* Use eigenvalues of trailing submatrix of order NS as shifts. */ + + zlacpy_("Full", &ns, &ns, &h__[i__ - ns + 1 + (i__ - ns + 1) * + h_dim1], ldh, s, &c__15); + zlahqr_(&c_false, &c_false, &ns, &c__1, &ns, s, &c__15, &w[i__ - + ns + 1], &c__1, &ns, &z__[z_offset], ldz, &ierr); + if (ierr > 0) { + +/* + If ZLAHQR failed to compute all NS eigenvalues, use the + unconverged diagonal elements as the remaining shifts. +*/ + + i__2 = ierr; + for (ii = 1; ii <= i__2; ++ii) { + i__3 = i__ - ns + ii; + i__5 = ii + ii * 15 - 16; + w[i__3].r = s[i__5].r, w[i__3].i = s[i__5].i; +/* L100: */ + } + } + } + +/* + Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns)) + where G is the Hessenberg submatrix H(L:I,L:I) and w is + the vector of shifts (stored in W). The result is + stored in the local array V. +*/ + + v[0].r = 1., v[0].i = 0.; + i__2 = ns + 1; + for (ii = 2; ii <= i__2; ++ii) { + i__3 = ii - 1; + v[i__3].r = 0., v[i__3].i = 0.; +/* L110: */ + } + nv = 1; + i__2 = i__; + for (j = i__ - ns + 1; j <= i__2; ++j) { + i__3 = nv + 1; + zcopy_(&i__3, v, &c__1, vv, &c__1); + i__3 = nv + 1; + i__5 = j; + z__1.r = -w[i__5].r, z__1.i = -w[i__5].i; + zgemv_("No transpose", &i__3, &nv, &c_b60, &h__[l + l * h_dim1], + ldh, vv, &c__1, &z__1, v, &c__1); + ++nv; + +/* + Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero, + reset it to the unit vector. +*/ + + itemp = izamax_(&nv, v, &c__1); + i__3 = itemp - 1; + rtemp = (d__1 = v[i__3].r, abs(d__1)) + (d__2 = d_imag(&v[itemp - + 1]), abs(d__2)); + if (rtemp == 0.) { + v[0].r = 1., v[0].i = 0.; + i__3 = nv; + for (ii = 2; ii <= i__3; ++ii) { + i__5 = ii - 1; + v[i__5].r = 0., v[i__5].i = 0.; +/* L120: */ + } + } else { + rtemp = max(rtemp,smlnum); + d__1 = 1. / rtemp; + zdscal_(&nv, &d__1, v, &c__1); + } +/* L130: */ + } + +/* Multiple-shift QR step */ + + i__2 = i__ - 1; + for (k = l; k <= i__2; ++k) { + +/* + The first iteration of this loop determines a reflection G + from the vector V and applies it from left and right to H, + thus creating a nonzero bulge below the subdiagonal. + + Each subsequent iteration determines a reflection G to + restore the Hessenberg form in the (K-1)th column, and thus + chases the bulge one step toward the bottom of the active + submatrix. NR is the order of G. + + Computing MIN +*/ + i__3 = ns + 1, i__5 = i__ - k + 1; + nr = min(i__3,i__5); + if (k > l) { + zcopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); + } + zlarfg_(&nr, v, &v[1], &c__1, &tau); + if (k > l) { + i__3 = k + (k - 1) * h_dim1; + h__[i__3].r = v[0].r, h__[i__3].i = v[0].i; + i__3 = i__; + for (ii = k + 1; ii <= i__3; ++ii) { + i__5 = ii + (k - 1) * h_dim1; + h__[i__5].r = 0., h__[i__5].i = 0.; +/* L140: */ + } + } + v[0].r = 1., v[0].i = 0.; + +/* + Apply G' from the left to transform the rows of the matrix + in columns K to I2. +*/ + + i__3 = i2 - k + 1; + d_cnjg(&z__1, &tau); + zlarfx_("Left", &nr, &i__3, v, &z__1, &h__[k + k * h_dim1], ldh, & + work[1]); + +/* + Apply G from the right to transform the columns of the + matrix in rows I1 to min(K+NR,I). + + Computing MIN +*/ + i__5 = k + nr; + i__3 = min(i__5,i__) - i1 + 1; + zlarfx_("Right", &i__3, &nr, v, &tau, &h__[i1 + k * h_dim1], ldh, + &work[1]); + + if (wantz) { + +/* Accumulate transformations in the matrix Z */ + + zlarfx_("Right", &nh, &nr, v, &tau, &z__[*ilo + k * z_dim1], + ldz, &work[1]); + } +/* L150: */ + } + +/* Ensure that H(I,I-1) is real. */ + + i__2 = i__ + (i__ - 1) * h_dim1; + temp.r = h__[i__2].r, temp.i = h__[i__2].i; + if (d_imag(&temp) != 0.) { + d__1 = temp.r; + d__2 = d_imag(&temp); + rtemp = dlapy2_(&d__1, &d__2); + i__2 = i__ + (i__ - 1) * h_dim1; + h__[i__2].r = rtemp, h__[i__2].i = 0.; + z__1.r = temp.r / rtemp, z__1.i = temp.i / rtemp; + temp.r = z__1.r, temp.i = z__1.i; + if (i2 > i__) { + i__2 = i2 - i__; + d_cnjg(&z__1, &temp); + zscal_(&i__2, &z__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); + } + i__2 = i__ - i1; + zscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1); + if (wantz) { + zscal_(&nh, &temp, &z__[*ilo + i__ * z_dim1], &c__1); + } + } + +/* L160: */ + } + +/* Failure to converge in remaining number of iterations */ + + *info = i__; + return 0; + +L170: + +/* + A submatrix of order <= MAXB in rows and columns L to I has split + off. Use the double-shift QR algorithm to handle it. +*/ + + zlahqr_(&wantt, &wantz, n, &l, &i__, &h__[h_offset], ldh, &w[1], ilo, ihi, + &z__[z_offset], ldz, info); + if (*info > 0) { + return 0; + } + +/* + Decrement number of remaining iterations, and return to start of + the main loop with a new value of I. +*/ + + itn -= its; + i__ = l - 1; + goto L60; + +L180: + i__1 = max(1,*n); + work[1].r = (doublereal) i__1, work[1].i = 0.; + return 0; + +/* End of ZHSEQR */ + +} /* zhseqr_ */ + +/* Subroutine */ int zlabrd_(integer *m, integer *n, integer *nb, + doublecomplex *a, integer *lda, doublereal *d__, doublereal *e, + doublecomplex *tauq, doublecomplex *taup, doublecomplex *x, integer * + ldx, doublecomplex *y, integer *ldy) +{ + /* System generated locals */ + integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, + i__3; + doublecomplex z__1; + + /* Local variables */ + static integer i__; + static doublecomplex alpha; + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *), zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *), + zlarfg_(integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *), zlacgv_(integer *, doublecomplex *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZLABRD reduces the first NB rows and columns of a complex general + m by n matrix A to upper or lower real bidiagonal form by a unitary + transformation Q' * A * P, and returns the matrices X and Y which + are needed to apply the transformation to the unreduced part of A. + + If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower + bidiagonal form. + + This is an auxiliary routine called by ZGEBRD + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. + + N (input) INTEGER + The number of columns in the matrix A. + + NB (input) INTEGER + The number of leading rows and columns of A to be reduced. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the m by n general matrix to be reduced. + On exit, the first NB rows and columns of the matrix are + overwritten; the rest of the array is unchanged. + If m >= n, elements on and below the diagonal in the first NB + columns, with the array TAUQ, represent the unitary + matrix Q as a product of elementary reflectors; and + elements above the diagonal in the first NB rows, with the + array TAUP, represent the unitary matrix P as a product + of elementary reflectors. + If m < n, elements below the diagonal in the first NB + columns, with the array TAUQ, represent the unitary + matrix Q as a product of elementary reflectors, and + elements on and above the diagonal in the first NB rows, + with the array TAUP, represent the unitary matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) DOUBLE PRECISION array, dimension (NB) + The diagonal elements of the first NB rows and columns of + the reduced matrix. D(i) = A(i,i). + + E (output) DOUBLE PRECISION array, dimension (NB) + The off-diagonal elements of the first NB rows and columns of + the reduced matrix. + + TAUQ (output) COMPLEX*16 array dimension (NB) + The scalar factors of the elementary reflectors which + represent the unitary matrix Q. See Further Details. + + TAUP (output) COMPLEX*16 array, dimension (NB) + The scalar factors of the elementary reflectors which + represent the unitary matrix P. See Further Details. + + X (output) COMPLEX*16 array, dimension (LDX,NB) + The m-by-nb matrix X required to update the unreduced part + of A. + + LDX (input) INTEGER + The leading dimension of the array X. LDX >= max(1,M). + + Y (output) COMPLEX*16 array, dimension (LDY,NB) + The n-by-nb matrix Y required to update the unreduced part + of A. + + LDY (output) INTEGER + The leading dimension of the array Y. LDY >= max(1,N). + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are complex scalars, and v and u are complex + vectors. + + If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in + A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in + A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in + A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in + A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + The elements of the vectors v and u together form the m-by-nb matrix + V and the nb-by-n matrix U' which are needed, with X and Y, to apply + the transformation to the unreduced part of the matrix, using a block + update of the form: A := A - V*Y' - X*U'. + + The contents of A on exit are illustrated by the following examples + with nb = 2: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) + ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) + ( v1 v2 a a a ) ( v1 1 a a a a ) + ( v1 v2 a a a ) ( v1 v2 a a a a ) + ( v1 v2 a a a ) ( v1 v2 a a a a ) + ( v1 v2 a a a ) + + where a denotes an element of the original matrix which is unchanged, + vi denotes an element of the vector defining H(i), and ui an element + of the vector defining G(i). + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + return 0; + } + + if (*m >= *n) { + +/* Reduce to upper bidiagonal form */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i:m,i) */ + + i__2 = i__ - 1; + zlacgv_(&i__2, &y[i__ + y_dim1], ldy); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + a_dim1], lda, + &y[i__ + y_dim1], ldy, &c_b60, &a[i__ + i__ * a_dim1], & + c__1); + i__2 = i__ - 1; + zlacgv_(&i__2, &y[i__ + y_dim1], ldy); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &x[i__ + x_dim1], ldx, + &a[i__ * a_dim1 + 1], &c__1, &c_b60, &a[i__ + i__ * + a_dim1], &c__1); + +/* Generate reflection Q(i) to annihilate A(i+1:m,i) */ + + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + zlarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, & + tauq[i__]); + i__2 = i__; + d__[i__2] = alpha.r; + if (i__ < *n) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Compute Y(i+1:n,i) */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__; + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[i__ + ( + i__ + 1) * a_dim1], lda, &a[i__ + i__ * a_dim1], & + c__1, &c_b59, &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[i__ + + a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b59, & + y[i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b60, &y[ + i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &x[i__ + + x_dim1], ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b59, & + y[i__ * y_dim1 + 1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + z__1.r = -1., z__1.i = -0.; + zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &a[(i__ + + 1) * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & + c_b60, &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *n - i__; + zscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + +/* Update A(i,i+1:n) */ + + i__2 = *n - i__; + zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + zlacgv_(&i__, &a[i__ + a_dim1], lda); + i__2 = *n - i__; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__, &z__1, &y[i__ + 1 + + y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b60, &a[i__ + + (i__ + 1) * a_dim1], lda); + zlacgv_(&i__, &a[i__ + a_dim1], lda); + i__2 = i__ - 1; + zlacgv_(&i__2, &x[i__ + x_dim1], ldx); + i__2 = i__ - 1; + i__3 = *n - i__; + z__1.r = -1., z__1.i = -0.; + zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &a[(i__ + + 1) * a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b60, + &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ - 1; + zlacgv_(&i__2, &x[i__ + x_dim1], ldx); + +/* Generate reflection P(i) to annihilate A(i,i+2:n) */ + + i__2 = i__ + (i__ + 1) * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + zlarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & + taup[i__]); + i__2 = i__; + e[i__2] = alpha.r; + i__2 = i__ + (i__ + 1) * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Compute X(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = *n - i__; + zgemv_("No transpose", &i__2, &i__3, &c_b60, &a[i__ + 1 + ( + i__ + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1], + lda, &c_b59, &x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__; + zgemv_("Conjugate transpose", &i__2, &i__, &c_b60, &y[i__ + 1 + + y_dim1], ldy, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b59, &x[i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__, &z__1, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b60, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + zgemv_("No transpose", &i__2, &i__3, &c_b60, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b59, &x[i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b60, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *m - i__; + zscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__; + zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Reduce to lower bidiagonal form */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i,i:n) */ + + i__2 = *n - i__ + 1; + zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ - 1; + zlacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &y[i__ + y_dim1], ldy, + &a[i__ + a_dim1], lda, &c_b60, &a[i__ + i__ * a_dim1], + lda); + i__2 = i__ - 1; + zlacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = i__ - 1; + zlacgv_(&i__2, &x[i__ + x_dim1], ldx); + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &a[i__ * + a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b60, &a[i__ + + i__ * a_dim1], lda); + i__2 = i__ - 1; + zlacgv_(&i__2, &x[i__ + x_dim1], ldx); + +/* Generate reflection P(i) to annihilate A(i,i+1:n) */ + + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + zlarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & + taup[i__]); + i__2 = i__; + d__[i__2] = alpha.r; + if (i__ < *m) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Compute X(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = *n - i__ + 1; + zgemv_("No transpose", &i__2, &i__3, &c_b60, &a[i__ + 1 + i__ + * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &c_b59, & + x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &y[i__ + + y_dim1], ldy, &a[i__ + i__ * a_dim1], lda, &c_b59, &x[ + i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b60, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + zgemv_("No transpose", &i__2, &i__3, &c_b60, &a[i__ * a_dim1 + + 1], lda, &a[i__ + i__ * a_dim1], lda, &c_b59, &x[ + i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b60, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *m - i__; + zscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__ + 1; + zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + +/* Update A(i+1:m,i) */ + + i__2 = i__ - 1; + zlacgv_(&i__2, &y[i__ + y_dim1], ldy); + i__2 = *m - i__; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + 1 + + a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b60, &a[i__ + + 1 + i__ * a_dim1], &c__1); + i__2 = i__ - 1; + zlacgv_(&i__2, &y[i__ + y_dim1], ldy); + i__2 = *m - i__; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__, &z__1, &x[i__ + 1 + + x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b60, &a[ + i__ + 1 + i__ * a_dim1], &c__1); + +/* Generate reflection Q(i) to annihilate A(i+2:m,i) */ + + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + zlarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, + &tauq[i__]); + i__2 = i__; + e[i__2] = alpha.r; + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Compute Y(i+1:n,i) */ + + i__2 = *m - i__; + i__3 = *n - i__; + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[i__ + + 1 + (i__ + 1) * a_dim1], lda, &a[i__ + 1 + i__ * + a_dim1], &c__1, &c_b59, &y[i__ + 1 + i__ * y_dim1], & + c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[i__ + + 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b59, &y[i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b60, &y[ + i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__; + zgemv_("Conjugate transpose", &i__2, &i__, &c_b60, &x[i__ + 1 + + x_dim1], ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b59, &y[i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + z__1.r = -1., z__1.i = -0.; + zgemv_("Conjugate transpose", &i__, &i__2, &z__1, &a[(i__ + 1) + * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & + c_b60, &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *n - i__; + zscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + } else { + i__2 = *n - i__ + 1; + zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + } +/* L20: */ + } + } + return 0; + +/* End of ZLABRD */ + +} /* zlabrd_ */ + +/* Subroutine */ int zlacgv_(integer *n, doublecomplex *x, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + doublecomplex z__1; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__, ioff; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + ZLACGV conjugates a complex vector of length N. + + Arguments + ========= + + N (input) INTEGER + The length of the vector X. N >= 0. + + X (input/output) COMPLEX*16 array, dimension + (1+(N-1)*abs(INCX)) + On entry, the vector of length N to be conjugated. + On exit, X is overwritten with conjg(X). + + INCX (input) INTEGER + The spacing between successive elements of X. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*incx == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + d_cnjg(&z__1, &x[i__]); + x[i__2].r = z__1.r, x[i__2].i = z__1.i; +/* L10: */ + } + } else { + ioff = 1; + if (*incx < 0) { + ioff = 1 - (*n - 1) * *incx; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ioff; + d_cnjg(&z__1, &x[ioff]); + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + ioff += *incx; +/* L20: */ + } + } + return 0; + +/* End of ZLACGV */ + +} /* zlacgv_ */ + +/* Subroutine */ int zlacp2_(char *uplo, integer *m, integer *n, doublereal * + a, integer *lda, doublecomplex *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZLACP2 copies all or part of a real two-dimensional matrix A to a + complex matrix B. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies the part of the matrix A to be copied to B. + = 'U': Upper triangular part + = 'L': Lower triangular part + Otherwise: All of the matrix A + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA,N) + The m by n matrix A. If UPLO = 'U', only the upper trapezium + is accessed; if UPLO = 'L', only the lower trapezium is + accessed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + B (output) COMPLEX*16 array, dimension (LDB,N) + On exit, B = A in the locations specified by UPLO. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,M). + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4], b[i__3].i = 0.; +/* L10: */ + } +/* L20: */ + } + + } else if (lsame_(uplo, "L")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4], b[i__3].i = 0.; +/* L30: */ + } +/* L40: */ + } + + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4], b[i__3].i = 0.; +/* L50: */ + } +/* L60: */ + } + } + + return 0; + +/* End of ZLACP2 */ + +} /* zlacp2_ */ + +/* Subroutine */ int zlacpy_(char *uplo, integer *m, integer *n, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + ZLACPY copies all or part of a two-dimensional matrix A to another + matrix B. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies the part of the matrix A to be copied to B. + = 'U': Upper triangular part + = 'L': Lower triangular part + Otherwise: All of the matrix A + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input) COMPLEX*16 array, dimension (LDA,N) + The m by n matrix A. If UPLO = 'U', only the upper trapezium + is accessed; if UPLO = 'L', only the lower trapezium is + accessed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + B (output) COMPLEX*16 array, dimension (LDB,N) + On exit, B = A in the locations specified by UPLO. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,M). + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i; +/* L10: */ + } +/* L20: */ + } + + } else if (lsame_(uplo, "L")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i; +/* L30: */ + } +/* L40: */ + } + + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i; +/* L50: */ + } +/* L60: */ + } + } + + return 0; + +/* End of ZLACPY */ + +} /* zlacpy_ */ + +/* Subroutine */ int zlacrm_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublereal *b, integer *ldb, doublecomplex *c__, + integer *ldc, doublereal *rwork) +{ + /* System generated locals */ + integer b_dim1, b_offset, a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, + i__3, i__4, i__5; + doublereal d__1; + doublecomplex z__1; + + /* Builtin functions */ + double d_imag(doublecomplex *); + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZLACRM performs a very simple matrix-matrix multiplication: + C := A * B, + where A is M by N and complex; B is N by N and real; + C is M by N and complex. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A and of the matrix C. + M >= 0. + + N (input) INTEGER + The number of columns and rows of the matrix B and + the number of columns of the matrix C. + N >= 0. + + A (input) COMPLEX*16 array, dimension (LDA, N) + A contains the M by N matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >=max(1,M). + + B (input) DOUBLE PRECISION array, dimension (LDB, N) + B contains the N by N matrix B. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >=max(1,N). + + C (input) COMPLEX*16 array, dimension (LDC, N) + C contains the M by N matrix C. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >=max(1,N). + + RWORK (workspace) DOUBLE PRECISION array, dimension (2*M*N) + + ===================================================================== + + + Quick return if possible. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --rwork; + + /* Function Body */ + if (*m == 0 || *n == 0) { + return 0; + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + rwork[(j - 1) * *m + i__] = a[i__3].r; +/* L10: */ + } +/* L20: */ + } + + l = *m * *n + 1; + dgemm_("N", "N", m, n, n, &c_b1015, &rwork[1], m, &b[b_offset], ldb, & + c_b324, &rwork[l], m); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = l + (j - 1) * *m + i__ - 1; + c__[i__3].r = rwork[i__4], c__[i__3].i = 0.; +/* L30: */ + } +/* L40: */ + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + rwork[(j - 1) * *m + i__] = d_imag(&a[i__ + j * a_dim1]); +/* L50: */ + } +/* L60: */ + } + dgemm_("N", "N", m, n, n, &c_b1015, &rwork[1], m, &b[b_offset], ldb, & + c_b324, &rwork[l], m); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + d__1 = c__[i__4].r; + i__5 = l + (j - 1) * *m + i__ - 1; + z__1.r = d__1, z__1.i = rwork[i__5]; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L70: */ + } +/* L80: */ + } + + return 0; + +/* End of ZLACRM */ + +} /* zlacrm_ */ + +/* Double Complex */ VOID zladiv_(doublecomplex * ret_val, doublecomplex *x, + doublecomplex *y) +{ + /* System generated locals */ + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1; + + /* Builtin functions */ + double d_imag(doublecomplex *); + + /* Local variables */ + static doublereal zi, zr; + extern /* Subroutine */ int dladiv_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + ZLADIV := X / Y, where X and Y are complex. The computation of X / Y + will not overflow on an intermediary step unless the results + overflows. + + Arguments + ========= + + X (input) COMPLEX*16 + Y (input) COMPLEX*16 + The complex scalars X and Y. + + ===================================================================== +*/ + + + d__1 = x->r; + d__2 = d_imag(x); + d__3 = y->r; + d__4 = d_imag(y); + dladiv_(&d__1, &d__2, &d__3, &d__4, &zr, &zi); + z__1.r = zr, z__1.i = zi; + ret_val->r = z__1.r, ret_val->i = z__1.i; + + return ; + +/* End of ZLADIV */ + +} /* zladiv_ */ + +/* Subroutine */ int zlaed0_(integer *qsiz, integer *n, doublereal *d__, + doublereal *e, doublecomplex *q, integer *ldq, doublecomplex *qstore, + integer *ldqs, doublereal *rwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double log(doublereal); + integer pow_ii(integer *, integer *); + + /* Local variables */ + static integer i__, j, k, ll, iq, lgn, msd2, smm1, spm1, spm2; + static doublereal temp; + static integer curr, iperm; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer indxq, iwrem, iqptr, tlvls; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zlaed7_(integer *, integer *, + integer *, integer *, integer *, integer *, doublereal *, + doublecomplex *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, integer *, integer *, + doublereal *, doublecomplex *, doublereal *, integer *, integer *) + ; + static integer igivcl; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlacrm_(integer *, integer *, doublecomplex *, + integer *, doublereal *, integer *, doublecomplex *, integer *, + doublereal *); + static integer igivnm, submat, curprb, subpbs, igivpt; + extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *); + static integer curlvl, matsiz, iprmpt, smlsiz; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + Using the divide and conquer method, ZLAED0 computes all eigenvalues + of a symmetric tridiagonal matrix which is one diagonal block of + those from reducing a dense or band Hermitian matrix and + corresponding eigenvectors of the dense or band matrix. + + Arguments + ========= + + QSIZ (input) INTEGER + The dimension of the unitary matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, the eigenvalues in ascending order. + + E (input/output) DOUBLE PRECISION array, dimension (N-1) + On entry, the off-diagonal elements of the tridiagonal matrix. + On exit, E has been destroyed. + + Q (input/output) COMPLEX*16 array, dimension (LDQ,N) + On entry, Q must contain an QSIZ x N matrix whose columns + unitarily orthonormal. It is a part of the unitary matrix + that reduces the full dense Hermitian matrix to a + (reducible) symmetric tridiagonal matrix. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + IWORK (workspace) INTEGER array, + the dimension of IWORK must be at least + 6 + 6*N + 5*N*lg N + ( lg( N ) = smallest integer k + such that 2^k >= N ) + + RWORK (workspace) DOUBLE PRECISION array, + dimension (1 + 3*N + 2*N*lg N + 3*N**2) + ( lg( N ) = smallest integer k + such that 2^k >= N ) + + QSTORE (workspace) COMPLEX*16 array, dimension (LDQS, N) + Used to store parts of + the eigenvector matrix when the updating matrix multiplies + take place. + + LDQS (input) INTEGER + The leading dimension of the array QSTORE. + LDQS >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an eigenvalue while + working on the submatrix lying in rows and columns + INFO/(N+1) through mod(INFO,N+1). + + ===================================================================== + + Warning: N could be as big as QSIZ! + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + qstore_dim1 = *ldqs; + qstore_offset = 1 + qstore_dim1; + qstore -= qstore_offset; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + +/* + IF( ICOMPQ .LT. 0 .OR. ICOMPQ .GT. 2 ) THEN + INFO = -1 + ELSE IF( ( ICOMPQ .EQ. 1 ) .AND. ( QSIZ .LT. MAX( 0, N ) ) ) + $ THEN +*/ + if (*qsiz < max(0,*n)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldq < max(1,*n)) { + *info = -6; + } else if (*ldqs < max(1,*n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZLAED0", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + smlsiz = ilaenv_(&c__9, "ZLAED0", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + +/* + Determine the size and placement of the submatrices, and save in + the leading elements of IWORK. +*/ + + iwork[1] = *n; + subpbs = 1; + tlvls = 0; +L10: + if (iwork[subpbs] > smlsiz) { + for (j = subpbs; j >= 1; --j) { + iwork[j * 2] = (iwork[j] + 1) / 2; + iwork[(j << 1) - 1] = iwork[j] / 2; +/* L20: */ + } + ++tlvls; + subpbs <<= 1; + goto L10; + } + i__1 = subpbs; + for (j = 2; j <= i__1; ++j) { + iwork[j] += iwork[j - 1]; +/* L30: */ + } + +/* + Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 + using rank-1 modifications (cuts). +*/ + + spm1 = subpbs - 1; + i__1 = spm1; + for (i__ = 1; i__ <= i__1; ++i__) { + submat = iwork[i__] + 1; + smm1 = submat - 1; + d__[smm1] -= (d__1 = e[smm1], abs(d__1)); + d__[submat] -= (d__1 = e[smm1], abs(d__1)); +/* L40: */ + } + + indxq = (*n << 2) + 3; + +/* + Set up workspaces for eigenvalues only/accumulate new vectors + routine +*/ + + temp = log((doublereal) (*n)) / log(2.); + lgn = (integer) temp; + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + iprmpt = indxq + *n + 1; + iperm = iprmpt + *n * lgn; + iqptr = iperm + *n * lgn; + igivpt = iqptr + *n + 2; + igivcl = igivpt + *n * lgn; + + igivnm = 1; + iq = igivnm + (*n << 1) * lgn; +/* Computing 2nd power */ + i__1 = *n; + iwrem = iq + i__1 * i__1 + 1; +/* Initialize pointers */ + i__1 = subpbs; + for (i__ = 0; i__ <= i__1; ++i__) { + iwork[iprmpt + i__] = 1; + iwork[igivpt + i__] = 1; +/* L50: */ + } + iwork[iqptr] = 1; + +/* + Solve each submatrix eigenproblem at the bottom of the divide and + conquer tree. +*/ + + curr = 0; + i__1 = spm1; + for (i__ = 0; i__ <= i__1; ++i__) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[1]; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 1] - iwork[i__]; + } + ll = iq - 1 + iwork[iqptr + curr]; + dsteqr_("I", &matsiz, &d__[submat], &e[submat], &rwork[ll], &matsiz, & + rwork[1], info); + zlacrm_(qsiz, &matsiz, &q[submat * q_dim1 + 1], ldq, &rwork[ll], & + matsiz, &qstore[submat * qstore_dim1 + 1], ldqs, &rwork[iwrem] + ); +/* Computing 2nd power */ + i__2 = matsiz; + iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2; + ++curr; + if (*info > 0) { + *info = submat * (*n + 1) + submat + matsiz - 1; + return 0; + } + k = 1; + i__2 = iwork[i__ + 1]; + for (j = submat; j <= i__2; ++j) { + iwork[indxq + j] = k; + ++k; +/* L60: */ + } +/* L70: */ + } + +/* + Successively merge eigensystems of adjacent submatrices + into eigensystem for the corresponding larger matrix. + + while ( SUBPBS > 1 ) +*/ + + curlvl = 1; +L80: + if (subpbs > 1) { + spm2 = subpbs - 2; + i__1 = spm2; + for (i__ = 0; i__ <= i__1; i__ += 2) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[2]; + msd2 = iwork[1]; + curprb = 0; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 2] - iwork[i__]; + msd2 = matsiz / 2; + ++curprb; + } + +/* + Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) + into an eigensystem of size MATSIZ. ZLAED7 handles the case + when the eigenvectors of a full or band Hermitian matrix (which + was reduced to tridiagonal form) are desired. + + I am free to use Q as a valuable working space until Loop 150. +*/ + + zlaed7_(&matsiz, &msd2, qsiz, &tlvls, &curlvl, &curprb, &d__[ + submat], &qstore[submat * qstore_dim1 + 1], ldqs, &e[ + submat + msd2 - 1], &iwork[indxq + submat], &rwork[iq], & + iwork[iqptr], &iwork[iprmpt], &iwork[iperm], &iwork[ + igivpt], &iwork[igivcl], &rwork[igivnm], &q[submat * + q_dim1 + 1], &rwork[iwrem], &iwork[subpbs + 1], info); + if (*info > 0) { + *info = submat * (*n + 1) + submat + matsiz - 1; + return 0; + } + iwork[i__ / 2 + 1] = iwork[i__ + 2]; +/* L90: */ + } + subpbs /= 2; + ++curlvl; + goto L80; + } + +/* + end while + + Re-merge the eigenvalues/vectors which were deflated at the final + merge step. +*/ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + rwork[i__] = d__[j]; + zcopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 + 1] + , &c__1); +/* L100: */ + } + dcopy_(n, &rwork[1], &c__1, &d__[1], &c__1); + + return 0; + +/* End of ZLAED0 */ + +} /* zlaed0_ */ + +/* Subroutine */ int zlaed7_(integer *n, integer *cutpnt, integer *qsiz, + integer *tlvls, integer *curlvl, integer *curpbm, doublereal *d__, + doublecomplex *q, integer *ldq, doublereal *rho, integer *indxq, + doublereal *qstore, integer *qptr, integer *prmptr, integer *perm, + integer *givptr, integer *givcol, doublereal *givnum, doublecomplex * + work, doublereal *rwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + + /* Local variables */ + static integer i__, k, n1, n2, iq, iw, iz, ptr, ind1, ind2, indx, curr, + indxc, indxp; + extern /* Subroutine */ int dlaed9_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, integer *), + zlaed8_(integer *, integer *, integer *, doublecomplex *, integer + *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublereal *, integer *, + integer *, integer *, integer *, integer *, integer *, + doublereal *, integer *), dlaeda_(integer *, integer *, integer *, + integer *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *); + static integer idlmda; + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), xerbla_(char *, integer *), zlacrm_(integer *, integer *, doublecomplex *, integer *, + doublereal *, integer *, doublecomplex *, integer *, doublereal * + ); + static integer coltyp; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZLAED7 computes the updated eigensystem of a diagonal + matrix after modification by a rank-one symmetric matrix. This + routine is used only for the eigenproblem which requires all + eigenvalues and optionally eigenvectors of a dense or banded + Hermitian matrix that has been reduced to tridiagonal form. + + T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) + + where Z = Q'u, u is a vector of length N with ones in the + CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + + The eigenvectors of the original matrix are stored in Q, and the + eigenvalues are in D. The algorithm consists of three stages: + + The first stage consists of deflating the size of the problem + when there are multiple eigenvalues or if there is a zero in + the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. This stage is + performed by the routine DLAED2. + + The second stage consists of calculating the updated + eigenvalues. This is done by finding the roots of the secular + equation via the routine DLAED4 (as called by SLAED3). + This routine also calculates the eigenvectors of the current + problem. + + The final stage consists of computing the updated eigenvectors + directly using the updated eigenvalues. The eigenvectors for + the current problem are multiplied with the eigenvectors from + the overall problem. + + Arguments + ========= + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + CUTPNT (input) INTEGER + Contains the location of the last eigenvalue in the leading + sub-matrix. min(1,N) <= CUTPNT <= N. + + QSIZ (input) INTEGER + The dimension of the unitary matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N. + + TLVLS (input) INTEGER + The total number of merging levels in the overall divide and + conquer tree. + + CURLVL (input) INTEGER + The current level in the overall merge routine, + 0 <= curlvl <= tlvls. + + CURPBM (input) INTEGER + The current problem in the current level in the overall + merge routine (counting from upper left to lower right). + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the eigenvalues of the rank-1-perturbed matrix. + On exit, the eigenvalues of the repaired matrix. + + Q (input/output) COMPLEX*16 array, dimension (LDQ,N) + On entry, the eigenvectors of the rank-1-perturbed matrix. + On exit, the eigenvectors of the repaired tridiagonal matrix. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + RHO (input) DOUBLE PRECISION + Contains the subdiagonal element used to create the rank-1 + modification. + + INDXQ (output) INTEGER array, dimension (N) + This contains the permutation which will reintegrate the + subproblem just solved back into sorted order, + ie. D( INDXQ( I = 1, N ) ) will be in ascending order. + + IWORK (workspace) INTEGER array, dimension (4*N) + + RWORK (workspace) DOUBLE PRECISION array, + dimension (3*N+2*QSIZ*N) + + WORK (workspace) COMPLEX*16 array, dimension (QSIZ*N) + + QSTORE (input/output) DOUBLE PRECISION array, dimension (N**2+1) + Stores eigenvectors of submatrices encountered during + divide and conquer, packed together. QPTR points to + beginning of the submatrices. + + QPTR (input/output) INTEGER array, dimension (N+2) + List of indices pointing to beginning of submatrices stored + in QSTORE. The submatrices are numbered starting at the + bottom left of the divide and conquer tree, from left to + right and bottom to top. + + PRMPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in PERM a + level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) + indicates the size of the permutation and also the size of + the full, non-deflated problem. + + PERM (input) INTEGER array, dimension (N lg N) + Contains the permutations (from deflation and sorting) to be + applied to each eigenblock. + + GIVPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in GIVCOL a + level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) + indicates the number of Givens rotations. + + GIVCOL (input) INTEGER array, dimension (2, N lg N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. + + GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N) + Each number indicates the S value to be used in the + corresponding Givens rotation. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an eigenvalue did not converge + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --qstore; + --qptr; + --prmptr; + --perm; + --givptr; + givcol -= 3; + givnum -= 3; + --work; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + +/* + IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN +*/ + if (*n < 0) { + *info = -1; + } else if (min(1,*n) > *cutpnt || *n < *cutpnt) { + *info = -2; + } else if (*qsiz < *n) { + *info = -3; + } else if (*ldq < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZLAED7", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* + The following values are for bookkeeping purposes only. They are + integer pointers which indicate the portion of the workspace + used by a particular array in DLAED2 and SLAED3. +*/ + + iz = 1; + idlmda = iz + *n; + iw = idlmda + *n; + iq = iw + *n; + + indx = 1; + indxc = indx + *n; + coltyp = indxc + *n; + indxp = coltyp + *n; + +/* + Form the z-vector which consists of the last row of Q_1 and the + first row of Q_2. +*/ + + ptr = pow_ii(&c__2, tlvls) + 1; + i__1 = *curlvl - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *tlvls - i__; + ptr += pow_ii(&c__2, &i__2); +/* L10: */ + } + curr = ptr + *curpbm; + dlaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], & + givcol[3], &givnum[3], &qstore[1], &qptr[1], &rwork[iz], &rwork[ + iz + *n], info); + +/* + When solving the final problem, we no longer need the stored data, + so we will overwrite the data from this level onto the previously + used storage space. +*/ + + if (*curlvl == *tlvls) { + qptr[curr] = 1; + prmptr[curr] = 1; + givptr[curr] = 1; + } + +/* Sort and Deflate eigenvalues. */ + + zlaed8_(&k, n, qsiz, &q[q_offset], ldq, &d__[1], rho, cutpnt, &rwork[iz], + &rwork[idlmda], &work[1], qsiz, &rwork[iw], &iwork[indxp], &iwork[ + indx], &indxq[1], &perm[prmptr[curr]], &givptr[curr + 1], &givcol[ + (givptr[curr] << 1) + 1], &givnum[(givptr[curr] << 1) + 1], info); + prmptr[curr + 1] = prmptr[curr] + *n; + givptr[curr + 1] += givptr[curr]; + +/* Solve Secular Equation. */ + + if (k != 0) { + dlaed9_(&k, &c__1, &k, n, &d__[1], &rwork[iq], &k, rho, &rwork[idlmda] + , &rwork[iw], &qstore[qptr[curr]], &k, info); + zlacrm_(qsiz, &k, &work[1], qsiz, &qstore[qptr[curr]], &k, &q[ + q_offset], ldq, &rwork[iq]); +/* Computing 2nd power */ + i__1 = k; + qptr[curr + 1] = qptr[curr] + i__1 * i__1; + if (*info != 0) { + return 0; + } + +/* Prepare the INDXQ sorting premutation. */ + + n1 = k; + n2 = *n - k; + ind1 = 1; + ind2 = *n; + dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); + } else { + qptr[curr + 1] = qptr[curr]; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indxq[i__] = i__; +/* L20: */ + } + } + + return 0; + +/* End of ZLAED7 */ + +} /* zlaed7_ */ + +/* Subroutine */ int zlaed8_(integer *k, integer *n, integer *qsiz, + doublecomplex *q, integer *ldq, doublereal *d__, doublereal *rho, + integer *cutpnt, doublereal *z__, doublereal *dlamda, doublecomplex * + q2, integer *ldq2, doublereal *w, integer *indxp, integer *indx, + integer *indxq, integer *perm, integer *givptr, integer *givcol, + doublereal *givnum, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, q2_dim1, q2_offset, i__1; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal c__; + static integer i__, j; + static doublereal s, t; + static integer k2, n1, n2, jp, n1p1; + static doublereal eps, tau, tol; + static integer jlam, imax, jmax; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dcopy_(integer *, doublereal *, integer *, doublereal + *, integer *), zdrot_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, doublereal *), zcopy_( + integer *, doublecomplex *, integer *, doublecomplex *, integer *) + ; + + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), xerbla_(char *, integer *), zlacpy_(char *, integer *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + September 30, 1994 + + + Purpose + ======= + + ZLAED8 merges the two sets of eigenvalues together into a single + sorted set. Then it tries to deflate the size of the problem. + There are two ways in which deflation can occur: when two or more + eigenvalues are close together or if there is a tiny element in the + Z vector. For each such occurrence the order of the related secular + equation problem is reduced by one. + + Arguments + ========= + + K (output) INTEGER + Contains the number of non-deflated eigenvalues. + This is the order of the related secular equation. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + QSIZ (input) INTEGER + The dimension of the unitary matrix used to reduce + the dense or band matrix to tridiagonal form. + QSIZ >= N if ICOMPQ = 1. + + Q (input/output) COMPLEX*16 array, dimension (LDQ,N) + On entry, Q contains the eigenvectors of the partially solved + system which has been previously updated in matrix + multiplies with other partially solved eigensystems. + On exit, Q contains the trailing (N-K) updated eigenvectors + (those which were deflated) in its last N-K columns. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max( 1, N ). + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, D contains the eigenvalues of the two submatrices to + be combined. On exit, D contains the trailing (N-K) updated + eigenvalues (those which were deflated) sorted into increasing + order. + + RHO (input/output) DOUBLE PRECISION + Contains the off diagonal element associated with the rank-1 + cut which originally split the two submatrices which are now + being recombined. RHO is modified during the computation to + the value required by DLAED3. + + CUTPNT (input) INTEGER + Contains the location of the last eigenvalue in the leading + sub-matrix. MIN(1,N) <= CUTPNT <= N. + + Z (input) DOUBLE PRECISION array, dimension (N) + On input this vector contains the updating vector (the last + row of the first sub-eigenvector matrix and the first row of + the second sub-eigenvector matrix). The contents of Z are + destroyed during the updating process. + + DLAMDA (output) DOUBLE PRECISION array, dimension (N) + Contains a copy of the first K eigenvalues which will be used + by DLAED3 to form the secular equation. + + Q2 (output) COMPLEX*16 array, dimension (LDQ2,N) + If ICOMPQ = 0, Q2 is not referenced. Otherwise, + Contains a copy of the first K eigenvectors which will be used + by DLAED7 in a matrix multiply (DGEMM) to update the new + eigenvectors. + + LDQ2 (input) INTEGER + The leading dimension of the array Q2. LDQ2 >= max( 1, N ). + + W (output) DOUBLE PRECISION array, dimension (N) + This will hold the first k values of the final + deflation-altered z-vector and will be passed to DLAED3. + + INDXP (workspace) INTEGER array, dimension (N) + This will contain the permutation used to place deflated + values of D at the end of the array. On output INDXP(1:K) + points to the nondeflated D-values and INDXP(K+1:N) + points to the deflated eigenvalues. + + INDX (workspace) INTEGER array, dimension (N) + This will contain the permutation used to sort the contents of + D into ascending order. + + INDXQ (input) INTEGER array, dimension (N) + This contains the permutation which separately sorts the two + sub-problems in D into ascending order. Note that elements in + the second half of this permutation must first have CUTPNT + added to their values in order to be accurate. + + PERM (output) INTEGER array, dimension (N) + Contains the permutations (from deflation and sorting) to be + applied to each eigenblock. + + GIVPTR (output) INTEGER + Contains the number of Givens rotations which took place in + this subproblem. + + GIVCOL (output) INTEGER array, dimension (2, N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. + + GIVNUM (output) DOUBLE PRECISION array, dimension (2, N) + Each number indicates the S value to be used in the + corresponding Givens rotation. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --d__; + --z__; + --dlamda; + q2_dim1 = *ldq2; + q2_offset = 1 + q2_dim1; + q2 -= q2_offset; + --w; + --indxp; + --indx; + --indxq; + --perm; + givcol -= 3; + givnum -= 3; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -2; + } else if (*qsiz < *n) { + *info = -3; + } else if (*ldq < max(1,*n)) { + *info = -5; + } else if (*cutpnt < min(1,*n) || *cutpnt > *n) { + *info = -8; + } else if (*ldq2 < max(1,*n)) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZLAED8", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + n1 = *cutpnt; + n2 = *n - n1; + n1p1 = n1 + 1; + + if (*rho < 0.) { + dscal_(&n2, &c_b1294, &z__[n1p1], &c__1); + } + +/* Normalize z so that norm(z) = 1 */ + + t = 1. / sqrt(2.); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + indx[j] = j; +/* L10: */ + } + dscal_(n, &t, &z__[1], &c__1); + *rho = (d__1 = *rho * 2., abs(d__1)); + +/* Sort the eigenvalues into increasing order */ + + i__1 = *n; + for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) { + indxq[i__] += *cutpnt; +/* L20: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = d__[indxq[i__]]; + w[i__] = z__[indxq[i__]]; +/* L30: */ + } + i__ = 1; + j = *cutpnt + 1; + dlamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = dlamda[indx[i__]]; + z__[i__] = w[indx[i__]]; +/* L40: */ + } + +/* Calculate the allowable deflation tolerance */ + + imax = idamax_(n, &z__[1], &c__1); + jmax = idamax_(n, &d__[1], &c__1); + eps = EPSILON; + tol = eps * 8. * (d__1 = d__[jmax], abs(d__1)); + +/* + If the rank-1 modifier is small enough, no more needs to be done + -- except to reorganize Q so that its columns correspond with the + elements in D. +*/ + + if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) { + *k = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + perm[j] = indxq[indx[j]]; + zcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1] + , &c__1); +/* L50: */ + } + zlacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq); + return 0; + } + +/* + If there are multiple eigenvalues then the problem deflates. Here + the number of equal eigenvalues are found. As each equal + eigenvalue is found, an elementary reflector is computed to rotate + the corresponding eigensubspace so that the corresponding + components of Z are zero in this new basis. +*/ + + *k = 0; + *givptr = 0; + k2 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + indxp[k2] = j; + if (j == *n) { + goto L100; + } + } else { + jlam = j; + goto L70; + } +/* L60: */ + } +L70: + ++j; + if (j > *n) { + goto L90; + } + if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + indxp[k2] = j; + } else { + +/* Check if eigenvalues are close enough to allow deflation. */ + + s = z__[jlam]; + c__ = z__[j]; + +/* + Find sqrt(a**2+b**2) without overflow or + destructive underflow. +*/ + + tau = dlapy2_(&c__, &s); + t = d__[j] - d__[jlam]; + c__ /= tau; + s = -s / tau; + if ((d__1 = t * c__ * s, abs(d__1)) <= tol) { + +/* Deflation is possible. */ + + z__[j] = tau; + z__[jlam] = 0.; + +/* Record the appropriate Givens rotation */ + + ++(*givptr); + givcol[(*givptr << 1) + 1] = indxq[indx[jlam]]; + givcol[(*givptr << 1) + 2] = indxq[indx[j]]; + givnum[(*givptr << 1) + 1] = c__; + givnum[(*givptr << 1) + 2] = s; + zdrot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[indxq[ + indx[j]] * q_dim1 + 1], &c__1, &c__, &s); + t = d__[jlam] * c__ * c__ + d__[j] * s * s; + d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__; + d__[jlam] = t; + --k2; + i__ = 1; +L80: + if (k2 + i__ <= *n) { + if (d__[jlam] < d__[indxp[k2 + i__]]) { + indxp[k2 + i__ - 1] = indxp[k2 + i__]; + indxp[k2 + i__] = jlam; + ++i__; + goto L80; + } else { + indxp[k2 + i__ - 1] = jlam; + } + } else { + indxp[k2 + i__ - 1] = jlam; + } + jlam = j; + } else { + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + jlam = j; + } + } + goto L70; +L90: + +/* Record the last eigenvalue. */ + + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + +L100: + +/* + Sort the eigenvalues and corresponding eigenvectors into DLAMDA + and Q2 respectively. The eigenvalues/vectors which were not + deflated go into the first K slots of DLAMDA and Q2 respectively, + while those which were deflated go into the last N - K slots. +*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jp = indxp[j]; + dlamda[j] = d__[jp]; + perm[j] = indxq[indx[jp]]; + zcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1], & + c__1); +/* L110: */ + } + +/* + The deflated eigenvalues and their corresponding vectors go back + into the last N - K slots of D and Q respectively. +*/ + + if (*k < *n) { + i__1 = *n - *k; + dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); + i__1 = *n - *k; + zlacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(*k + + 1) * q_dim1 + 1], ldq); + } + + return 0; + +/* End of ZLAED8 */ + +} /* zlaed8_ */ + +/* Subroutine */ int zlahqr_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublecomplex *h__, integer *ldh, + doublecomplex *w, integer *iloz, integer *ihiz, doublecomplex *z__, + integer *ldz, integer *info) +{ + /* System generated locals */ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + doublecomplex z__1, z__2, z__3, z__4; + + /* Builtin functions */ + double d_imag(doublecomplex *); + void z_sqrt(doublecomplex *, doublecomplex *), d_cnjg(doublecomplex *, + doublecomplex *); + double z_abs(doublecomplex *); + + /* Local variables */ + static integer i__, j, k, l, m; + static doublereal s; + static doublecomplex t, u, v[2], x, y; + static integer i1, i2; + static doublecomplex t1; + static doublereal t2; + static doublecomplex v2; + static doublereal h10; + static doublecomplex h11; + static doublereal h21; + static doublecomplex h22; + static integer nh, nz; + static doublecomplex h11s; + static integer itn, its; + static doublereal ulp; + static doublecomplex sum; + static doublereal tst1; + static doublecomplex temp; + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *); + static doublereal rtemp, rwork[1]; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + + extern /* Subroutine */ int zlarfg_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *); + extern /* Double Complex */ VOID zladiv_(doublecomplex *, doublecomplex *, + doublecomplex *); + extern doublereal zlanhs_(char *, integer *, doublecomplex *, integer *, + doublereal *); + static doublereal smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZLAHQR is an auxiliary routine called by ZHSEQR to update the + eigenvalues and Schur decomposition already computed by ZHSEQR, by + dealing with the Hessenberg submatrix in rows and columns ILO to IHI. + + Arguments + ========= + + WANTT (input) LOGICAL + = .TRUE. : the full Schur form T is required; + = .FALSE.: only eigenvalues are required. + + WANTZ (input) LOGICAL + = .TRUE. : the matrix of Schur vectors Z is required; + = .FALSE.: Schur vectors are not required. + + N (input) INTEGER + The order of the matrix H. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows and + columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless ILO = 1). + ZLAHQR works primarily with the Hessenberg submatrix in rows + and columns ILO to IHI, but applies transformations to all of + H if WANTT is .TRUE.. + 1 <= ILO <= max(1,IHI); IHI <= N. + + H (input/output) COMPLEX*16 array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if WANTT is .TRUE., H is upper triangular in rows + and columns ILO:IHI, with any 2-by-2 diagonal blocks in + standard form. If WANTT is .FALSE., the contents of H are + unspecified on exit. + + LDH (input) INTEGER + The leading dimension of the array H. LDH >= max(1,N). + + W (output) COMPLEX*16 array, dimension (N) + The computed eigenvalues ILO to IHI are stored in the + corresponding elements of W. If WANTT is .TRUE., the + eigenvalues are stored in the same order as on the diagonal + of the Schur form returned in H, with W(i) = H(i,i). + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. + 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. + + Z (input/output) COMPLEX*16 array, dimension (LDZ,N) + If WANTZ is .TRUE., on entry Z must contain the current + matrix Z of transformations accumulated by ZHSEQR, and on + exit Z has been updated; transformations are applied only to + the submatrix Z(ILOZ:IHIZ,ILO:IHI). + If WANTZ is .FALSE., Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + > 0: if INFO = i, ZLAHQR failed to compute all the + eigenvalues ILO to IHI in a total of 30*(IHI-ILO+1) + iterations; elements i+1:ihi of W contain those + eigenvalues which have been successfully computed. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*ilo == *ihi) { + i__1 = *ilo; + i__2 = *ilo + *ilo * h_dim1; + w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; + return 0; + } + + nh = *ihi - *ilo + 1; + nz = *ihiz - *iloz + 1; + +/* + Set machine-dependent constants for the stopping criterion. + If norm(H) <= sqrt(OVFL), overflow should not occur. +*/ + + ulp = PRECISION; + smlnum = SAFEMINIMUM / ulp; + +/* + I1 and I2 are the indices of the first row and last column of H + to which transformations must be applied. If eigenvalues only are + being computed, I1 and I2 are set inside the main loop. +*/ + + if (*wantt) { + i1 = 1; + i2 = *n; + } + +/* ITN is the total number of QR iterations allowed. */ + + itn = nh * 30; + +/* + The main loop begins here. I is the loop index and decreases from + IHI to ILO in steps of 1. Each iteration of the loop works + with the active submatrix in rows and columns L to I. + Eigenvalues I+1 to IHI have already converged. Either L = ILO, or + H(L,L-1) is negligible so that the matrix splits. +*/ + + i__ = *ihi; +L10: + if (i__ < *ilo) { + goto L130; + } + +/* + Perform QR iterations on rows and columns ILO to I until a + submatrix of order 1 splits off at the bottom because a + subdiagonal element has become negligible. +*/ + + l = *ilo; + i__1 = itn; + for (its = 0; its <= i__1; ++its) { + +/* Look for a single small subdiagonal element. */ + + i__2 = l + 1; + for (k = i__; k >= i__2; --k) { + i__3 = k - 1 + (k - 1) * h_dim1; + i__4 = k + k * h_dim1; + tst1 = (d__1 = h__[i__3].r, abs(d__1)) + (d__2 = d_imag(&h__[k - + 1 + (k - 1) * h_dim1]), abs(d__2)) + ((d__3 = h__[i__4].r, + abs(d__3)) + (d__4 = d_imag(&h__[k + k * h_dim1]), abs( + d__4))); + if (tst1 == 0.) { + i__3 = i__ - l + 1; + tst1 = zlanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, rwork); + } + i__3 = k + (k - 1) * h_dim1; +/* Computing MAX */ + d__2 = ulp * tst1; + if ((d__1 = h__[i__3].r, abs(d__1)) <= max(d__2,smlnum)) { + goto L30; + } +/* L20: */ + } +L30: + l = k; + if (l > *ilo) { + +/* H(L,L-1) is negligible */ + + i__2 = l + (l - 1) * h_dim1; + h__[i__2].r = 0., h__[i__2].i = 0.; + } + +/* Exit from loop if a submatrix of order 1 has split off. */ + + if (l >= i__) { + goto L120; + } + +/* + Now the active submatrix is in rows and columns L to I. If + eigenvalues only are being computed, only the active submatrix + need be transformed. +*/ + + if (! (*wantt)) { + i1 = l; + i2 = i__; + } + + if (its == 10 || its == 20) { + +/* Exceptional shift. */ + + i__2 = i__ + (i__ - 1) * h_dim1; + s = (d__1 = h__[i__2].r, abs(d__1)) * .75; + i__2 = i__ + i__ * h_dim1; + z__1.r = s + h__[i__2].r, z__1.i = h__[i__2].i; + t.r = z__1.r, t.i = z__1.i; + } else { + +/* Wilkinson's shift. */ + + i__2 = i__ + i__ * h_dim1; + t.r = h__[i__2].r, t.i = h__[i__2].i; + i__2 = i__ - 1 + i__ * h_dim1; + i__3 = i__ + (i__ - 1) * h_dim1; + d__1 = h__[i__3].r; + z__1.r = d__1 * h__[i__2].r, z__1.i = d__1 * h__[i__2].i; + u.r = z__1.r, u.i = z__1.i; + if (u.r != 0. || u.i != 0.) { + i__2 = i__ - 1 + (i__ - 1) * h_dim1; + z__2.r = h__[i__2].r - t.r, z__2.i = h__[i__2].i - t.i; + z__1.r = z__2.r * .5, z__1.i = z__2.i * .5; + x.r = z__1.r, x.i = z__1.i; + z__3.r = x.r * x.r - x.i * x.i, z__3.i = x.r * x.i + x.i * + x.r; + z__2.r = z__3.r + u.r, z__2.i = z__3.i + u.i; + z_sqrt(&z__1, &z__2); + y.r = z__1.r, y.i = z__1.i; + if (x.r * y.r + d_imag(&x) * d_imag(&y) < 0.) { + z__1.r = -y.r, z__1.i = -y.i; + y.r = z__1.r, y.i = z__1.i; + } + z__3.r = x.r + y.r, z__3.i = x.i + y.i; + zladiv_(&z__2, &u, &z__3); + z__1.r = t.r - z__2.r, z__1.i = t.i - z__2.i; + t.r = z__1.r, t.i = z__1.i; + } + } + +/* Look for two consecutive small subdiagonal elements. */ + + i__2 = l + 1; + for (m = i__ - 1; m >= i__2; --m) { + +/* + Determine the effect of starting the single-shift QR + iteration at row M, and see if this would make H(M,M-1) + negligible. +*/ + + i__3 = m + m * h_dim1; + h11.r = h__[i__3].r, h11.i = h__[i__3].i; + i__3 = m + 1 + (m + 1) * h_dim1; + h22.r = h__[i__3].r, h22.i = h__[i__3].i; + z__1.r = h11.r - t.r, z__1.i = h11.i - t.i; + h11s.r = z__1.r, h11s.i = z__1.i; + i__3 = m + 1 + m * h_dim1; + h21 = h__[i__3].r; + s = (d__1 = h11s.r, abs(d__1)) + (d__2 = d_imag(&h11s), abs(d__2)) + + abs(h21); + z__1.r = h11s.r / s, z__1.i = h11s.i / s; + h11s.r = z__1.r, h11s.i = z__1.i; + h21 /= s; + v[0].r = h11s.r, v[0].i = h11s.i; + v[1].r = h21, v[1].i = 0.; + i__3 = m + (m - 1) * h_dim1; + h10 = h__[i__3].r; + tst1 = ((d__1 = h11s.r, abs(d__1)) + (d__2 = d_imag(&h11s), abs( + d__2))) * ((d__3 = h11.r, abs(d__3)) + (d__4 = d_imag(& + h11), abs(d__4)) + ((d__5 = h22.r, abs(d__5)) + (d__6 = + d_imag(&h22), abs(d__6)))); + if ((d__1 = h10 * h21, abs(d__1)) <= ulp * tst1) { + goto L50; + } +/* L40: */ + } + i__2 = l + l * h_dim1; + h11.r = h__[i__2].r, h11.i = h__[i__2].i; + i__2 = l + 1 + (l + 1) * h_dim1; + h22.r = h__[i__2].r, h22.i = h__[i__2].i; + z__1.r = h11.r - t.r, z__1.i = h11.i - t.i; + h11s.r = z__1.r, h11s.i = z__1.i; + i__2 = l + 1 + l * h_dim1; + h21 = h__[i__2].r; + s = (d__1 = h11s.r, abs(d__1)) + (d__2 = d_imag(&h11s), abs(d__2)) + + abs(h21); + z__1.r = h11s.r / s, z__1.i = h11s.i / s; + h11s.r = z__1.r, h11s.i = z__1.i; + h21 /= s; + v[0].r = h11s.r, v[0].i = h11s.i; + v[1].r = h21, v[1].i = 0.; +L50: + +/* Single-shift QR step */ + + i__2 = i__ - 1; + for (k = m; k <= i__2; ++k) { + +/* + The first iteration of this loop determines a reflection G + from the vector V and applies it from left and right to H, + thus creating a nonzero bulge below the subdiagonal. + + Each subsequent iteration determines a reflection G to + restore the Hessenberg form in the (K-1)th column, and thus + chases the bulge one step toward the bottom of the active + submatrix. + + V(2) is always real before the call to ZLARFG, and hence + after the call T2 ( = T1*V(2) ) is also real. +*/ + + if (k > m) { + zcopy_(&c__2, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); + } + zlarfg_(&c__2, v, &v[1], &c__1, &t1); + if (k > m) { + i__3 = k + (k - 1) * h_dim1; + h__[i__3].r = v[0].r, h__[i__3].i = v[0].i; + i__3 = k + 1 + (k - 1) * h_dim1; + h__[i__3].r = 0., h__[i__3].i = 0.; + } + v2.r = v[1].r, v2.i = v[1].i; + z__1.r = t1.r * v2.r - t1.i * v2.i, z__1.i = t1.r * v2.i + t1.i * + v2.r; + t2 = z__1.r; + +/* + Apply G from the left to transform the rows of the matrix + in columns K to I2. +*/ + + i__3 = i2; + for (j = k; j <= i__3; ++j) { + d_cnjg(&z__3, &t1); + i__4 = k + j * h_dim1; + z__2.r = z__3.r * h__[i__4].r - z__3.i * h__[i__4].i, z__2.i = + z__3.r * h__[i__4].i + z__3.i * h__[i__4].r; + i__5 = k + 1 + j * h_dim1; + z__4.r = t2 * h__[i__5].r, z__4.i = t2 * h__[i__5].i; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + sum.r = z__1.r, sum.i = z__1.i; + i__4 = k + j * h_dim1; + i__5 = k + j * h_dim1; + z__1.r = h__[i__5].r - sum.r, z__1.i = h__[i__5].i - sum.i; + h__[i__4].r = z__1.r, h__[i__4].i = z__1.i; + i__4 = k + 1 + j * h_dim1; + i__5 = k + 1 + j * h_dim1; + z__2.r = sum.r * v2.r - sum.i * v2.i, z__2.i = sum.r * v2.i + + sum.i * v2.r; + z__1.r = h__[i__5].r - z__2.r, z__1.i = h__[i__5].i - z__2.i; + h__[i__4].r = z__1.r, h__[i__4].i = z__1.i; +/* L60: */ + } + +/* + Apply G from the right to transform the columns of the + matrix in rows I1 to min(K+2,I). + + Computing MIN +*/ + i__4 = k + 2; + i__3 = min(i__4,i__); + for (j = i1; j <= i__3; ++j) { + i__4 = j + k * h_dim1; + z__2.r = t1.r * h__[i__4].r - t1.i * h__[i__4].i, z__2.i = + t1.r * h__[i__4].i + t1.i * h__[i__4].r; + i__5 = j + (k + 1) * h_dim1; + z__3.r = t2 * h__[i__5].r, z__3.i = t2 * h__[i__5].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + sum.r = z__1.r, sum.i = z__1.i; + i__4 = j + k * h_dim1; + i__5 = j + k * h_dim1; + z__1.r = h__[i__5].r - sum.r, z__1.i = h__[i__5].i - sum.i; + h__[i__4].r = z__1.r, h__[i__4].i = z__1.i; + i__4 = j + (k + 1) * h_dim1; + i__5 = j + (k + 1) * h_dim1; + d_cnjg(&z__3, &v2); + z__2.r = sum.r * z__3.r - sum.i * z__3.i, z__2.i = sum.r * + z__3.i + sum.i * z__3.r; + z__1.r = h__[i__5].r - z__2.r, z__1.i = h__[i__5].i - z__2.i; + h__[i__4].r = z__1.r, h__[i__4].i = z__1.i; +/* L70: */ + } + + if (*wantz) { + +/* Accumulate transformations in the matrix Z */ + + i__3 = *ihiz; + for (j = *iloz; j <= i__3; ++j) { + i__4 = j + k * z_dim1; + z__2.r = t1.r * z__[i__4].r - t1.i * z__[i__4].i, z__2.i = + t1.r * z__[i__4].i + t1.i * z__[i__4].r; + i__5 = j + (k + 1) * z_dim1; + z__3.r = t2 * z__[i__5].r, z__3.i = t2 * z__[i__5].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + sum.r = z__1.r, sum.i = z__1.i; + i__4 = j + k * z_dim1; + i__5 = j + k * z_dim1; + z__1.r = z__[i__5].r - sum.r, z__1.i = z__[i__5].i - + sum.i; + z__[i__4].r = z__1.r, z__[i__4].i = z__1.i; + i__4 = j + (k + 1) * z_dim1; + i__5 = j + (k + 1) * z_dim1; + d_cnjg(&z__3, &v2); + z__2.r = sum.r * z__3.r - sum.i * z__3.i, z__2.i = sum.r * + z__3.i + sum.i * z__3.r; + z__1.r = z__[i__5].r - z__2.r, z__1.i = z__[i__5].i - + z__2.i; + z__[i__4].r = z__1.r, z__[i__4].i = z__1.i; +/* L80: */ + } + } + + if (k == m && m > l) { + +/* + If the QR step was started at row M > L because two + consecutive small subdiagonals were found, then extra + scaling must be performed to ensure that H(M,M-1) remains + real. +*/ + + z__1.r = 1. - t1.r, z__1.i = 0. - t1.i; + temp.r = z__1.r, temp.i = z__1.i; + d__1 = z_abs(&temp); + z__1.r = temp.r / d__1, z__1.i = temp.i / d__1; + temp.r = z__1.r, temp.i = z__1.i; + i__3 = m + 1 + m * h_dim1; + i__4 = m + 1 + m * h_dim1; + d_cnjg(&z__2, &temp); + z__1.r = h__[i__4].r * z__2.r - h__[i__4].i * z__2.i, z__1.i = + h__[i__4].r * z__2.i + h__[i__4].i * z__2.r; + h__[i__3].r = z__1.r, h__[i__3].i = z__1.i; + if (m + 2 <= i__) { + i__3 = m + 2 + (m + 1) * h_dim1; + i__4 = m + 2 + (m + 1) * h_dim1; + z__1.r = h__[i__4].r * temp.r - h__[i__4].i * temp.i, + z__1.i = h__[i__4].r * temp.i + h__[i__4].i * + temp.r; + h__[i__3].r = z__1.r, h__[i__3].i = z__1.i; + } + i__3 = i__; + for (j = m; j <= i__3; ++j) { + if (j != m + 1) { + if (i2 > j) { + i__4 = i2 - j; + zscal_(&i__4, &temp, &h__[j + (j + 1) * h_dim1], + ldh); + } + i__4 = j - i1; + d_cnjg(&z__1, &temp); + zscal_(&i__4, &z__1, &h__[i1 + j * h_dim1], &c__1); + if (*wantz) { + d_cnjg(&z__1, &temp); + zscal_(&nz, &z__1, &z__[*iloz + j * z_dim1], & + c__1); + } + } +/* L90: */ + } + } +/* L100: */ + } + +/* Ensure that H(I,I-1) is real. */ + + i__2 = i__ + (i__ - 1) * h_dim1; + temp.r = h__[i__2].r, temp.i = h__[i__2].i; + if (d_imag(&temp) != 0.) { + rtemp = z_abs(&temp); + i__2 = i__ + (i__ - 1) * h_dim1; + h__[i__2].r = rtemp, h__[i__2].i = 0.; + z__1.r = temp.r / rtemp, z__1.i = temp.i / rtemp; + temp.r = z__1.r, temp.i = z__1.i; + if (i2 > i__) { + i__2 = i2 - i__; + d_cnjg(&z__1, &temp); + zscal_(&i__2, &z__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); + } + i__2 = i__ - i1; + zscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1); + if (*wantz) { + zscal_(&nz, &temp, &z__[*iloz + i__ * z_dim1], &c__1); + } + } + +/* L110: */ + } + +/* Failure to converge in remaining number of iterations */ + + *info = i__; + return 0; + +L120: + +/* H(I,I-1) is negligible: one eigenvalue has converged. */ + + i__1 = i__; + i__2 = i__ + i__ * h_dim1; + w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; + +/* + Decrement number of remaining iterations, and return to start of + the main loop with new value of I. +*/ + + itn -= its; + i__ = l - 1; + goto L10; + +L130: + return 0; + +/* End of ZLAHQR */ + +} /* zlahqr_ */ + +/* Subroutine */ int zlahrd_(integer *n, integer *k, integer *nb, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *t, + integer *ldt, doublecomplex *y, integer *ldy) +{ + /* System generated locals */ + integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, + i__3; + doublecomplex z__1; + + /* Local variables */ + static integer i__; + static doublecomplex ei; + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *), zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *), + zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, + integer *), zaxpy_(integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, integer *), ztrmv_(char *, char *, + char *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *), zlarfg_(integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *), + zlacgv_(integer *, doublecomplex *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZLAHRD reduces the first NB columns of a complex general n-by-(n-k+1) + matrix A so that elements below the k-th subdiagonal are zero. The + reduction is performed by a unitary similarity transformation + Q' * A * Q. The routine returns the matrices V and T which determine + Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. + + This is an auxiliary routine called by ZGEHRD. + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. + + K (input) INTEGER + The offset for the reduction. Elements below the k-th + subdiagonal in the first NB columns are reduced to zero. + + NB (input) INTEGER + The number of columns to be reduced. + + A (input/output) COMPLEX*16 array, dimension (LDA,N-K+1) + On entry, the n-by-(n-k+1) general matrix A. + On exit, the elements on and above the k-th subdiagonal in + the first NB columns are overwritten with the corresponding + elements of the reduced matrix; the elements below the k-th + subdiagonal, with the array TAU, represent the matrix Q as a + product of elementary reflectors. The other columns of A are + unchanged. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) COMPLEX*16 array, dimension (NB) + The scalar factors of the elementary reflectors. See Further + Details. + + T (output) COMPLEX*16 array, dimension (LDT,NB) + The upper triangular matrix T. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= NB. + + Y (output) COMPLEX*16 array, dimension (LDY,NB) + The n-by-nb matrix Y. + + LDY (input) INTEGER + The leading dimension of the array Y. LDY >= max(1,N). + + Further Details + =============== + + The matrix Q is represented as a product of nb elementary reflectors + + Q = H(1) H(2) . . . H(nb). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in + A(i+k+1:n,i), and tau in TAU(i). + + The elements of the vectors v together form the (n-k+1)-by-nb matrix + V which is needed, with T and Y, to apply the transformation to the + unreduced part of the matrix, using an update of the form: + A := (I - V*T*V') * (A - Y*V'). + + The contents of A on exit are illustrated by the following example + with n = 7, k = 3 and nb = 2: + + ( a h a a a ) + ( a h a a a ) + ( a h a a a ) + ( h h a a a ) + ( v1 h a a a ) + ( v1 v2 a a a ) + ( v1 v2 a a a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + --tau; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + + /* Function Body */ + if (*n <= 1) { + return 0; + } + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ > 1) { + +/* + Update A(1:n,i) + + Compute i-th column of A - Y * V' +*/ + + i__2 = i__ - 1; + zlacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda); + i__2 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", n, &i__2, &z__1, &y[y_offset], ldy, &a[*k + + i__ - 1 + a_dim1], lda, &c_b60, &a[i__ * a_dim1 + 1], & + c__1); + i__2 = i__ - 1; + zlacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda); + +/* + Apply I - V * T' * V' to this column (call it b) from the + left, using the last column of T as workspace + + Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) + ( V2 ) ( b2 ) + + where V1 is unit lower triangular + + w := V1' * b1 +*/ + + i__2 = i__ - 1; + zcopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + + 1], &c__1); + i__2 = i__ - 1; + ztrmv_("Lower", "Conjugate transpose", "Unit", &i__2, &a[*k + 1 + + a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1); + +/* w := w + V2'*b2 */ + + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[*k + i__ + + a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b60, + &t[*nb * t_dim1 + 1], &c__1); + +/* w := T'*w */ + + i__2 = i__ - 1; + ztrmv_("Upper", "Conjugate transpose", "Non-unit", &i__2, &t[ + t_offset], ldt, &t[*nb * t_dim1 + 1], &c__1); + +/* b2 := b2 - V2*w */ + + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &a[*k + i__ + a_dim1], + lda, &t[*nb * t_dim1 + 1], &c__1, &c_b60, &a[*k + i__ + + i__ * a_dim1], &c__1); + +/* b1 := b1 - V1*w */ + + i__2 = i__ - 1; + ztrmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1] + , lda, &t[*nb * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zaxpy_(&i__2, &z__1, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + i__ + * a_dim1], &c__1); + + i__2 = *k + i__ - 1 + (i__ - 1) * a_dim1; + a[i__2].r = ei.r, a[i__2].i = ei.i; + } + +/* + Generate the elementary reflector H(i) to annihilate + A(k+i+1:n,i) +*/ + + i__2 = *k + i__ + i__ * a_dim1; + ei.r = a[i__2].r, ei.i = a[i__2].i; + i__2 = *n - *k - i__ + 1; +/* Computing MIN */ + i__3 = *k + i__ + 1; + zlarfg_(&i__2, &ei, &a[min(i__3,*n) + i__ * a_dim1], &c__1, &tau[i__]) + ; + i__2 = *k + i__ + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Compute Y(1:n,i) */ + + i__2 = *n - *k - i__ + 1; + zgemv_("No transpose", n, &i__2, &c_b60, &a[(i__ + 1) * a_dim1 + 1], + lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b59, &y[i__ * + y_dim1 + 1], &c__1); + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[*k + i__ + + a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b59, &t[ + i__ * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", n, &i__2, &z__1, &y[y_offset], ldy, &t[i__ * + t_dim1 + 1], &c__1, &c_b60, &y[i__ * y_dim1 + 1], &c__1); + zscal_(n, &tau[i__], &y[i__ * y_dim1 + 1], &c__1); + +/* Compute T(1:i,i) */ + + i__2 = i__ - 1; + i__3 = i__; + z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i; + zscal_(&i__2, &z__1, &t[i__ * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + ztrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt, + &t[i__ * t_dim1 + 1], &c__1) + ; + i__2 = i__ + i__ * t_dim1; + i__3 = i__; + t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i; + +/* L10: */ + } + i__1 = *k + *nb + *nb * a_dim1; + a[i__1].r = ei.r, a[i__1].i = ei.i; + + return 0; + +/* End of ZLAHRD */ + +} /* zlahrd_ */ + +/* Subroutine */ int zlals0_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *nrhs, doublecomplex *b, integer *ldb, + doublecomplex *bx, integer *ldbx, integer *perm, integer *givptr, + integer *givcol, integer *ldgcol, doublereal *givnum, integer *ldgnum, + doublereal *poles, doublereal *difl, doublereal *difr, doublereal * + z__, integer *k, doublereal *c__, doublereal *s, doublereal *rwork, + integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, difr_dim1, difr_offset, givnum_dim1, + givnum_offset, poles_dim1, poles_offset, b_dim1, b_offset, + bx_dim1, bx_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1; + doublecomplex z__1; + + /* Builtin functions */ + double d_imag(doublecomplex *); + + /* Local variables */ + static integer i__, j, m, n; + static doublereal dj; + static integer nlp1, jcol; + static doublereal temp; + static integer jrow; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + static doublereal diflj, difrj, dsigj; + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), zdrot_(integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublereal *, doublereal *); + extern doublereal dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), xerbla_(char *, integer *); + static doublereal dsigjp; + extern /* Subroutine */ int zdscal_(integer *, doublereal *, + doublecomplex *, integer *), zlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublecomplex * + , integer *, integer *), zlacpy_(char *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + December 1, 1999 + + + Purpose + ======= + + ZLALS0 applies back the multiplying factors of either the left or the + right singular vector matrix of a diagonal matrix appended by a row + to the right hand side matrix B in solving the least squares problem + using the divide-and-conquer SVD approach. + + For the left singular vector matrix, three types of orthogonal + matrices are involved: + + (1L) Givens rotations: the number of such rotations is GIVPTR; the + pairs of columns/rows they were applied to are stored in GIVCOL; + and the C- and S-values of these rotations are stored in GIVNUM. + + (2L) Permutation. The (NL+1)-st row of B is to be moved to the first + row, and for J=2:N, PERM(J)-th row of B is to be moved to the + J-th row. + + (3L) The left singular vector matrix of the remaining matrix. + + For the right singular vector matrix, four types of orthogonal + matrices are involved: + + (1R) The right singular vector matrix of the remaining matrix. + + (2R) If SQRE = 1, one extra Givens rotation to generate the right + null space. + + (3R) The inverse transformation of (2L). + + (4R) The inverse transformation of (1L). + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed in + factored form: + = 0: Left singular vector matrix. + = 1: Right singular vector matrix. + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has row dimension N = NL + NR + 1, + and column dimension M = N + SQRE. + + NRHS (input) INTEGER + The number of columns of B and BX. NRHS must be at least 1. + + B (input/output) COMPLEX*16 array, dimension ( LDB, NRHS ) + On input, B contains the right hand sides of the least + squares problem in rows 1 through M. On output, B contains + the solution X in rows 1 through N. + + LDB (input) INTEGER + The leading dimension of B. LDB must be at least + max(1,MAX( M, N ) ). + + BX (workspace) COMPLEX*16 array, dimension ( LDBX, NRHS ) + + LDBX (input) INTEGER + The leading dimension of BX. + + PERM (input) INTEGER array, dimension ( N ) + The permutations (from deflation and sorting) applied + to the two blocks. + + GIVPTR (input) INTEGER + The number of Givens rotations which took place in this + subproblem. + + GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) + Each pair of numbers indicates a pair of rows/columns + involved in a Givens rotation. + + LDGCOL (input) INTEGER + The leading dimension of GIVCOL, must be at least N. + + GIVNUM (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) + Each number indicates the C or S value used in the + corresponding Givens rotation. + + LDGNUM (input) INTEGER + The leading dimension of arrays DIFR, POLES and + GIVNUM, must be at least K. + + POLES (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) + On entry, POLES(1:K, 1) contains the new singular + values obtained from solving the secular equation, and + POLES(1:K, 2) is an array containing the poles in the secular + equation. + + DIFL (input) DOUBLE PRECISION array, dimension ( K ). + On entry, DIFL(I) is the distance between I-th updated + (undeflated) singular value and the I-th (undeflated) old + singular value. + + DIFR (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ). + On entry, DIFR(I, 1) contains the distances between I-th + updated (undeflated) singular value and the I+1-th + (undeflated) old singular value. And DIFR(I, 2) is the + normalizing factor for the I-th right singular vector. + + Z (input) DOUBLE PRECISION array, dimension ( K ) + Contain the components of the deflation-adjusted updating row + vector. + + K (input) INTEGER + Contains the dimension of the non-deflated matrix, + This is the order of the related secular equation. 1 <= K <=N. + + C (input) DOUBLE PRECISION + C contains garbage if SQRE =0 and the C-value of a Givens + rotation related to the right null space if SQRE = 1. + + S (input) DOUBLE PRECISION + S contains garbage if SQRE =0 and the S-value of a Givens + rotation related to the right null space if SQRE = 1. + + RWORK (workspace) DOUBLE PRECISION array, dimension + ( K*(1+NRHS) + 2*NRHS ) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + bx_dim1 = *ldbx; + bx_offset = 1 + bx_dim1; + bx -= bx_offset; + --perm; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + difr_dim1 = *ldgnum; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + poles_dim1 = *ldgnum; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + givnum_dim1 = *ldgnum; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + --difl; + --z__; + --rwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*nl < 1) { + *info = -2; + } else if (*nr < 1) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } + + n = *nl + *nr + 1; + + if (*nrhs < 1) { + *info = -5; + } else if (*ldb < n) { + *info = -7; + } else if (*ldbx < n) { + *info = -9; + } else if (*givptr < 0) { + *info = -11; + } else if (*ldgcol < n) { + *info = -13; + } else if (*ldgnum < n) { + *info = -15; + } else if (*k < 1) { + *info = -20; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZLALS0", &i__1); + return 0; + } + + m = n + *sqre; + nlp1 = *nl + 1; + + if (*icompq == 0) { + +/* + Apply back orthogonal transformations from the left. + + Step (1L): apply back the Givens rotations performed. +*/ + + i__1 = *givptr; + for (i__ = 1; i__ <= i__1; ++i__) { + zdrot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & + b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + + (givnum_dim1 << 1)], &givnum[i__ + givnum_dim1]); +/* L10: */ + } + +/* Step (2L): permute rows of B. */ + + zcopy_(nrhs, &b[nlp1 + b_dim1], ldb, &bx[bx_dim1 + 1], ldbx); + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + zcopy_(nrhs, &b[perm[i__] + b_dim1], ldb, &bx[i__ + bx_dim1], + ldbx); +/* L20: */ + } + +/* + Step (3L): apply the inverse of the left singular vector + matrix to BX. +*/ + + if (*k == 1) { + zcopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb); + if (z__[1] < 0.) { + zdscal_(nrhs, &c_b1294, &b[b_offset], ldb); + } + } else { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + diflj = difl[j]; + dj = poles[j + poles_dim1]; + dsigj = -poles[j + (poles_dim1 << 1)]; + if (j < *k) { + difrj = -difr[j + difr_dim1]; + dsigjp = -poles[j + 1 + (poles_dim1 << 1)]; + } + if (z__[j] == 0. || poles[j + (poles_dim1 << 1)] == 0.) { + rwork[j] = 0.; + } else { + rwork[j] = -poles[j + (poles_dim1 << 1)] * z__[j] / diflj + / (poles[j + (poles_dim1 << 1)] + dj); + } + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + if (z__[i__] == 0. || poles[i__ + (poles_dim1 << 1)] == + 0.) { + rwork[i__] = 0.; + } else { + rwork[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] + / (dlamc3_(&poles[i__ + (poles_dim1 << 1)], & + dsigj) - diflj) / (poles[i__ + (poles_dim1 << + 1)] + dj); + } +/* L30: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + if (z__[i__] == 0. || poles[i__ + (poles_dim1 << 1)] == + 0.) { + rwork[i__] = 0.; + } else { + rwork[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] + / (dlamc3_(&poles[i__ + (poles_dim1 << 1)], & + dsigjp) + difrj) / (poles[i__ + (poles_dim1 << + 1)] + dj); + } +/* L40: */ + } + rwork[1] = -1.; + temp = dnrm2_(k, &rwork[1], &c__1); + +/* + Since B and BX are complex, the following call to DGEMV + is performed in two steps (real and imaginary parts). + + CALL DGEMV( 'T', K, NRHS, ONE, BX, LDBX, WORK, 1, ZERO, + $ B( J, 1 ), LDB ) +*/ + + i__ = *k + (*nrhs << 1); + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = *k; + for (jrow = 1; jrow <= i__3; ++jrow) { + ++i__; + i__4 = jrow + jcol * bx_dim1; + rwork[i__] = bx[i__4].r; +/* L50: */ + } +/* L60: */ + } + dgemv_("T", k, nrhs, &c_b1015, &rwork[*k + 1 + (*nrhs << 1)], + k, &rwork[1], &c__1, &c_b324, &rwork[*k + 1], &c__1); + i__ = *k + (*nrhs << 1); + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = *k; + for (jrow = 1; jrow <= i__3; ++jrow) { + ++i__; + rwork[i__] = d_imag(&bx[jrow + jcol * bx_dim1]); +/* L70: */ + } +/* L80: */ + } + dgemv_("T", k, nrhs, &c_b1015, &rwork[*k + 1 + (*nrhs << 1)], + k, &rwork[1], &c__1, &c_b324, &rwork[*k + 1 + *nrhs], + &c__1); + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = j + jcol * b_dim1; + i__4 = jcol + *k; + i__5 = jcol + *k + *nrhs; + z__1.r = rwork[i__4], z__1.i = rwork[i__5]; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L90: */ + } + zlascl_("G", &c__0, &c__0, &temp, &c_b1015, &c__1, nrhs, &b[j + + b_dim1], ldb, info); +/* L100: */ + } + } + +/* Move the deflated rows of BX to B also. */ + + if (*k < max(m,n)) { + i__1 = n - *k; + zlacpy_("A", &i__1, nrhs, &bx[*k + 1 + bx_dim1], ldbx, &b[*k + 1 + + b_dim1], ldb); + } + } else { + +/* + Apply back the right orthogonal transformations. + + Step (1R): apply back the new right singular vector matrix + to B. +*/ + + if (*k == 1) { + zcopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx); + } else { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dsigj = poles[j + (poles_dim1 << 1)]; + if (z__[j] == 0.) { + rwork[j] = 0.; + } else { + rwork[j] = -z__[j] / difl[j] / (dsigj + poles[j + + poles_dim1]) / difr[j + (difr_dim1 << 1)]; + } + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + if (z__[j] == 0.) { + rwork[i__] = 0.; + } else { + d__1 = -poles[i__ + 1 + (poles_dim1 << 1)]; + rwork[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difr[ + i__ + difr_dim1]) / (dsigj + poles[i__ + + poles_dim1]) / difr[i__ + (difr_dim1 << 1)]; + } +/* L110: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + if (z__[j] == 0.) { + rwork[i__] = 0.; + } else { + d__1 = -poles[i__ + (poles_dim1 << 1)]; + rwork[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difl[ + i__]) / (dsigj + poles[i__ + poles_dim1]) / + difr[i__ + (difr_dim1 << 1)]; + } +/* L120: */ + } + +/* + Since B and BX are complex, the following call to DGEMV + is performed in two steps (real and imaginary parts). + + CALL DGEMV( 'T', K, NRHS, ONE, B, LDB, WORK, 1, ZERO, + $ BX( J, 1 ), LDBX ) +*/ + + i__ = *k + (*nrhs << 1); + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = *k; + for (jrow = 1; jrow <= i__3; ++jrow) { + ++i__; + i__4 = jrow + jcol * b_dim1; + rwork[i__] = b[i__4].r; +/* L130: */ + } +/* L140: */ + } + dgemv_("T", k, nrhs, &c_b1015, &rwork[*k + 1 + (*nrhs << 1)], + k, &rwork[1], &c__1, &c_b324, &rwork[*k + 1], &c__1); + i__ = *k + (*nrhs << 1); + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = *k; + for (jrow = 1; jrow <= i__3; ++jrow) { + ++i__; + rwork[i__] = d_imag(&b[jrow + jcol * b_dim1]); +/* L150: */ + } +/* L160: */ + } + dgemv_("T", k, nrhs, &c_b1015, &rwork[*k + 1 + (*nrhs << 1)], + k, &rwork[1], &c__1, &c_b324, &rwork[*k + 1 + *nrhs], + &c__1); + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = j + jcol * bx_dim1; + i__4 = jcol + *k; + i__5 = jcol + *k + *nrhs; + z__1.r = rwork[i__4], z__1.i = rwork[i__5]; + bx[i__3].r = z__1.r, bx[i__3].i = z__1.i; +/* L170: */ + } +/* L180: */ + } + } + +/* + Step (2R): if SQRE = 1, apply back the rotation that is + related to the right null space of the subproblem. +*/ + + if (*sqre == 1) { + zcopy_(nrhs, &b[m + b_dim1], ldb, &bx[m + bx_dim1], ldbx); + zdrot_(nrhs, &bx[bx_dim1 + 1], ldbx, &bx[m + bx_dim1], ldbx, c__, + s); + } + if (*k < max(m,n)) { + i__1 = n - *k; + zlacpy_("A", &i__1, nrhs, &b[*k + 1 + b_dim1], ldb, &bx[*k + 1 + + bx_dim1], ldbx); + } + +/* Step (3R): permute rows of B. */ + + zcopy_(nrhs, &bx[bx_dim1 + 1], ldbx, &b[nlp1 + b_dim1], ldb); + if (*sqre == 1) { + zcopy_(nrhs, &bx[m + bx_dim1], ldbx, &b[m + b_dim1], ldb); + } + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + zcopy_(nrhs, &bx[i__ + bx_dim1], ldbx, &b[perm[i__] + b_dim1], + ldb); +/* L190: */ + } + +/* Step (4R): apply back the Givens rotations performed. */ + + for (i__ = *givptr; i__ >= 1; --i__) { + d__1 = -givnum[i__ + givnum_dim1]; + zdrot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & + b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + + (givnum_dim1 << 1)], &d__1); +/* L200: */ + } + } + + return 0; + +/* End of ZLALS0 */ + +} /* zlals0_ */ + +/* Subroutine */ int zlalsa_(integer *icompq, integer *smlsiz, integer *n, + integer *nrhs, doublecomplex *b, integer *ldb, doublecomplex *bx, + integer *ldbx, doublereal *u, integer *ldu, doublereal *vt, integer * + k, doublereal *difl, doublereal *difr, doublereal *z__, doublereal * + poles, integer *givptr, integer *givcol, integer *ldgcol, integer * + perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal * + rwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1, + difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, + poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, + z_dim1, z_offset, b_dim1, b_offset, bx_dim1, bx_offset, i__1, + i__2, i__3, i__4, i__5, i__6; + doublecomplex z__1; + + /* Builtin functions */ + double d_imag(doublecomplex *); + integer pow_ii(integer *, integer *); + + /* Local variables */ + static integer i__, j, i1, ic, lf, nd, ll, nl, nr, im1, nlf, nrf, lvl, + ndb1, nlp1, lvl2, nrp1, jcol, nlvl, sqre, jrow, jimag; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer jreal, inode, ndiml, ndimr; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zlals0_(integer *, integer *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *), dlasdt_(integer *, integer *, integer * + , integer *, integer *, integer *, integer *), xerbla_(char *, + integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZLALSA is an itermediate step in solving the least squares problem + by computing the SVD of the coefficient matrix in compact form (The + singular vectors are computed as products of simple orthorgonal + matrices.). + + If ICOMPQ = 0, ZLALSA applies the inverse of the left singular vector + matrix of an upper bidiagonal matrix to the right hand side; and if + ICOMPQ = 1, ZLALSA applies the right singular vector matrix to the + right hand side. The singular vector matrices were generated in + compact form by ZLALSA. + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether the left or the right singular vector + matrix is involved. + = 0: Left singular vector matrix + = 1: Right singular vector matrix + + SMLSIZ (input) INTEGER + The maximum size of the subproblems at the bottom of the + computation tree. + + N (input) INTEGER + The row and column dimensions of the upper bidiagonal matrix. + + NRHS (input) INTEGER + The number of columns of B and BX. NRHS must be at least 1. + + B (input) COMPLEX*16 array, dimension ( LDB, NRHS ) + On input, B contains the right hand sides of the least + squares problem in rows 1 through M. On output, B contains + the solution X in rows 1 through N. + + LDB (input) INTEGER + The leading dimension of B in the calling subprogram. + LDB must be at least max(1,MAX( M, N ) ). + + BX (output) COMPLEX*16 array, dimension ( LDBX, NRHS ) + On exit, the result of applying the left or right singular + vector matrix to B. + + LDBX (input) INTEGER + The leading dimension of BX. + + U (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ). + On entry, U contains the left singular vector matrices of all + subproblems at the bottom level. + + LDU (input) INTEGER, LDU = > N. + The leading dimension of arrays U, VT, DIFL, DIFR, + POLES, GIVNUM, and Z. + + VT (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ). + On entry, VT' contains the right singular vector matrices of + all subproblems at the bottom level. + + K (input) INTEGER array, dimension ( N ). + + DIFL (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). + where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. + + DIFR (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). + On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record + distances between singular values on the I-th level and + singular values on the (I -1)-th level, and DIFR(*, 2 * I) + record the normalizing factors of the right singular vectors + matrices of subproblems on I-th level. + + Z (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). + On entry, Z(1, I) contains the components of the deflation- + adjusted updating row vector for subproblems on the I-th + level. + + POLES (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). + On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old + singular values involved in the secular equations on the I-th + level. + + GIVPTR (input) INTEGER array, dimension ( N ). + On entry, GIVPTR( I ) records the number of Givens + rotations performed on the I-th problem on the computation + tree. + + GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). + On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the + locations of Givens rotations performed on the I-th level on + the computation tree. + + LDGCOL (input) INTEGER, LDGCOL = > N. + The leading dimension of arrays GIVCOL and PERM. + + PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). + On entry, PERM(*, I) records permutations done on the I-th + level of the computation tree. + + GIVNUM (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). + On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- + values of Givens rotations performed on the I-th level on the + computation tree. + + C (input) DOUBLE PRECISION array, dimension ( N ). + On entry, if the I-th subproblem is not square, + C( I ) contains the C-value of a Givens rotation related to + the right null space of the I-th subproblem. + + S (input) DOUBLE PRECISION array, dimension ( N ). + On entry, if the I-th subproblem is not square, + S( I ) contains the S-value of a Givens rotation related to + the right null space of the I-th subproblem. + + RWORK (workspace) DOUBLE PRECISION array, dimension at least + max ( N, (SMLSZ+1)*NRHS*3 ). + + IWORK (workspace) INTEGER array. + The dimension must be at least 3 * N + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + bx_dim1 = *ldbx; + bx_offset = 1 + bx_dim1; + bx -= bx_offset; + givnum_dim1 = *ldu; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + poles_dim1 = *ldu; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + z_dim1 = *ldu; + z_offset = 1 + z_dim1; + z__ -= z_offset; + difr_dim1 = *ldu; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + difl_dim1 = *ldu; + difl_offset = 1 + difl_dim1; + difl -= difl_offset; + vt_dim1 = *ldu; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + --k; + --givptr; + perm_dim1 = *ldgcol; + perm_offset = 1 + perm_dim1; + perm -= perm_offset; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + --c__; + --s; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*smlsiz < 3) { + *info = -2; + } else if (*n < *smlsiz) { + *info = -3; + } else if (*nrhs < 1) { + *info = -4; + } else if (*ldb < *n) { + *info = -6; + } else if (*ldbx < *n) { + *info = -8; + } else if (*ldu < *n) { + *info = -10; + } else if (*ldgcol < *n) { + *info = -19; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZLALSA", &i__1); + return 0; + } + +/* Book-keeping and setting up the computation tree. */ + + inode = 1; + ndiml = inode + *n; + ndimr = ndiml + *n; + + dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], + smlsiz); + +/* + The following code applies back the left singular vector factors. + For applying back the right singular vector factors, go to 170. +*/ + + if (*icompq == 1) { + goto L170; + } + +/* + The nodes on the bottom level of the tree were solved + by DLASDQ. The corresponding left and right singular vector + matrices are in explicit form. First apply back the left + singular vector matrices. +*/ + + ndb1 = (nd + 1) / 2; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + +/* + IC : center row of each node + NL : number of rows of left subproblem + NR : number of rows of right subproblem + NLF: starting row of the left subproblem + NRF: starting row of the right subproblem +*/ + + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nr = iwork[ndimr + i1]; + nlf = ic - nl; + nrf = ic + 1; + +/* + Since B and BX are complex, the following call to DGEMM + is performed in two steps (real and imaginary parts). + + CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, + $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) +*/ + + j = nl * *nrhs << 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nlf + nl - 1; + for (jrow = nlf; jrow <= i__3; ++jrow) { + ++j; + i__4 = jrow + jcol * b_dim1; + rwork[j] = b[i__4].r; +/* L10: */ + } +/* L20: */ + } + dgemm_("T", "N", &nl, nrhs, &nl, &c_b1015, &u[nlf + u_dim1], ldu, & + rwork[(nl * *nrhs << 1) + 1], &nl, &c_b324, &rwork[1], &nl); + j = nl * *nrhs << 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nlf + nl - 1; + for (jrow = nlf; jrow <= i__3; ++jrow) { + ++j; + rwork[j] = d_imag(&b[jrow + jcol * b_dim1]); +/* L30: */ + } +/* L40: */ + } + dgemm_("T", "N", &nl, nrhs, &nl, &c_b1015, &u[nlf + u_dim1], ldu, & + rwork[(nl * *nrhs << 1) + 1], &nl, &c_b324, &rwork[nl * *nrhs + + 1], &nl); + jreal = 0; + jimag = nl * *nrhs; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nlf + nl - 1; + for (jrow = nlf; jrow <= i__3; ++jrow) { + ++jreal; + ++jimag; + i__4 = jrow + jcol * bx_dim1; + i__5 = jreal; + i__6 = jimag; + z__1.r = rwork[i__5], z__1.i = rwork[i__6]; + bx[i__4].r = z__1.r, bx[i__4].i = z__1.i; +/* L50: */ + } +/* L60: */ + } + +/* + Since B and BX are complex, the following call to DGEMM + is performed in two steps (real and imaginary parts). + + CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, + $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) +*/ + + j = nr * *nrhs << 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nrf + nr - 1; + for (jrow = nrf; jrow <= i__3; ++jrow) { + ++j; + i__4 = jrow + jcol * b_dim1; + rwork[j] = b[i__4].r; +/* L70: */ + } +/* L80: */ + } + dgemm_("T", "N", &nr, nrhs, &nr, &c_b1015, &u[nrf + u_dim1], ldu, & + rwork[(nr * *nrhs << 1) + 1], &nr, &c_b324, &rwork[1], &nr); + j = nr * *nrhs << 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nrf + nr - 1; + for (jrow = nrf; jrow <= i__3; ++jrow) { + ++j; + rwork[j] = d_imag(&b[jrow + jcol * b_dim1]); +/* L90: */ + } +/* L100: */ + } + dgemm_("T", "N", &nr, nrhs, &nr, &c_b1015, &u[nrf + u_dim1], ldu, & + rwork[(nr * *nrhs << 1) + 1], &nr, &c_b324, &rwork[nr * *nrhs + + 1], &nr); + jreal = 0; + jimag = nr * *nrhs; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nrf + nr - 1; + for (jrow = nrf; jrow <= i__3; ++jrow) { + ++jreal; + ++jimag; + i__4 = jrow + jcol * bx_dim1; + i__5 = jreal; + i__6 = jimag; + z__1.r = rwork[i__5], z__1.i = rwork[i__6]; + bx[i__4].r = z__1.r, bx[i__4].i = z__1.i; +/* L110: */ + } +/* L120: */ + } + +/* L130: */ + } + +/* + Next copy the rows of B that correspond to unchanged rows + in the bidiagonal matrix to BX. +*/ + + i__1 = nd; + for (i__ = 1; i__ <= i__1; ++i__) { + ic = iwork[inode + i__ - 1]; + zcopy_(nrhs, &b[ic + b_dim1], ldb, &bx[ic + bx_dim1], ldbx); +/* L140: */ + } + +/* + Finally go through the left singular vector matrices of all + the other subproblems bottom-up on the tree. +*/ + + j = pow_ii(&c__2, &nlvl); + sqre = 0; + + for (lvl = nlvl; lvl >= 1; --lvl) { + lvl2 = (lvl << 1) - 1; + +/* + find the first node LF and last node LL on + the current level LVL +*/ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__1 = lvl - 1; + lf = pow_ii(&c__2, &i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; + for (i__ = lf; i__ <= i__1; ++i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + nrf = ic + 1; + --j; + zlals0_(icompq, &nl, &nr, &sqre, nrhs, &bx[nlf + bx_dim1], ldbx, & + b[nlf + b_dim1], ldb, &perm[nlf + lvl * perm_dim1], & + givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & + givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * + poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + + lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ + j], &s[j], &rwork[1], info); +/* L150: */ + } +/* L160: */ + } + goto L330; + +/* ICOMPQ = 1: applying back the right singular vector factors. */ + +L170: + +/* + First now go through the right singular vector matrices of all + the tree nodes top-down. +*/ + + j = 0; + i__1 = nlvl; + for (lvl = 1; lvl <= i__1; ++lvl) { + lvl2 = (lvl << 1) - 1; + +/* + Find the first node LF and last node LL on + the current level LVL. +*/ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__2 = lvl - 1; + lf = pow_ii(&c__2, &i__2); + ll = (lf << 1) - 1; + } + i__2 = lf; + for (i__ = ll; i__ >= i__2; --i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + nrf = ic + 1; + if (i__ == ll) { + sqre = 0; + } else { + sqre = 1; + } + ++j; + zlals0_(icompq, &nl, &nr, &sqre, nrhs, &b[nlf + b_dim1], ldb, &bx[ + nlf + bx_dim1], ldbx, &perm[nlf + lvl * perm_dim1], & + givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & + givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * + poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + + lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ + j], &s[j], &rwork[1], info); +/* L180: */ + } +/* L190: */ + } + +/* + The nodes on the bottom level of the tree were solved + by DLASDQ. The corresponding right singular vector + matrices are in explicit form. Apply them back. +*/ + + ndb1 = (nd + 1) / 2; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nr = iwork[ndimr + i1]; + nlp1 = nl + 1; + if (i__ == nd) { + nrp1 = nr; + } else { + nrp1 = nr + 1; + } + nlf = ic - nl; + nrf = ic + 1; + +/* + Since B and BX are complex, the following call to DGEMM is + performed in two steps (real and imaginary parts). + + CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, + $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) +*/ + + j = nlp1 * *nrhs << 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nlf + nlp1 - 1; + for (jrow = nlf; jrow <= i__3; ++jrow) { + ++j; + i__4 = jrow + jcol * b_dim1; + rwork[j] = b[i__4].r; +/* L200: */ + } +/* L210: */ + } + dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b1015, &vt[nlf + vt_dim1], + ldu, &rwork[(nlp1 * *nrhs << 1) + 1], &nlp1, &c_b324, &rwork[ + 1], &nlp1); + j = nlp1 * *nrhs << 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nlf + nlp1 - 1; + for (jrow = nlf; jrow <= i__3; ++jrow) { + ++j; + rwork[j] = d_imag(&b[jrow + jcol * b_dim1]); +/* L220: */ + } +/* L230: */ + } + dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b1015, &vt[nlf + vt_dim1], + ldu, &rwork[(nlp1 * *nrhs << 1) + 1], &nlp1, &c_b324, &rwork[ + nlp1 * *nrhs + 1], &nlp1); + jreal = 0; + jimag = nlp1 * *nrhs; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nlf + nlp1 - 1; + for (jrow = nlf; jrow <= i__3; ++jrow) { + ++jreal; + ++jimag; + i__4 = jrow + jcol * bx_dim1; + i__5 = jreal; + i__6 = jimag; + z__1.r = rwork[i__5], z__1.i = rwork[i__6]; + bx[i__4].r = z__1.r, bx[i__4].i = z__1.i; +/* L240: */ + } +/* L250: */ + } + +/* + Since B and BX are complex, the following call to DGEMM is + performed in two steps (real and imaginary parts). + + CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, + $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) +*/ + + j = nrp1 * *nrhs << 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nrf + nrp1 - 1; + for (jrow = nrf; jrow <= i__3; ++jrow) { + ++j; + i__4 = jrow + jcol * b_dim1; + rwork[j] = b[i__4].r; +/* L260: */ + } +/* L270: */ + } + dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b1015, &vt[nrf + vt_dim1], + ldu, &rwork[(nrp1 * *nrhs << 1) + 1], &nrp1, &c_b324, &rwork[ + 1], &nrp1); + j = nrp1 * *nrhs << 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nrf + nrp1 - 1; + for (jrow = nrf; jrow <= i__3; ++jrow) { + ++j; + rwork[j] = d_imag(&b[jrow + jcol * b_dim1]); +/* L280: */ + } +/* L290: */ + } + dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b1015, &vt[nrf + vt_dim1], + ldu, &rwork[(nrp1 * *nrhs << 1) + 1], &nrp1, &c_b324, &rwork[ + nrp1 * *nrhs + 1], &nrp1); + jreal = 0; + jimag = nrp1 * *nrhs; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = nrf + nrp1 - 1; + for (jrow = nrf; jrow <= i__3; ++jrow) { + ++jreal; + ++jimag; + i__4 = jrow + jcol * bx_dim1; + i__5 = jreal; + i__6 = jimag; + z__1.r = rwork[i__5], z__1.i = rwork[i__6]; + bx[i__4].r = z__1.r, bx[i__4].i = z__1.i; +/* L300: */ + } +/* L310: */ + } + +/* L320: */ + } + +L330: + + return 0; + +/* End of ZLALSA */ + +} /* zlalsa_ */ + +/* Subroutine */ int zlalsd_(char *uplo, integer *smlsiz, integer *n, integer + *nrhs, doublereal *d__, doublereal *e, doublecomplex *b, integer *ldb, + doublereal *rcond, integer *rank, doublecomplex *work, doublereal * + rwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, i__6; + doublereal d__1; + doublecomplex z__1; + + /* Builtin functions */ + double d_imag(doublecomplex *), log(doublereal), d_sign(doublereal *, + doublereal *); + + /* Local variables */ + static integer c__, i__, j, k; + static doublereal r__; + static integer s, u, z__; + static doublereal cs; + static integer bx; + static doublereal sn; + static integer st, vt, nm1, st1; + static doublereal eps; + static integer iwk; + static doublereal tol; + static integer difl, difr, jcol, irwb, perm, nsub, nlvl, sqre, bxst, jrow, + irwu, jimag; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer jreal, irwib, poles, sizei, irwrb, nsize; + extern /* Subroutine */ int zdrot_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, doublereal *), zcopy_( + integer *, doublecomplex *, integer *, doublecomplex *, integer *) + ; + static integer irwvt, icmpq1, icmpq2; + + extern /* Subroutine */ int dlasda_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *), dlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlasdq_(char *, integer *, integer *, integer + *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *), dlaset_(char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *), dlartg_(doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *), xerbla_(char *, integer *); + static integer givcol; + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int zlalsa_(integer *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, integer *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *), zlascl_(char *, integer *, + integer *, doublereal *, doublereal *, integer *, integer *, + doublecomplex *, integer *, integer *), dlasrt_(char *, + integer *, doublereal *, integer *), zlacpy_(char *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *), zlaset_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *); + static doublereal orgnrm; + static integer givnum, givptr, nrwork, irwwrk, smlszp; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + ZLALSD uses the singular value decomposition of A to solve the least + squares problem of finding X to minimize the Euclidean norm of each + column of A*X-B, where A is N-by-N upper bidiagonal, and X and B + are N-by-NRHS. The solution X overwrites B. + + The singular values of A smaller than RCOND times the largest + singular value are treated as zero in solving the least squares + problem; in this case a minimum norm solution is returned. + The actual singular values are returned in D in ascending order. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': D and E define an upper bidiagonal matrix. + = 'L': D and E define a lower bidiagonal matrix. + + SMLSIZ (input) INTEGER + The maximum size of the subproblems at the bottom of the + computation tree. + + N (input) INTEGER + The dimension of the bidiagonal matrix. N >= 0. + + NRHS (input) INTEGER + The number of columns of B. NRHS must be at least 1. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry D contains the main diagonal of the bidiagonal + matrix. On exit, if INFO = 0, D contains its singular values. + + E (input) DOUBLE PRECISION array, dimension (N-1) + Contains the super-diagonal entries of the bidiagonal matrix. + On exit, E has been destroyed. + + B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) + On input, B contains the right hand sides of the least + squares problem. On output, B contains the solution X. + + LDB (input) INTEGER + The leading dimension of B in the calling subprogram. + LDB must be at least max(1,N). + + RCOND (input) DOUBLE PRECISION + The singular values of A less than or equal to RCOND times + the largest singular value are treated as zero in solving + the least squares problem. If RCOND is negative, + machine precision is used instead. + For example, if diag(S)*X=B were the least squares problem, + where diag(S) is a diagonal matrix of singular values, the + solution would be X(i) = B(i) / S(i) if S(i) is greater than + RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to + RCOND*max(S). + + RANK (output) INTEGER + The number of singular values of A greater than RCOND times + the largest singular value. + + WORK (workspace) COMPLEX*16 array, dimension at least + (N * NRHS). + + RWORK (workspace) DOUBLE PRECISION array, dimension at least + (9*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + (SMLSIZ+1)**2), + where + NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) + + IWORK (workspace) INTEGER array, dimension at least + (3*N*NLVL + 11*N). + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an singular value while + working on the submatrix lying in rows and columns + INFO/(N+1) through MOD(INFO,N+1). + + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --work; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -3; + } else if (*nrhs < 1) { + *info = -4; + } else if (*ldb < 1 || *ldb < *n) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZLALSD", &i__1); + return 0; + } + + eps = EPSILON; + +/* Set up the tolerance. */ + + if (*rcond <= 0. || *rcond >= 1.) { + *rcond = eps; + } + + *rank = 0; + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } else if (*n == 1) { + if (d__[1] == 0.) { + zlaset_("A", &c__1, nrhs, &c_b59, &c_b59, &b[b_offset], ldb); + } else { + *rank = 1; + zlascl_("G", &c__0, &c__0, &d__[1], &c_b1015, &c__1, nrhs, &b[ + b_offset], ldb, info); + d__[1] = abs(d__[1]); + } + return 0; + } + +/* Rotate the matrix if it is lower bidiagonal. */ + + if (*(unsigned char *)uplo == 'L') { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (*nrhs == 1) { + zdrot_(&c__1, &b[i__ + b_dim1], &c__1, &b[i__ + 1 + b_dim1], & + c__1, &cs, &sn); + } else { + rwork[(i__ << 1) - 1] = cs; + rwork[i__ * 2] = sn; + } +/* L10: */ + } + if (*nrhs > 1) { + i__1 = *nrhs; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *n - 1; + for (j = 1; j <= i__2; ++j) { + cs = rwork[(j << 1) - 1]; + sn = rwork[j * 2]; + zdrot_(&c__1, &b[j + i__ * b_dim1], &c__1, &b[j + 1 + i__ + * b_dim1], &c__1, &cs, &sn); +/* L20: */ + } +/* L30: */ + } + } + } + +/* Scale. */ + + nm1 = *n - 1; + orgnrm = dlanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.) { + zlaset_("A", n, nrhs, &c_b59, &c_b59, &b[b_offset], ldb); + return 0; + } + + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, n, &c__1, &d__[1], n, info); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, &nm1, &c__1, &e[1], &nm1, + info); + +/* + If N is smaller than the minimum divide size SMLSIZ, then solve + the problem with another solver. +*/ + + if (*n <= *smlsiz) { + irwu = 1; + irwvt = irwu + *n * *n; + irwwrk = irwvt + *n * *n; + irwrb = irwwrk; + irwib = irwrb + *n * *nrhs; + irwb = irwib + *n * *nrhs; + dlaset_("A", n, n, &c_b324, &c_b1015, &rwork[irwu], n); + dlaset_("A", n, n, &c_b324, &c_b1015, &rwork[irwvt], n); + dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &rwork[irwvt], n, + &rwork[irwu], n, &rwork[irwwrk], &c__1, &rwork[irwwrk], info); + if (*info != 0) { + return 0; + } + +/* + In the real version, B is passed to DLASDQ and multiplied + internally by Q'. Here B is complex and that product is + computed below in two steps (real and imaginary parts). +*/ + + j = irwb - 1; + i__1 = *nrhs; + for (jcol = 1; jcol <= i__1; ++jcol) { + i__2 = *n; + for (jrow = 1; jrow <= i__2; ++jrow) { + ++j; + i__3 = jrow + jcol * b_dim1; + rwork[j] = b[i__3].r; +/* L40: */ + } +/* L50: */ + } + dgemm_("T", "N", n, nrhs, n, &c_b1015, &rwork[irwu], n, &rwork[irwb], + n, &c_b324, &rwork[irwrb], n); + j = irwb - 1; + i__1 = *nrhs; + for (jcol = 1; jcol <= i__1; ++jcol) { + i__2 = *n; + for (jrow = 1; jrow <= i__2; ++jrow) { + ++j; + rwork[j] = d_imag(&b[jrow + jcol * b_dim1]); +/* L60: */ + } +/* L70: */ + } + dgemm_("T", "N", n, nrhs, n, &c_b1015, &rwork[irwu], n, &rwork[irwb], + n, &c_b324, &rwork[irwib], n); + jreal = irwrb - 1; + jimag = irwib - 1; + i__1 = *nrhs; + for (jcol = 1; jcol <= i__1; ++jcol) { + i__2 = *n; + for (jrow = 1; jrow <= i__2; ++jrow) { + ++jreal; + ++jimag; + i__3 = jrow + jcol * b_dim1; + i__4 = jreal; + i__5 = jimag; + z__1.r = rwork[i__4], z__1.i = rwork[i__5]; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L80: */ + } +/* L90: */ + } + + tol = *rcond * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (d__[i__] <= tol) { + zlaset_("A", &c__1, nrhs, &c_b59, &c_b59, &b[i__ + b_dim1], + ldb); + } else { + zlascl_("G", &c__0, &c__0, &d__[i__], &c_b1015, &c__1, nrhs, & + b[i__ + b_dim1], ldb, info); + ++(*rank); + } +/* L100: */ + } + +/* + Since B is complex, the following call to DGEMM is performed + in two steps (real and imaginary parts). That is for V * B + (in the real version of the code V' is stored in WORK). + + CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, WORK, N, B, LDB, ZERO, + $ WORK( NWORK ), N ) +*/ + + j = irwb - 1; + i__1 = *nrhs; + for (jcol = 1; jcol <= i__1; ++jcol) { + i__2 = *n; + for (jrow = 1; jrow <= i__2; ++jrow) { + ++j; + i__3 = jrow + jcol * b_dim1; + rwork[j] = b[i__3].r; +/* L110: */ + } +/* L120: */ + } + dgemm_("T", "N", n, nrhs, n, &c_b1015, &rwork[irwvt], n, &rwork[irwb], + n, &c_b324, &rwork[irwrb], n); + j = irwb - 1; + i__1 = *nrhs; + for (jcol = 1; jcol <= i__1; ++jcol) { + i__2 = *n; + for (jrow = 1; jrow <= i__2; ++jrow) { + ++j; + rwork[j] = d_imag(&b[jrow + jcol * b_dim1]); +/* L130: */ + } +/* L140: */ + } + dgemm_("T", "N", n, nrhs, n, &c_b1015, &rwork[irwvt], n, &rwork[irwb], + n, &c_b324, &rwork[irwib], n); + jreal = irwrb - 1; + jimag = irwib - 1; + i__1 = *nrhs; + for (jcol = 1; jcol <= i__1; ++jcol) { + i__2 = *n; + for (jrow = 1; jrow <= i__2; ++jrow) { + ++jreal; + ++jimag; + i__3 = jrow + jcol * b_dim1; + i__4 = jreal; + i__5 = jimag; + z__1.r = rwork[i__4], z__1.i = rwork[i__5]; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L150: */ + } +/* L160: */ + } + +/* Unscale. */ + + dlascl_("G", &c__0, &c__0, &c_b1015, &orgnrm, n, &c__1, &d__[1], n, + info); + dlasrt_("D", n, &d__[1], info); + zlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, n, nrhs, &b[b_offset], + ldb, info); + + return 0; + } + +/* Book-keeping and setting up some constants. */ + + nlvl = (integer) (log((doublereal) (*n) / (doublereal) (*smlsiz + 1)) / + log(2.)) + 1; + + smlszp = *smlsiz + 1; + + u = 1; + vt = *smlsiz * *n + 1; + difl = vt + smlszp * *n; + difr = difl + nlvl * *n; + z__ = difr + (nlvl * *n << 1); + c__ = z__ + nlvl * *n; + s = c__ + *n; + poles = s + *n; + givnum = poles + (nlvl << 1) * *n; + nrwork = givnum + (nlvl << 1) * *n; + bx = 1; + + irwrb = nrwork; + irwib = irwrb + *smlsiz * *nrhs; + irwb = irwib + *smlsiz * *nrhs; + + sizei = *n + 1; + k = sizei + *n; + givptr = k + *n; + perm = givptr + *n; + givcol = perm + nlvl * *n; + iwk = givcol + (nlvl * *n << 1); + + st = 1; + sqre = 0; + icmpq1 = 1; + icmpq2 = 0; + nsub = 0; + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) < eps) { + d__[i__] = d_sign(&eps, &d__[i__]); + } +/* L170: */ + } + + i__1 = nm1; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) { + ++nsub; + iwork[nsub] = st; + +/* + Subproblem found. First determine its size and then + apply divide and conquer on it. +*/ + + if (i__ < nm1) { + +/* A subproblem with E(I) small for I < NM1. */ + + nsize = i__ - st + 1; + iwork[sizei + nsub - 1] = nsize; + } else if ((d__1 = e[i__], abs(d__1)) >= eps) { + +/* A subproblem with E(NM1) not too small but I = NM1. */ + + nsize = *n - st + 1; + iwork[sizei + nsub - 1] = nsize; + } else { + +/* + A subproblem with E(NM1) small. This implies an + 1-by-1 subproblem at D(N), which is not solved + explicitly. +*/ + + nsize = i__ - st + 1; + iwork[sizei + nsub - 1] = nsize; + ++nsub; + iwork[nsub] = *n; + iwork[sizei + nsub - 1] = 1; + zcopy_(nrhs, &b[*n + b_dim1], ldb, &work[bx + nm1], n); + } + st1 = st - 1; + if (nsize == 1) { + +/* + This is a 1-by-1 subproblem and is not solved + explicitly. +*/ + + zcopy_(nrhs, &b[st + b_dim1], ldb, &work[bx + st1], n); + } else if (nsize <= *smlsiz) { + +/* This is a small subproblem and is solved by DLASDQ. */ + + dlaset_("A", &nsize, &nsize, &c_b324, &c_b1015, &rwork[vt + + st1], n); + dlaset_("A", &nsize, &nsize, &c_b324, &c_b1015, &rwork[u + + st1], n); + dlasdq_("U", &c__0, &nsize, &nsize, &nsize, &c__0, &d__[st], & + e[st], &rwork[vt + st1], n, &rwork[u + st1], n, & + rwork[nrwork], &c__1, &rwork[nrwork], info) + ; + if (*info != 0) { + return 0; + } + +/* + In the real version, B is passed to DLASDQ and multiplied + internally by Q'. Here B is complex and that product is + computed below in two steps (real and imaginary parts). +*/ + + j = irwb - 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = st + nsize - 1; + for (jrow = st; jrow <= i__3; ++jrow) { + ++j; + i__4 = jrow + jcol * b_dim1; + rwork[j] = b[i__4].r; +/* L180: */ + } +/* L190: */ + } + dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1015, &rwork[u + + st1], n, &rwork[irwb], &nsize, &c_b324, &rwork[irwrb], + &nsize); + j = irwb - 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = st + nsize - 1; + for (jrow = st; jrow <= i__3; ++jrow) { + ++j; + rwork[j] = d_imag(&b[jrow + jcol * b_dim1]); +/* L200: */ + } +/* L210: */ + } + dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1015, &rwork[u + + st1], n, &rwork[irwb], &nsize, &c_b324, &rwork[irwib], + &nsize); + jreal = irwrb - 1; + jimag = irwib - 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = st + nsize - 1; + for (jrow = st; jrow <= i__3; ++jrow) { + ++jreal; + ++jimag; + i__4 = jrow + jcol * b_dim1; + i__5 = jreal; + i__6 = jimag; + z__1.r = rwork[i__5], z__1.i = rwork[i__6]; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; +/* L220: */ + } +/* L230: */ + } + + zlacpy_("A", &nsize, nrhs, &b[st + b_dim1], ldb, &work[bx + + st1], n); + } else { + +/* A large problem. Solve it using divide and conquer. */ + + dlasda_(&icmpq1, smlsiz, &nsize, &sqre, &d__[st], &e[st], & + rwork[u + st1], n, &rwork[vt + st1], &iwork[k + st1], + &rwork[difl + st1], &rwork[difr + st1], &rwork[z__ + + st1], &rwork[poles + st1], &iwork[givptr + st1], & + iwork[givcol + st1], n, &iwork[perm + st1], &rwork[ + givnum + st1], &rwork[c__ + st1], &rwork[s + st1], & + rwork[nrwork], &iwork[iwk], info); + if (*info != 0) { + return 0; + } + bxst = bx + st1; + zlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &b[st + b_dim1], ldb, & + work[bxst], n, &rwork[u + st1], n, &rwork[vt + st1], & + iwork[k + st1], &rwork[difl + st1], &rwork[difr + st1] + , &rwork[z__ + st1], &rwork[poles + st1], &iwork[ + givptr + st1], &iwork[givcol + st1], n, &iwork[perm + + st1], &rwork[givnum + st1], &rwork[c__ + st1], &rwork[ + s + st1], &rwork[nrwork], &iwork[iwk], info); + if (*info != 0) { + return 0; + } + } + st = i__ + 1; + } +/* L240: */ + } + +/* Apply the singular values and treat the tiny ones as zero. */ + + tol = *rcond * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* + Some of the elements in D can be negative because 1-by-1 + subproblems were not solved explicitly. +*/ + + if ((d__1 = d__[i__], abs(d__1)) <= tol) { + zlaset_("A", &c__1, nrhs, &c_b59, &c_b59, &work[bx + i__ - 1], n); + } else { + ++(*rank); + zlascl_("G", &c__0, &c__0, &d__[i__], &c_b1015, &c__1, nrhs, & + work[bx + i__ - 1], n, info); + } + d__[i__] = (d__1 = d__[i__], abs(d__1)); +/* L250: */ + } + +/* Now apply back the right singular vectors. */ + + icmpq2 = 1; + i__1 = nsub; + for (i__ = 1; i__ <= i__1; ++i__) { + st = iwork[i__]; + st1 = st - 1; + nsize = iwork[sizei + i__ - 1]; + bxst = bx + st1; + if (nsize == 1) { + zcopy_(nrhs, &work[bxst], n, &b[st + b_dim1], ldb); + } else if (nsize <= *smlsiz) { + +/* + Since B and BX are complex, the following call to DGEMM + is performed in two steps (real and imaginary parts). + + CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ RWORK( VT+ST1 ), N, RWORK( BXST ), N, ZERO, + $ B( ST, 1 ), LDB ) +*/ + + j = bxst - *n - 1; + jreal = irwb - 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + j += *n; + i__3 = nsize; + for (jrow = 1; jrow <= i__3; ++jrow) { + ++jreal; + i__4 = j + jrow; + rwork[jreal] = work[i__4].r; +/* L260: */ + } +/* L270: */ + } + dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1015, &rwork[vt + st1], + n, &rwork[irwb], &nsize, &c_b324, &rwork[irwrb], &nsize); + j = bxst - *n - 1; + jimag = irwb - 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + j += *n; + i__3 = nsize; + for (jrow = 1; jrow <= i__3; ++jrow) { + ++jimag; + rwork[jimag] = d_imag(&work[j + jrow]); +/* L280: */ + } +/* L290: */ + } + dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1015, &rwork[vt + st1], + n, &rwork[irwb], &nsize, &c_b324, &rwork[irwib], &nsize); + jreal = irwrb - 1; + jimag = irwib - 1; + i__2 = *nrhs; + for (jcol = 1; jcol <= i__2; ++jcol) { + i__3 = st + nsize - 1; + for (jrow = st; jrow <= i__3; ++jrow) { + ++jreal; + ++jimag; + i__4 = jrow + jcol * b_dim1; + i__5 = jreal; + i__6 = jimag; + z__1.r = rwork[i__5], z__1.i = rwork[i__6]; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; +/* L300: */ + } +/* L310: */ + } + } else { + zlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &work[bxst], n, &b[st + + b_dim1], ldb, &rwork[u + st1], n, &rwork[vt + st1], & + iwork[k + st1], &rwork[difl + st1], &rwork[difr + st1], & + rwork[z__ + st1], &rwork[poles + st1], &iwork[givptr + + st1], &iwork[givcol + st1], n, &iwork[perm + st1], &rwork[ + givnum + st1], &rwork[c__ + st1], &rwork[s + st1], &rwork[ + nrwork], &iwork[iwk], info); + if (*info != 0) { + return 0; + } + } +/* L320: */ + } + +/* Unscale and sort the singular values. */ + + dlascl_("G", &c__0, &c__0, &c_b1015, &orgnrm, n, &c__1, &d__[1], n, info); + dlasrt_("D", n, &d__[1], info); + zlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, n, nrhs, &b[b_offset], ldb, + info); + + return 0; + +/* End of ZLALSD */ + +} /* zlalsd_ */ + +doublereal zlange_(char *norm, integer *m, integer *n, doublecomplex *a, + integer *lda, doublereal *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal ret_val, d__1, d__2; + + /* Builtin functions */ + double z_abs(doublecomplex *), sqrt(doublereal); + + /* Local variables */ + static integer i__, j; + static doublereal sum, scale; + extern logical lsame_(char *, char *); + static doublereal value; + extern /* Subroutine */ int zlassq_(integer *, doublecomplex *, integer *, + doublereal *, doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + ZLANGE returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + complex matrix A. + + Description + =========== + + ZLANGE returns the value + + ZLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in ZLANGE as described + above. + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. When M = 0, + ZLANGE is set to zero. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. When N = 0, + ZLANGE is set to zero. + + A (input) COMPLEX*16 array, dimension (LDA,N) + The m by n matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(M,1). + + WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), + where LWORK >= M when NORM = 'I'; otherwise, WORK is not + referenced. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (min(*m,*n) == 0) { + value = 0.; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + value = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = z_abs(&a[i__ + j * a_dim1]); + value = max(d__1,d__2); +/* L10: */ + } +/* L20: */ + } + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1') { + +/* Find norm1(A). */ + + value = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + sum += z_abs(&a[i__ + j * a_dim1]); +/* L30: */ + } + value = max(value,sum); +/* L40: */ + } + } else if (lsame_(norm, "I")) { + +/* Find normI(A). */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L50: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] += z_abs(&a[i__ + j * a_dim1]); +/* L60: */ + } +/* L70: */ + } + value = 0.; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = work[i__]; + value = max(d__1,d__2); +/* L80: */ + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.; + sum = 1.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + zlassq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L90: */ + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of ZLANGE */ + +} /* zlange_ */ + +doublereal zlanhe_(char *norm, char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal ret_val, d__1, d__2, d__3; + + /* Builtin functions */ + double z_abs(doublecomplex *), sqrt(doublereal); + + /* Local variables */ + static integer i__, j; + static doublereal sum, absa, scale; + extern logical lsame_(char *, char *); + static doublereal value; + extern /* Subroutine */ int zlassq_(integer *, doublecomplex *, integer *, + doublereal *, doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + ZLANHE returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + complex hermitian matrix A. + + Description + =========== + + ZLANHE returns the value + + ZLANHE = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in ZLANHE as described + above. + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + hermitian matrix A is to be referenced. + = 'U': Upper triangular part of A is referenced + = 'L': Lower triangular part of A is referenced + + N (input) INTEGER + The order of the matrix A. N >= 0. When N = 0, ZLANHE is + set to zero. + + A (input) COMPLEX*16 array, dimension (LDA,N) + The hermitian matrix A. If UPLO = 'U', the leading n by n + upper triangular part of A contains the upper triangular part + of the matrix A, and the strictly lower triangular part of A + is not referenced. If UPLO = 'L', the leading n by n lower + triangular part of A contains the lower triangular part of + the matrix A, and the strictly upper triangular part of A is + not referenced. Note that the imaginary parts of the diagonal + elements need not be set and are assumed to be zero. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(N,1). + + WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), + where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, + WORK is not referenced. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (*n == 0) { + value = 0.; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + value = 0.; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = z_abs(&a[i__ + j * a_dim1]); + value = max(d__1,d__2); +/* L10: */ + } +/* Computing MAX */ + i__2 = j + j * a_dim1; + d__2 = value, d__3 = (d__1 = a[i__2].r, abs(d__1)); + value = max(d__2,d__3); +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = j + j * a_dim1; + d__2 = value, d__3 = (d__1 = a[i__2].r, abs(d__1)); + value = max(d__2,d__3); + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = z_abs(&a[i__ + j * a_dim1]); + value = max(d__1,d__2); +/* L30: */ + } +/* L40: */ + } + } + } else if (lsame_(norm, "I") || lsame_(norm, "O") || *(unsigned char *)norm == '1') { + +/* Find normI(A) ( = norm1(A), since A is hermitian). */ + + value = 0.; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + absa = z_abs(&a[i__ + j * a_dim1]); + sum += absa; + work[i__] += absa; +/* L50: */ + } + i__2 = j + j * a_dim1; + work[j] = sum + (d__1 = a[i__2].r, abs(d__1)); +/* L60: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = work[i__]; + value = max(d__1,d__2); +/* L70: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L80: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j + j * a_dim1; + sum = work[j] + (d__1 = a[i__2].r, abs(d__1)); + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + absa = z_abs(&a[i__ + j * a_dim1]); + sum += absa; + work[i__] += absa; +/* L90: */ + } + value = max(value,sum); +/* L100: */ + } + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.; + sum = 1.; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + i__2 = j - 1; + zlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L110: */ + } + } else { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j; + zlassq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum); +/* L120: */ + } + } + sum *= 2; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + if (a[i__2].r != 0.) { + i__2 = i__ + i__ * a_dim1; + absa = (d__1 = a[i__2].r, abs(d__1)); + if (scale < absa) { +/* Computing 2nd power */ + d__1 = scale / absa; + sum = sum * (d__1 * d__1) + 1.; + scale = absa; + } else { +/* Computing 2nd power */ + d__1 = absa / scale; + sum += d__1 * d__1; + } + } +/* L130: */ + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of ZLANHE */ + +} /* zlanhe_ */ + +doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, + doublereal *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + doublereal ret_val, d__1, d__2; + + /* Builtin functions */ + double z_abs(doublecomplex *), sqrt(doublereal); + + /* Local variables */ + static integer i__, j; + static doublereal sum, scale; + extern logical lsame_(char *, char *); + static doublereal value; + extern /* Subroutine */ int zlassq_(integer *, doublecomplex *, integer *, + doublereal *, doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + ZLANHS returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + Hessenberg matrix A. + + Description + =========== + + ZLANHS returns the value + + ZLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in ZLANHS as described + above. + + N (input) INTEGER + The order of the matrix A. N >= 0. When N = 0, ZLANHS is + set to zero. + + A (input) COMPLEX*16 array, dimension (LDA,N) + The n by n upper Hessenberg matrix A; the part of A below the + first sub-diagonal is not referenced. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(N,1). + + WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), + where LWORK >= N when NORM = 'I'; otherwise, WORK is not + referenced. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (*n == 0) { + value = 0.; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + value = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = z_abs(&a[i__ + j * a_dim1]); + value = max(d__1,d__2); +/* L10: */ + } +/* L20: */ + } + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1') { + +/* Find norm1(A). */ + + value = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.; +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + sum += z_abs(&a[i__ + j * a_dim1]); +/* L30: */ + } + value = max(value,sum); +/* L40: */ + } + } else if (lsame_(norm, "I")) { + +/* Find normI(A). */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L50: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] += z_abs(&a[i__ + j * a_dim1]); +/* L60: */ + } +/* L70: */ + } + value = 0.; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = work[i__]; + value = max(d__1,d__2); +/* L80: */ + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.; + sum = 1.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + zlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L90: */ + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of ZLANHS */ + +} /* zlanhs_ */ + +/* Subroutine */ int zlarcm_(integer *m, integer *n, doublereal *a, integer * + lda, doublecomplex *b, integer *ldb, doublecomplex *c__, integer *ldc, + doublereal *rwork) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3, i__4, i__5; + doublereal d__1; + doublecomplex z__1; + + /* Builtin functions */ + double d_imag(doublecomplex *); + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZLARCM performs a very simple matrix-matrix multiplication: + C := A * B, + where A is M by M and real; B is M by N and complex; + C is M by N and complex. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A and of the matrix C. + M >= 0. + + N (input) INTEGER + The number of columns and rows of the matrix B and + the number of columns of the matrix C. + N >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA, M) + A contains the M by M matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >=max(1,M). + + B (input) DOUBLE PRECISION array, dimension (LDB, N) + B contains the M by N matrix B. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >=max(1,M). + + C (input) COMPLEX*16 array, dimension (LDC, N) + C contains the M by N matrix C. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >=max(1,M). + + RWORK (workspace) DOUBLE PRECISION array, dimension (2*M*N) + + ===================================================================== + + + Quick return if possible. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --rwork; + + /* Function Body */ + if (*m == 0 || *n == 0) { + return 0; + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + rwork[(j - 1) * *m + i__] = b[i__3].r; +/* L10: */ + } +/* L20: */ + } + + l = *m * *n + 1; + dgemm_("N", "N", m, n, m, &c_b1015, &a[a_offset], lda, &rwork[1], m, & + c_b324, &rwork[l], m); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = l + (j - 1) * *m + i__ - 1; + c__[i__3].r = rwork[i__4], c__[i__3].i = 0.; +/* L30: */ + } +/* L40: */ + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + rwork[(j - 1) * *m + i__] = d_imag(&b[i__ + j * b_dim1]); +/* L50: */ + } +/* L60: */ + } + dgemm_("N", "N", m, n, m, &c_b1015, &a[a_offset], lda, &rwork[1], m, & + c_b324, &rwork[l], m); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + d__1 = c__[i__4].r; + i__5 = l + (j - 1) * *m + i__ - 1; + z__1.r = d__1, z__1.i = rwork[i__5]; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L70: */ + } +/* L80: */ + } + + return 0; + +/* End of ZLARCM */ + +} /* zlarcm_ */ + +/* Subroutine */ int zlarf_(char *side, integer *m, integer *n, doublecomplex + *v, integer *incv, doublecomplex *tau, doublecomplex *c__, integer * + ldc, doublecomplex *work) +{ + /* System generated locals */ + integer c_dim1, c_offset; + doublecomplex z__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZLARF applies a complex elementary reflector H to a complex M-by-N + matrix C, from either the left or the right. H is represented in the + form + + H = I - tau * v * v' + + where tau is a complex scalar and v is a complex vector. + + If tau = 0, then H is taken to be the unit matrix. + + To apply H' (the conjugate transpose of H), supply conjg(tau) instead + tau. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': form H * C + = 'R': form C * H + + M (input) INTEGER + The number of rows of the matrix C. + + N (input) INTEGER + The number of columns of the matrix C. + + V (input) COMPLEX*16 array, dimension + (1 + (M-1)*abs(INCV)) if SIDE = 'L' + or (1 + (N-1)*abs(INCV)) if SIDE = 'R' + The vector v in the representation of H. V is not used if + TAU = 0. + + INCV (input) INTEGER + The increment between elements of v. INCV <> 0. + + TAU (input) COMPLEX*16 + The value tau in the representation of H. + + C (input/output) COMPLEX*16 array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by the matrix H * C if SIDE = 'L', + or C * H if SIDE = 'R'. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) COMPLEX*16 array, dimension + (N) if SIDE = 'L' + or (M) if SIDE = 'R' + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + if (lsame_(side, "L")) { + +/* Form H * C */ + + if (tau->r != 0. || tau->i != 0.) { + +/* w := C' * v */ + + zgemv_("Conjugate transpose", m, n, &c_b60, &c__[c_offset], ldc, & + v[1], incv, &c_b59, &work[1], &c__1); + +/* C := C - v * w' */ + + z__1.r = -tau->r, z__1.i = -tau->i; + zgerc_(m, n, &z__1, &v[1], incv, &work[1], &c__1, &c__[c_offset], + ldc); + } + } else { + +/* Form C * H */ + + if (tau->r != 0. || tau->i != 0.) { + +/* w := C * v */ + + zgemv_("No transpose", m, n, &c_b60, &c__[c_offset], ldc, &v[1], + incv, &c_b59, &work[1], &c__1); + +/* C := C - w * v' */ + + z__1.r = -tau->r, z__1.i = -tau->i; + zgerc_(m, n, &z__1, &work[1], &c__1, &v[1], incv, &c__[c_offset], + ldc); + } + } + return 0; + +/* End of ZLARF */ + +} /* zlarf_ */ + +/* Subroutine */ int zlarfb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, doublecomplex *v, integer + *ldv, doublecomplex *t, integer *ldt, doublecomplex *c__, integer * + ldc, doublecomplex *work, integer *ldwork) +{ + /* System generated locals */ + integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, + work_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), zcopy_(integer *, doublecomplex *, + integer *, doublecomplex *, integer *), ztrmm_(char *, char *, + char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *), zlacgv_(integer *, doublecomplex *, + integer *); + static char transt[1]; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZLARFB applies a complex block reflector H or its transpose H' to a + complex M-by-N matrix C, from either the left or the right. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply H or H' from the Left + = 'R': apply H or H' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply H (No transpose) + = 'C': apply H' (Conjugate transpose) + + DIRECT (input) CHARACTER*1 + Indicates how H is formed from a product of elementary + reflectors + = 'F': H = H(1) H(2) . . . H(k) (Forward) + = 'B': H = H(k) . . . H(2) H(1) (Backward) + + STOREV (input) CHARACTER*1 + Indicates how the vectors which define the elementary + reflectors are stored: + = 'C': Columnwise + = 'R': Rowwise + + M (input) INTEGER + The number of rows of the matrix C. + + N (input) INTEGER + The number of columns of the matrix C. + + K (input) INTEGER + The order of the matrix T (= the number of elementary + reflectors whose product defines the block reflector). + + V (input) COMPLEX*16 array, dimension + (LDV,K) if STOREV = 'C' + (LDV,M) if STOREV = 'R' and SIDE = 'L' + (LDV,N) if STOREV = 'R' and SIDE = 'R' + The matrix V. See further details. + + LDV (input) INTEGER + The leading dimension of the array V. + If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); + if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); + if STOREV = 'R', LDV >= K. + + T (input) COMPLEX*16 array, dimension (LDT,K) + The triangular K-by-K matrix T in the representation of the + block reflector. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= K. + + C (input/output) COMPLEX*16 array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by H*C or H'*C or C*H or C*H'. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) COMPLEX*16 array, dimension (LDWORK,K) + + LDWORK (input) INTEGER + The leading dimension of the array WORK. + If SIDE = 'L', LDWORK >= max(1,N); + if SIDE = 'R', LDWORK >= max(1,M). + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + work_dim1 = *ldwork; + work_offset = 1 + work_dim1; + work -= work_offset; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + return 0; + } + + if (lsame_(trans, "N")) { + *(unsigned char *)transt = 'C'; + } else { + *(unsigned char *)transt = 'N'; + } + + if (lsame_(storev, "C")) { + + if (lsame_(direct, "F")) { + +/* + Let V = ( V1 ) (first K rows) + ( V2 ) + where V1 is unit lower triangular. +*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) + + W := C1' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + zcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], + &c__1); + zlacgv_(n, &work[j * work_dim1 + 1], &c__1); +/* L10: */ + } + +/* W := W * V1 */ + + ztrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b60, + &v[v_offset], ldv, &work[work_offset], ldwork); + if (*m > *k) { + +/* W := W + C2'*V2 */ + + i__1 = *m - *k; + zgemm_("Conjugate transpose", "No transpose", n, k, &i__1, + &c_b60, &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + + v_dim1], ldv, &c_b60, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + ztrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b60, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V * W' */ + + if (*m > *k) { + +/* C2 := C2 - V2 * W' */ + + i__1 = *m - *k; + z__1.r = -1., z__1.i = -0.; + zgemm_("No transpose", "Conjugate transpose", &i__1, n, k, + &z__1, &v[*k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork, &c_b60, &c__[*k + 1 + + c_dim1], ldc); + } + +/* W := W * V1' */ + + ztrmm_("Right", "Lower", "Conjugate transpose", "Unit", n, k, + &c_b60, &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ * c_dim1; + i__4 = j + i__ * c_dim1; + d_cnjg(&z__2, &work[i__ + j * work_dim1]); + z__1.r = c__[i__4].r - z__2.r, z__1.i = c__[i__4].i - + z__2.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L20: */ + } +/* L30: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + W := C * V = (C1*V1 + C2*V2) (stored in WORK) + + W := C1 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + zcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L40: */ + } + +/* W := W * V1 */ + + ztrmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b60, + &v[v_offset], ldv, &work[work_offset], ldwork); + if (*n > *k) { + +/* W := W + C2 * V2 */ + + i__1 = *n - *k; + zgemm_("No transpose", "No transpose", m, k, &i__1, & + c_b60, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + + 1 + v_dim1], ldv, &c_b60, &work[work_offset], + ldwork); + } + +/* W := W * T or W * T' */ + + ztrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b60, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V' */ + + if (*n > *k) { + +/* C2 := C2 - W * V2' */ + + i__1 = *n - *k; + z__1.r = -1., z__1.i = -0.; + zgemm_("No transpose", "Conjugate transpose", m, &i__1, k, + &z__1, &work[work_offset], ldwork, &v[*k + 1 + + v_dim1], ldv, &c_b60, &c__[(*k + 1) * c_dim1 + 1], + ldc); + } + +/* W := W * V1' */ + + ztrmm_("Right", "Lower", "Conjugate transpose", "Unit", m, k, + &c_b60, &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * work_dim1; + z__1.r = c__[i__4].r - work[i__5].r, z__1.i = c__[ + i__4].i - work[i__5].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L50: */ + } +/* L60: */ + } + } + + } else { + +/* + Let V = ( V1 ) + ( V2 ) (last K rows) + where V2 is unit upper triangular. +*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) + + W := C2' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + zcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * + work_dim1 + 1], &c__1); + zlacgv_(n, &work[j * work_dim1 + 1], &c__1); +/* L70: */ + } + +/* W := W * V2 */ + + ztrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b60, + &v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], + ldwork); + if (*m > *k) { + +/* W := W + C1'*V1 */ + + i__1 = *m - *k; + zgemm_("Conjugate transpose", "No transpose", n, k, &i__1, + &c_b60, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b60, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + ztrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b60, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V * W' */ + + if (*m > *k) { + +/* C1 := C1 - V1 * W' */ + + i__1 = *m - *k; + z__1.r = -1., z__1.i = -0.; + zgemm_("No transpose", "Conjugate transpose", &i__1, n, k, + &z__1, &v[v_offset], ldv, &work[work_offset], + ldwork, &c_b60, &c__[c_offset], ldc); + } + +/* W := W * V2' */ + + ztrmm_("Right", "Upper", "Conjugate transpose", "Unit", n, k, + &c_b60, &v[*m - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + +/* C2 := C2 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = *m - *k + j + i__ * c_dim1; + i__4 = *m - *k + j + i__ * c_dim1; + d_cnjg(&z__2, &work[i__ + j * work_dim1]); + z__1.r = c__[i__4].r - z__2.r, z__1.i = c__[i__4].i - + z__2.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L80: */ + } +/* L90: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + W := C * V = (C1*V1 + C2*V2) (stored in WORK) + + W := C2 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + zcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ + j * work_dim1 + 1], &c__1); +/* L100: */ + } + +/* W := W * V2 */ + + ztrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b60, + &v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], + ldwork); + if (*n > *k) { + +/* W := W + C1 * V1 */ + + i__1 = *n - *k; + zgemm_("No transpose", "No transpose", m, k, &i__1, & + c_b60, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b60, &work[work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + ztrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b60, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V' */ + + if (*n > *k) { + +/* C1 := C1 - W * V1' */ + + i__1 = *n - *k; + z__1.r = -1., z__1.i = -0.; + zgemm_("No transpose", "Conjugate transpose", m, &i__1, k, + &z__1, &work[work_offset], ldwork, &v[v_offset], + ldv, &c_b60, &c__[c_offset], ldc); + } + +/* W := W * V2' */ + + ztrmm_("Right", "Upper", "Conjugate transpose", "Unit", m, k, + &c_b60, &v[*n - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + +/* C2 := C2 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + (*n - *k + j) * c_dim1; + i__4 = i__ + (*n - *k + j) * c_dim1; + i__5 = i__ + j * work_dim1; + z__1.r = c__[i__4].r - work[i__5].r, z__1.i = c__[ + i__4].i - work[i__5].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L110: */ + } +/* L120: */ + } + } + } + + } else if (lsame_(storev, "R")) { + + if (lsame_(direct, "F")) { + +/* + Let V = ( V1 V2 ) (V1: first K columns) + where V1 is unit upper triangular. +*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) + + W := C1' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + zcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], + &c__1); + zlacgv_(n, &work[j * work_dim1 + 1], &c__1); +/* L130: */ + } + +/* W := W * V1' */ + + ztrmm_("Right", "Upper", "Conjugate transpose", "Unit", n, k, + &c_b60, &v[v_offset], ldv, &work[work_offset], ldwork); + if (*m > *k) { + +/* W := W + C2'*V2' */ + + i__1 = *m - *k; + zgemm_("Conjugate transpose", "Conjugate transpose", n, k, + &i__1, &c_b60, &c__[*k + 1 + c_dim1], ldc, &v[(* + k + 1) * v_dim1 + 1], ldv, &c_b60, &work[ + work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + ztrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b60, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V' * W' */ + + if (*m > *k) { + +/* C2 := C2 - V2' * W' */ + + i__1 = *m - *k; + z__1.r = -1., z__1.i = -0.; + zgemm_("Conjugate transpose", "Conjugate transpose", & + i__1, n, k, &z__1, &v[(*k + 1) * v_dim1 + 1], ldv, + &work[work_offset], ldwork, &c_b60, &c__[*k + 1 + + c_dim1], ldc); + } + +/* W := W * V1 */ + + ztrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b60, + &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ * c_dim1; + i__4 = j + i__ * c_dim1; + d_cnjg(&z__2, &work[i__ + j * work_dim1]); + z__1.r = c__[i__4].r - z__2.r, z__1.i = c__[i__4].i - + z__2.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L140: */ + } +/* L150: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + W := C * V' = (C1*V1' + C2*V2') (stored in WORK) + + W := C1 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + zcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L160: */ + } + +/* W := W * V1' */ + + ztrmm_("Right", "Upper", "Conjugate transpose", "Unit", m, k, + &c_b60, &v[v_offset], ldv, &work[work_offset], ldwork); + if (*n > *k) { + +/* W := W + C2 * V2' */ + + i__1 = *n - *k; + zgemm_("No transpose", "Conjugate transpose", m, k, &i__1, + &c_b60, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k + + 1) * v_dim1 + 1], ldv, &c_b60, &work[ + work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + ztrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b60, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V */ + + if (*n > *k) { + +/* C2 := C2 - W * V2 */ + + i__1 = *n - *k; + z__1.r = -1., z__1.i = -0.; + zgemm_("No transpose", "No transpose", m, &i__1, k, &z__1, + &work[work_offset], ldwork, &v[(*k + 1) * v_dim1 + + 1], ldv, &c_b60, &c__[(*k + 1) * c_dim1 + 1], + ldc); + } + +/* W := W * V1 */ + + ztrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b60, + &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * work_dim1; + z__1.r = c__[i__4].r - work[i__5].r, z__1.i = c__[ + i__4].i - work[i__5].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L170: */ + } +/* L180: */ + } + + } + + } else { + +/* + Let V = ( V1 V2 ) (V2: last K columns) + where V2 is unit lower triangular. +*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) + + W := C2' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + zcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * + work_dim1 + 1], &c__1); + zlacgv_(n, &work[j * work_dim1 + 1], &c__1); +/* L190: */ + } + +/* W := W * V2' */ + + ztrmm_("Right", "Lower", "Conjugate transpose", "Unit", n, k, + &c_b60, &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + if (*m > *k) { + +/* W := W + C1'*V1' */ + + i__1 = *m - *k; + zgemm_("Conjugate transpose", "Conjugate transpose", n, k, + &i__1, &c_b60, &c__[c_offset], ldc, &v[v_offset], + ldv, &c_b60, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + ztrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b60, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V' * W' */ + + if (*m > *k) { + +/* C1 := C1 - V1' * W' */ + + i__1 = *m - *k; + z__1.r = -1., z__1.i = -0.; + zgemm_("Conjugate transpose", "Conjugate transpose", & + i__1, n, k, &z__1, &v[v_offset], ldv, &work[ + work_offset], ldwork, &c_b60, &c__[c_offset], ldc); + } + +/* W := W * V2 */ + + ztrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b60, + &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + +/* C2 := C2 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = *m - *k + j + i__ * c_dim1; + i__4 = *m - *k + j + i__ * c_dim1; + d_cnjg(&z__2, &work[i__ + j * work_dim1]); + z__1.r = c__[i__4].r - z__2.r, z__1.i = c__[i__4].i - + z__2.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L200: */ + } +/* L210: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + W := C * V' = (C1*V1' + C2*V2') (stored in WORK) + + W := C2 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + zcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ + j * work_dim1 + 1], &c__1); +/* L220: */ + } + +/* W := W * V2' */ + + ztrmm_("Right", "Lower", "Conjugate transpose", "Unit", m, k, + &c_b60, &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + if (*n > *k) { + +/* W := W + C1 * V1' */ + + i__1 = *n - *k; + zgemm_("No transpose", "Conjugate transpose", m, k, &i__1, + &c_b60, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b60, &work[work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + ztrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b60, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V */ + + if (*n > *k) { + +/* C1 := C1 - W * V1 */ + + i__1 = *n - *k; + z__1.r = -1., z__1.i = -0.; + zgemm_("No transpose", "No transpose", m, &i__1, k, &z__1, + &work[work_offset], ldwork, &v[v_offset], ldv, & + c_b60, &c__[c_offset], ldc); + } + +/* W := W * V2 */ + + ztrmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b60, + &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + (*n - *k + j) * c_dim1; + i__4 = i__ + (*n - *k + j) * c_dim1; + i__5 = i__ + j * work_dim1; + z__1.r = c__[i__4].r - work[i__5].r, z__1.i = c__[ + i__4].i - work[i__5].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L230: */ + } +/* L240: */ + } + + } + + } + } + + return 0; + +/* End of ZLARFB */ + +} /* zlarfb_ */ + +/* Subroutine */ int zlarfg_(integer *n, doublecomplex *alpha, doublecomplex * + x, integer *incx, doublecomplex *tau) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + doublecomplex z__1, z__2; + + /* Builtin functions */ + double d_imag(doublecomplex *), d_sign(doublereal *, doublereal *); + + /* Local variables */ + static integer j, knt; + static doublereal beta, alphi, alphr; + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *); + static doublereal xnorm; + extern doublereal dlapy3_(doublereal *, doublereal *, doublereal *), + dznrm2_(integer *, doublecomplex *, integer *), dlamch_(char *); + static doublereal safmin; + extern /* Subroutine */ int zdscal_(integer *, doublereal *, + doublecomplex *, integer *); + static doublereal rsafmn; + extern /* Double Complex */ VOID zladiv_(doublecomplex *, doublecomplex *, + doublecomplex *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZLARFG generates a complex elementary reflector H of order n, such + that + + H' * ( alpha ) = ( beta ), H' * H = I. + ( x ) ( 0 ) + + where alpha and beta are scalars, with beta real, and x is an + (n-1)-element complex vector. H is represented in the form + + H = I - tau * ( 1 ) * ( 1 v' ) , + ( v ) + + where tau is a complex scalar and v is a complex (n-1)-element + vector. Note that H is not hermitian. + + If the elements of x are all zero and alpha is real, then tau = 0 + and H is taken to be the unit matrix. + + Otherwise 1 <= real(tau) <= 2 and abs(tau-1) <= 1 . + + Arguments + ========= + + N (input) INTEGER + The order of the elementary reflector. + + ALPHA (input/output) COMPLEX*16 + On entry, the value alpha. + On exit, it is overwritten with the value beta. + + X (input/output) COMPLEX*16 array, dimension + (1+(N-2)*abs(INCX)) + On entry, the vector x. + On exit, it is overwritten with the vector v. + + INCX (input) INTEGER + The increment between elements of X. INCX > 0. + + TAU (output) COMPLEX*16 + The value tau. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n <= 0) { + tau->r = 0., tau->i = 0.; + return 0; + } + + i__1 = *n - 1; + xnorm = dznrm2_(&i__1, &x[1], incx); + alphr = alpha->r; + alphi = d_imag(alpha); + + if (xnorm == 0. && alphi == 0.) { + +/* H = I */ + + tau->r = 0., tau->i = 0.; + } else { + +/* general case */ + + d__1 = dlapy3_(&alphr, &alphi, &xnorm); + beta = -d_sign(&d__1, &alphr); + safmin = SAFEMINIMUM / EPSILON; + rsafmn = 1. / safmin; + + if (abs(beta) < safmin) { + +/* XNORM, BETA may be inaccurate; scale X and recompute them */ + + knt = 0; +L10: + ++knt; + i__1 = *n - 1; + zdscal_(&i__1, &rsafmn, &x[1], incx); + beta *= rsafmn; + alphi *= rsafmn; + alphr *= rsafmn; + if (abs(beta) < safmin) { + goto L10; + } + +/* New BETA is at most 1, at least SAFMIN */ + + i__1 = *n - 1; + xnorm = dznrm2_(&i__1, &x[1], incx); + z__1.r = alphr, z__1.i = alphi; + alpha->r = z__1.r, alpha->i = z__1.i; + d__1 = dlapy3_(&alphr, &alphi, &xnorm); + beta = -d_sign(&d__1, &alphr); + d__1 = (beta - alphr) / beta; + d__2 = -alphi / beta; + z__1.r = d__1, z__1.i = d__2; + tau->r = z__1.r, tau->i = z__1.i; + z__2.r = alpha->r - beta, z__2.i = alpha->i; + zladiv_(&z__1, &c_b60, &z__2); + alpha->r = z__1.r, alpha->i = z__1.i; + i__1 = *n - 1; + zscal_(&i__1, alpha, &x[1], incx); + +/* If ALPHA is subnormal, it may lose relative accuracy */ + + alpha->r = beta, alpha->i = 0.; + i__1 = knt; + for (j = 1; j <= i__1; ++j) { + z__1.r = safmin * alpha->r, z__1.i = safmin * alpha->i; + alpha->r = z__1.r, alpha->i = z__1.i; +/* L20: */ + } + } else { + d__1 = (beta - alphr) / beta; + d__2 = -alphi / beta; + z__1.r = d__1, z__1.i = d__2; + tau->r = z__1.r, tau->i = z__1.i; + z__2.r = alpha->r - beta, z__2.i = alpha->i; + zladiv_(&z__1, &c_b60, &z__2); + alpha->r = z__1.r, alpha->i = z__1.i; + i__1 = *n - 1; + zscal_(&i__1, alpha, &x[1], incx); + alpha->r = beta, alpha->i = 0.; + } + } + + return 0; + +/* End of ZLARFG */ + +} /* zlarfg_ */ + +/* Subroutine */ int zlarft_(char *direct, char *storev, integer *n, integer * + k, doublecomplex *v, integer *ldv, doublecomplex *tau, doublecomplex * + t, integer *ldt) +{ + /* System generated locals */ + integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4; + doublecomplex z__1; + + /* Local variables */ + static integer i__, j; + static doublecomplex vii; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *), + ztrmv_(char *, char *, char *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *), + zlacgv_(integer *, doublecomplex *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZLARFT forms the triangular factor T of a complex block reflector H + of order n, which is defined as a product of k elementary reflectors. + + If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; + + If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. + + If STOREV = 'C', the vector which defines the elementary reflector + H(i) is stored in the i-th column of the array V, and + + H = I - V * T * V' + + If STOREV = 'R', the vector which defines the elementary reflector + H(i) is stored in the i-th row of the array V, and + + H = I - V' * T * V + + Arguments + ========= + + DIRECT (input) CHARACTER*1 + Specifies the order in which the elementary reflectors are + multiplied to form the block reflector: + = 'F': H = H(1) H(2) . . . H(k) (Forward) + = 'B': H = H(k) . . . H(2) H(1) (Backward) + + STOREV (input) CHARACTER*1 + Specifies how the vectors which define the elementary + reflectors are stored (see also Further Details): + = 'C': columnwise + = 'R': rowwise + + N (input) INTEGER + The order of the block reflector H. N >= 0. + + K (input) INTEGER + The order of the triangular factor T (= the number of + elementary reflectors). K >= 1. + + V (input/output) COMPLEX*16 array, dimension + (LDV,K) if STOREV = 'C' + (LDV,N) if STOREV = 'R' + The matrix V. See further details. + + LDV (input) INTEGER + The leading dimension of the array V. + If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. + + TAU (input) COMPLEX*16 array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i). + + T (output) COMPLEX*16 array, dimension (LDT,K) + The k by k triangular factor T of the block reflector. + If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is + lower triangular. The rest of the array is not used. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= K. + + Further Details + =============== + + The shape of the matrix V and the storage of the vectors which define + the H(i) is best illustrated by the following example with n = 5 and + k = 3. The elements equal to 1 are not stored; the corresponding + array elements are modified but restored on exit. The rest of the + array is not used. + + DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': + + V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) + ( v1 1 ) ( 1 v2 v2 v2 ) + ( v1 v2 1 ) ( 1 v3 v3 ) + ( v1 v2 v3 ) + ( v1 v2 v3 ) + + DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': + + V = ( v1 v2 v3 ) V = ( v1 v1 1 ) + ( v1 v2 v3 ) ( v2 v2 v2 1 ) + ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) + ( 1 v3 ) + ( 1 ) + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + --tau; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + + /* Function Body */ + if (*n == 0) { + return 0; + } + + if (lsame_(direct, "F")) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + if (tau[i__2].r == 0. && tau[i__2].i == 0.) { + +/* H(i) = I */ + + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + i__3 = j + i__ * t_dim1; + t[i__3].r = 0., t[i__3].i = 0.; +/* L10: */ + } + } else { + +/* general case */ + + i__2 = i__ + i__ * v_dim1; + vii.r = v[i__2].r, vii.i = v[i__2].i; + i__2 = i__ + i__ * v_dim1; + v[i__2].r = 1., v[i__2].i = 0.; + if (lsame_(storev, "C")) { + +/* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */ + + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + i__4 = i__; + z__1.r = -tau[i__4].r, z__1.i = -tau[i__4].i; + zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &v[i__ + + v_dim1], ldv, &v[i__ + i__ * v_dim1], &c__1, & + c_b59, &t[i__ * t_dim1 + 1], &c__1); + } else { + +/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */ + + if (i__ < *n) { + i__2 = *n - i__; + zlacgv_(&i__2, &v[i__ + (i__ + 1) * v_dim1], ldv); + } + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + i__4 = i__; + z__1.r = -tau[i__4].r, z__1.i = -tau[i__4].i; + zgemv_("No transpose", &i__2, &i__3, &z__1, &v[i__ * + v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, & + c_b59, &t[i__ * t_dim1 + 1], &c__1); + if (i__ < *n) { + i__2 = *n - i__; + zlacgv_(&i__2, &v[i__ + (i__ + 1) * v_dim1], ldv); + } + } + i__2 = i__ + i__ * v_dim1; + v[i__2].r = vii.r, v[i__2].i = vii.i; + +/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ + + i__2 = i__ - 1; + ztrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ + t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); + i__2 = i__ + i__ * t_dim1; + i__3 = i__; + t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i; + } +/* L20: */ + } + } else { + for (i__ = *k; i__ >= 1; --i__) { + i__1 = i__; + if (tau[i__1].r == 0. && tau[i__1].i == 0.) { + +/* H(i) = I */ + + i__1 = *k; + for (j = i__; j <= i__1; ++j) { + i__2 = j + i__ * t_dim1; + t[i__2].r = 0., t[i__2].i = 0.; +/* L30: */ + } + } else { + +/* general case */ + + if (i__ < *k) { + if (lsame_(storev, "C")) { + i__1 = *n - *k + i__ + i__ * v_dim1; + vii.r = v[i__1].r, vii.i = v[i__1].i; + i__1 = *n - *k + i__ + i__ * v_dim1; + v[i__1].r = 1., v[i__1].i = 0.; + +/* + T(i+1:k,i) := + - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) +*/ + + i__1 = *n - *k + i__; + i__2 = *k - i__; + i__3 = i__; + z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i; + zgemv_("Conjugate transpose", &i__1, &i__2, &z__1, &v[ + (i__ + 1) * v_dim1 + 1], ldv, &v[i__ * v_dim1 + + 1], &c__1, &c_b59, &t[i__ + 1 + i__ * + t_dim1], &c__1); + i__1 = *n - *k + i__ + i__ * v_dim1; + v[i__1].r = vii.r, v[i__1].i = vii.i; + } else { + i__1 = i__ + (*n - *k + i__) * v_dim1; + vii.r = v[i__1].r, vii.i = v[i__1].i; + i__1 = i__ + (*n - *k + i__) * v_dim1; + v[i__1].r = 1., v[i__1].i = 0.; + +/* + T(i+1:k,i) := + - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' +*/ + + i__1 = *n - *k + i__ - 1; + zlacgv_(&i__1, &v[i__ + v_dim1], ldv); + i__1 = *k - i__; + i__2 = *n - *k + i__; + i__3 = i__; + z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i; + zgemv_("No transpose", &i__1, &i__2, &z__1, &v[i__ + + 1 + v_dim1], ldv, &v[i__ + v_dim1], ldv, & + c_b59, &t[i__ + 1 + i__ * t_dim1], &c__1); + i__1 = *n - *k + i__ - 1; + zlacgv_(&i__1, &v[i__ + v_dim1], ldv); + i__1 = i__ + (*n - *k + i__) * v_dim1; + v[i__1].r = vii.r, v[i__1].i = vii.i; + } + +/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ + + i__1 = *k - i__; + ztrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ + + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * + t_dim1], &c__1) + ; + } + i__1 = i__ + i__ * t_dim1; + i__2 = i__; + t[i__1].r = tau[i__2].r, t[i__1].i = tau[i__2].i; + } +/* L40: */ + } + } + return 0; + +/* End of ZLARFT */ + +} /* zlarft_ */ + +/* Subroutine */ int zlarfx_(char *side, integer *m, integer *n, + doublecomplex *v, doublecomplex *tau, doublecomplex *c__, integer * + ldc, doublecomplex *work) +{ + /* System generated locals */ + integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, + i__9, i__10, i__11; + doublecomplex z__1, z__2, z__3, z__4, z__5, z__6, z__7, z__8, z__9, z__10, + z__11, z__12, z__13, z__14, z__15, z__16, z__17, z__18, z__19; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer j; + static doublecomplex t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, + v5, v6, v7, v8, v9, t10, v10, sum; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZLARFX applies a complex elementary reflector H to a complex m by n + matrix C, from either the left or the right. H is represented in the + form + + H = I - tau * v * v' + + where tau is a complex scalar and v is a complex vector. + + If tau = 0, then H is taken to be the unit matrix + + This version uses inline code if H has order < 11. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': form H * C + = 'R': form C * H + + M (input) INTEGER + The number of rows of the matrix C. + + N (input) INTEGER + The number of columns of the matrix C. + + V (input) COMPLEX*16 array, dimension (M) if SIDE = 'L' + or (N) if SIDE = 'R' + The vector v in the representation of H. + + TAU (input) COMPLEX*16 + The value tau in the representation of H. + + C (input/output) COMPLEX*16 array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by the matrix H * C if SIDE = 'L', + or C * H if SIDE = 'R'. + + LDC (input) INTEGER + The leading dimension of the array C. LDA >= max(1,M). + + WORK (workspace) COMPLEX*16 array, dimension (N) if SIDE = 'L' + or (M) if SIDE = 'R' + WORK is not referenced if H has order < 11. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + if (tau->r == 0. && tau->i == 0.) { + return 0; + } + if (lsame_(side, "L")) { + +/* Form H * C, where H has order m. */ + + switch (*m) { + case 1: goto L10; + case 2: goto L30; + case 3: goto L50; + case 4: goto L70; + case 5: goto L90; + case 6: goto L110; + case 7: goto L130; + case 8: goto L150; + case 9: goto L170; + case 10: goto L190; + } + +/* + Code for general M + + w := C'*v +*/ + + zgemv_("Conjugate transpose", m, n, &c_b60, &c__[c_offset], ldc, &v[1] + , &c__1, &c_b59, &work[1], &c__1); + +/* C := C - tau * v * w' */ + + z__1.r = -tau->r, z__1.i = -tau->i; + zgerc_(m, n, &z__1, &v[1], &c__1, &work[1], &c__1, &c__[c_offset], + ldc); + goto L410; +L10: + +/* Special code for 1 x 1 Householder */ + + z__3.r = tau->r * v[1].r - tau->i * v[1].i, z__3.i = tau->r * v[1].i + + tau->i * v[1].r; + d_cnjg(&z__4, &v[1]); + z__2.r = z__3.r * z__4.r - z__3.i * z__4.i, z__2.i = z__3.r * z__4.i + + z__3.i * z__4.r; + z__1.r = 1. - z__2.r, z__1.i = 0. - z__2.i; + t1.r = z__1.r, t1.i = z__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + z__1.r = t1.r * c__[i__3].r - t1.i * c__[i__3].i, z__1.i = t1.r * + c__[i__3].i + t1.i * c__[i__3].r; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; +/* L20: */ + } + goto L410; +L30: + +/* Special code for 2 x 2 Householder */ + + d_cnjg(&z__1, &v[1]); + v1.r = z__1.r, v1.i = z__1.i; + d_cnjg(&z__2, &v1); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t1.r = z__1.r, t1.i = z__1.i; + d_cnjg(&z__1, &v[2]); + v2.r = z__1.r, v2.i = z__1.i; + d_cnjg(&z__2, &v2); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t2.r = z__1.r, t2.i = z__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; + z__2.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__2.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j * c_dim1 + 2; + z__3.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__3.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + sum.r = z__1.r, sum.i = z__1.i; + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + + sum.i * t1.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 2; + i__3 = j * c_dim1 + 2; + z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + + sum.i * t2.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; +/* L40: */ + } + goto L410; +L50: + +/* Special code for 3 x 3 Householder */ + + d_cnjg(&z__1, &v[1]); + v1.r = z__1.r, v1.i = z__1.i; + d_cnjg(&z__2, &v1); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t1.r = z__1.r, t1.i = z__1.i; + d_cnjg(&z__1, &v[2]); + v2.r = z__1.r, v2.i = z__1.i; + d_cnjg(&z__2, &v2); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t2.r = z__1.r, t2.i = z__1.i; + d_cnjg(&z__1, &v[3]); + v3.r = z__1.r, v3.i = z__1.i; + d_cnjg(&z__2, &v3); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t3.r = z__1.r, t3.i = z__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; + z__3.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__3.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j * c_dim1 + 2; + z__4.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__4.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + z__4.i; + i__4 = j * c_dim1 + 3; + z__5.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__5.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i; + sum.r = z__1.r, sum.i = z__1.i; + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + + sum.i * t1.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 2; + i__3 = j * c_dim1 + 2; + z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + + sum.i * t2.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 3; + i__3 = j * c_dim1 + 3; + z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + + sum.i * t3.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; +/* L60: */ + } + goto L410; +L70: + +/* Special code for 4 x 4 Householder */ + + d_cnjg(&z__1, &v[1]); + v1.r = z__1.r, v1.i = z__1.i; + d_cnjg(&z__2, &v1); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t1.r = z__1.r, t1.i = z__1.i; + d_cnjg(&z__1, &v[2]); + v2.r = z__1.r, v2.i = z__1.i; + d_cnjg(&z__2, &v2); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t2.r = z__1.r, t2.i = z__1.i; + d_cnjg(&z__1, &v[3]); + v3.r = z__1.r, v3.i = z__1.i; + d_cnjg(&z__2, &v3); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t3.r = z__1.r, t3.i = z__1.i; + d_cnjg(&z__1, &v[4]); + v4.r = z__1.r, v4.i = z__1.i; + d_cnjg(&z__2, &v4); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t4.r = z__1.r, t4.i = z__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; + z__4.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__4.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j * c_dim1 + 2; + z__5.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__5.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + z__3.r = z__4.r + z__5.r, z__3.i = z__4.i + z__5.i; + i__4 = j * c_dim1 + 3; + z__6.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__6.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + z__2.r = z__3.r + z__6.r, z__2.i = z__3.i + z__6.i; + i__5 = j * c_dim1 + 4; + z__7.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__7.i = v4.r * + c__[i__5].i + v4.i * c__[i__5].r; + z__1.r = z__2.r + z__7.r, z__1.i = z__2.i + z__7.i; + sum.r = z__1.r, sum.i = z__1.i; + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + + sum.i * t1.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 2; + i__3 = j * c_dim1 + 2; + z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + + sum.i * t2.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 3; + i__3 = j * c_dim1 + 3; + z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + + sum.i * t3.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 4; + i__3 = j * c_dim1 + 4; + z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + + sum.i * t4.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; +/* L80: */ + } + goto L410; +L90: + +/* Special code for 5 x 5 Householder */ + + d_cnjg(&z__1, &v[1]); + v1.r = z__1.r, v1.i = z__1.i; + d_cnjg(&z__2, &v1); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t1.r = z__1.r, t1.i = z__1.i; + d_cnjg(&z__1, &v[2]); + v2.r = z__1.r, v2.i = z__1.i; + d_cnjg(&z__2, &v2); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t2.r = z__1.r, t2.i = z__1.i; + d_cnjg(&z__1, &v[3]); + v3.r = z__1.r, v3.i = z__1.i; + d_cnjg(&z__2, &v3); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t3.r = z__1.r, t3.i = z__1.i; + d_cnjg(&z__1, &v[4]); + v4.r = z__1.r, v4.i = z__1.i; + d_cnjg(&z__2, &v4); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t4.r = z__1.r, t4.i = z__1.i; + d_cnjg(&z__1, &v[5]); + v5.r = z__1.r, v5.i = z__1.i; + d_cnjg(&z__2, &v5); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t5.r = z__1.r, t5.i = z__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; + z__5.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__5.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j * c_dim1 + 2; + z__6.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__6.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + z__4.r = z__5.r + z__6.r, z__4.i = z__5.i + z__6.i; + i__4 = j * c_dim1 + 3; + z__7.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__7.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + z__3.r = z__4.r + z__7.r, z__3.i = z__4.i + z__7.i; + i__5 = j * c_dim1 + 4; + z__8.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__8.i = v4.r * + c__[i__5].i + v4.i * c__[i__5].r; + z__2.r = z__3.r + z__8.r, z__2.i = z__3.i + z__8.i; + i__6 = j * c_dim1 + 5; + z__9.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__9.i = v5.r * + c__[i__6].i + v5.i * c__[i__6].r; + z__1.r = z__2.r + z__9.r, z__1.i = z__2.i + z__9.i; + sum.r = z__1.r, sum.i = z__1.i; + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + + sum.i * t1.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 2; + i__3 = j * c_dim1 + 2; + z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + + sum.i * t2.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 3; + i__3 = j * c_dim1 + 3; + z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + + sum.i * t3.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 4; + i__3 = j * c_dim1 + 4; + z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + + sum.i * t4.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 5; + i__3 = j * c_dim1 + 5; + z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + + sum.i * t5.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; +/* L100: */ + } + goto L410; +L110: + +/* Special code for 6 x 6 Householder */ + + d_cnjg(&z__1, &v[1]); + v1.r = z__1.r, v1.i = z__1.i; + d_cnjg(&z__2, &v1); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t1.r = z__1.r, t1.i = z__1.i; + d_cnjg(&z__1, &v[2]); + v2.r = z__1.r, v2.i = z__1.i; + d_cnjg(&z__2, &v2); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t2.r = z__1.r, t2.i = z__1.i; + d_cnjg(&z__1, &v[3]); + v3.r = z__1.r, v3.i = z__1.i; + d_cnjg(&z__2, &v3); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t3.r = z__1.r, t3.i = z__1.i; + d_cnjg(&z__1, &v[4]); + v4.r = z__1.r, v4.i = z__1.i; + d_cnjg(&z__2, &v4); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t4.r = z__1.r, t4.i = z__1.i; + d_cnjg(&z__1, &v[5]); + v5.r = z__1.r, v5.i = z__1.i; + d_cnjg(&z__2, &v5); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t5.r = z__1.r, t5.i = z__1.i; + d_cnjg(&z__1, &v[6]); + v6.r = z__1.r, v6.i = z__1.i; + d_cnjg(&z__2, &v6); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t6.r = z__1.r, t6.i = z__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; + z__6.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__6.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j * c_dim1 + 2; + z__7.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__7.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + z__5.r = z__6.r + z__7.r, z__5.i = z__6.i + z__7.i; + i__4 = j * c_dim1 + 3; + z__8.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__8.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + z__4.r = z__5.r + z__8.r, z__4.i = z__5.i + z__8.i; + i__5 = j * c_dim1 + 4; + z__9.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__9.i = v4.r * + c__[i__5].i + v4.i * c__[i__5].r; + z__3.r = z__4.r + z__9.r, z__3.i = z__4.i + z__9.i; + i__6 = j * c_dim1 + 5; + z__10.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__10.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + z__2.r = z__3.r + z__10.r, z__2.i = z__3.i + z__10.i; + i__7 = j * c_dim1 + 6; + z__11.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__11.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + z__1.r = z__2.r + z__11.r, z__1.i = z__2.i + z__11.i; + sum.r = z__1.r, sum.i = z__1.i; + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + + sum.i * t1.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 2; + i__3 = j * c_dim1 + 2; + z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + + sum.i * t2.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 3; + i__3 = j * c_dim1 + 3; + z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + + sum.i * t3.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 4; + i__3 = j * c_dim1 + 4; + z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + + sum.i * t4.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 5; + i__3 = j * c_dim1 + 5; + z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + + sum.i * t5.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 6; + i__3 = j * c_dim1 + 6; + z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + + sum.i * t6.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; +/* L120: */ + } + goto L410; +L130: + +/* Special code for 7 x 7 Householder */ + + d_cnjg(&z__1, &v[1]); + v1.r = z__1.r, v1.i = z__1.i; + d_cnjg(&z__2, &v1); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t1.r = z__1.r, t1.i = z__1.i; + d_cnjg(&z__1, &v[2]); + v2.r = z__1.r, v2.i = z__1.i; + d_cnjg(&z__2, &v2); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t2.r = z__1.r, t2.i = z__1.i; + d_cnjg(&z__1, &v[3]); + v3.r = z__1.r, v3.i = z__1.i; + d_cnjg(&z__2, &v3); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t3.r = z__1.r, t3.i = z__1.i; + d_cnjg(&z__1, &v[4]); + v4.r = z__1.r, v4.i = z__1.i; + d_cnjg(&z__2, &v4); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t4.r = z__1.r, t4.i = z__1.i; + d_cnjg(&z__1, &v[5]); + v5.r = z__1.r, v5.i = z__1.i; + d_cnjg(&z__2, &v5); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t5.r = z__1.r, t5.i = z__1.i; + d_cnjg(&z__1, &v[6]); + v6.r = z__1.r, v6.i = z__1.i; + d_cnjg(&z__2, &v6); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t6.r = z__1.r, t6.i = z__1.i; + d_cnjg(&z__1, &v[7]); + v7.r = z__1.r, v7.i = z__1.i; + d_cnjg(&z__2, &v7); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t7.r = z__1.r, t7.i = z__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; + z__7.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__7.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j * c_dim1 + 2; + z__8.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__8.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + z__6.r = z__7.r + z__8.r, z__6.i = z__7.i + z__8.i; + i__4 = j * c_dim1 + 3; + z__9.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__9.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + z__5.r = z__6.r + z__9.r, z__5.i = z__6.i + z__9.i; + i__5 = j * c_dim1 + 4; + z__10.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__10.i = v4.r + * c__[i__5].i + v4.i * c__[i__5].r; + z__4.r = z__5.r + z__10.r, z__4.i = z__5.i + z__10.i; + i__6 = j * c_dim1 + 5; + z__11.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__11.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + z__3.r = z__4.r + z__11.r, z__3.i = z__4.i + z__11.i; + i__7 = j * c_dim1 + 6; + z__12.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__12.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + z__2.r = z__3.r + z__12.r, z__2.i = z__3.i + z__12.i; + i__8 = j * c_dim1 + 7; + z__13.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__13.i = v7.r + * c__[i__8].i + v7.i * c__[i__8].r; + z__1.r = z__2.r + z__13.r, z__1.i = z__2.i + z__13.i; + sum.r = z__1.r, sum.i = z__1.i; + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + + sum.i * t1.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 2; + i__3 = j * c_dim1 + 2; + z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + + sum.i * t2.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 3; + i__3 = j * c_dim1 + 3; + z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + + sum.i * t3.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 4; + i__3 = j * c_dim1 + 4; + z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + + sum.i * t4.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 5; + i__3 = j * c_dim1 + 5; + z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + + sum.i * t5.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 6; + i__3 = j * c_dim1 + 6; + z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + + sum.i * t6.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 7; + i__3 = j * c_dim1 + 7; + z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i + + sum.i * t7.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; +/* L140: */ + } + goto L410; +L150: + +/* Special code for 8 x 8 Householder */ + + d_cnjg(&z__1, &v[1]); + v1.r = z__1.r, v1.i = z__1.i; + d_cnjg(&z__2, &v1); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t1.r = z__1.r, t1.i = z__1.i; + d_cnjg(&z__1, &v[2]); + v2.r = z__1.r, v2.i = z__1.i; + d_cnjg(&z__2, &v2); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t2.r = z__1.r, t2.i = z__1.i; + d_cnjg(&z__1, &v[3]); + v3.r = z__1.r, v3.i = z__1.i; + d_cnjg(&z__2, &v3); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t3.r = z__1.r, t3.i = z__1.i; + d_cnjg(&z__1, &v[4]); + v4.r = z__1.r, v4.i = z__1.i; + d_cnjg(&z__2, &v4); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t4.r = z__1.r, t4.i = z__1.i; + d_cnjg(&z__1, &v[5]); + v5.r = z__1.r, v5.i = z__1.i; + d_cnjg(&z__2, &v5); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t5.r = z__1.r, t5.i = z__1.i; + d_cnjg(&z__1, &v[6]); + v6.r = z__1.r, v6.i = z__1.i; + d_cnjg(&z__2, &v6); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t6.r = z__1.r, t6.i = z__1.i; + d_cnjg(&z__1, &v[7]); + v7.r = z__1.r, v7.i = z__1.i; + d_cnjg(&z__2, &v7); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t7.r = z__1.r, t7.i = z__1.i; + d_cnjg(&z__1, &v[8]); + v8.r = z__1.r, v8.i = z__1.i; + d_cnjg(&z__2, &v8); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t8.r = z__1.r, t8.i = z__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; + z__8.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__8.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j * c_dim1 + 2; + z__9.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__9.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + z__7.r = z__8.r + z__9.r, z__7.i = z__8.i + z__9.i; + i__4 = j * c_dim1 + 3; + z__10.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__10.i = v3.r + * c__[i__4].i + v3.i * c__[i__4].r; + z__6.r = z__7.r + z__10.r, z__6.i = z__7.i + z__10.i; + i__5 = j * c_dim1 + 4; + z__11.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__11.i = v4.r + * c__[i__5].i + v4.i * c__[i__5].r; + z__5.r = z__6.r + z__11.r, z__5.i = z__6.i + z__11.i; + i__6 = j * c_dim1 + 5; + z__12.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__12.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + z__4.r = z__5.r + z__12.r, z__4.i = z__5.i + z__12.i; + i__7 = j * c_dim1 + 6; + z__13.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__13.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + z__3.r = z__4.r + z__13.r, z__3.i = z__4.i + z__13.i; + i__8 = j * c_dim1 + 7; + z__14.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__14.i = v7.r + * c__[i__8].i + v7.i * c__[i__8].r; + z__2.r = z__3.r + z__14.r, z__2.i = z__3.i + z__14.i; + i__9 = j * c_dim1 + 8; + z__15.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, z__15.i = v8.r + * c__[i__9].i + v8.i * c__[i__9].r; + z__1.r = z__2.r + z__15.r, z__1.i = z__2.i + z__15.i; + sum.r = z__1.r, sum.i = z__1.i; + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + + sum.i * t1.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 2; + i__3 = j * c_dim1 + 2; + z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + + sum.i * t2.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 3; + i__3 = j * c_dim1 + 3; + z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + + sum.i * t3.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 4; + i__3 = j * c_dim1 + 4; + z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + + sum.i * t4.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 5; + i__3 = j * c_dim1 + 5; + z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + + sum.i * t5.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 6; + i__3 = j * c_dim1 + 6; + z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + + sum.i * t6.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 7; + i__3 = j * c_dim1 + 7; + z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i + + sum.i * t7.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 8; + i__3 = j * c_dim1 + 8; + z__2.r = sum.r * t8.r - sum.i * t8.i, z__2.i = sum.r * t8.i + + sum.i * t8.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; +/* L160: */ + } + goto L410; +L170: + +/* Special code for 9 x 9 Householder */ + + d_cnjg(&z__1, &v[1]); + v1.r = z__1.r, v1.i = z__1.i; + d_cnjg(&z__2, &v1); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t1.r = z__1.r, t1.i = z__1.i; + d_cnjg(&z__1, &v[2]); + v2.r = z__1.r, v2.i = z__1.i; + d_cnjg(&z__2, &v2); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t2.r = z__1.r, t2.i = z__1.i; + d_cnjg(&z__1, &v[3]); + v3.r = z__1.r, v3.i = z__1.i; + d_cnjg(&z__2, &v3); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t3.r = z__1.r, t3.i = z__1.i; + d_cnjg(&z__1, &v[4]); + v4.r = z__1.r, v4.i = z__1.i; + d_cnjg(&z__2, &v4); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t4.r = z__1.r, t4.i = z__1.i; + d_cnjg(&z__1, &v[5]); + v5.r = z__1.r, v5.i = z__1.i; + d_cnjg(&z__2, &v5); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t5.r = z__1.r, t5.i = z__1.i; + d_cnjg(&z__1, &v[6]); + v6.r = z__1.r, v6.i = z__1.i; + d_cnjg(&z__2, &v6); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t6.r = z__1.r, t6.i = z__1.i; + d_cnjg(&z__1, &v[7]); + v7.r = z__1.r, v7.i = z__1.i; + d_cnjg(&z__2, &v7); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t7.r = z__1.r, t7.i = z__1.i; + d_cnjg(&z__1, &v[8]); + v8.r = z__1.r, v8.i = z__1.i; + d_cnjg(&z__2, &v8); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t8.r = z__1.r, t8.i = z__1.i; + d_cnjg(&z__1, &v[9]); + v9.r = z__1.r, v9.i = z__1.i; + d_cnjg(&z__2, &v9); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t9.r = z__1.r, t9.i = z__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; + z__9.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__9.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j * c_dim1 + 2; + z__10.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__10.i = v2.r + * c__[i__3].i + v2.i * c__[i__3].r; + z__8.r = z__9.r + z__10.r, z__8.i = z__9.i + z__10.i; + i__4 = j * c_dim1 + 3; + z__11.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__11.i = v3.r + * c__[i__4].i + v3.i * c__[i__4].r; + z__7.r = z__8.r + z__11.r, z__7.i = z__8.i + z__11.i; + i__5 = j * c_dim1 + 4; + z__12.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__12.i = v4.r + * c__[i__5].i + v4.i * c__[i__5].r; + z__6.r = z__7.r + z__12.r, z__6.i = z__7.i + z__12.i; + i__6 = j * c_dim1 + 5; + z__13.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__13.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + z__5.r = z__6.r + z__13.r, z__5.i = z__6.i + z__13.i; + i__7 = j * c_dim1 + 6; + z__14.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__14.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + z__4.r = z__5.r + z__14.r, z__4.i = z__5.i + z__14.i; + i__8 = j * c_dim1 + 7; + z__15.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__15.i = v7.r + * c__[i__8].i + v7.i * c__[i__8].r; + z__3.r = z__4.r + z__15.r, z__3.i = z__4.i + z__15.i; + i__9 = j * c_dim1 + 8; + z__16.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, z__16.i = v8.r + * c__[i__9].i + v8.i * c__[i__9].r; + z__2.r = z__3.r + z__16.r, z__2.i = z__3.i + z__16.i; + i__10 = j * c_dim1 + 9; + z__17.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, z__17.i = + v9.r * c__[i__10].i + v9.i * c__[i__10].r; + z__1.r = z__2.r + z__17.r, z__1.i = z__2.i + z__17.i; + sum.r = z__1.r, sum.i = z__1.i; + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + + sum.i * t1.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 2; + i__3 = j * c_dim1 + 2; + z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + + sum.i * t2.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 3; + i__3 = j * c_dim1 + 3; + z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + + sum.i * t3.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 4; + i__3 = j * c_dim1 + 4; + z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + + sum.i * t4.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 5; + i__3 = j * c_dim1 + 5; + z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + + sum.i * t5.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 6; + i__3 = j * c_dim1 + 6; + z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + + sum.i * t6.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 7; + i__3 = j * c_dim1 + 7; + z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i + + sum.i * t7.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 8; + i__3 = j * c_dim1 + 8; + z__2.r = sum.r * t8.r - sum.i * t8.i, z__2.i = sum.r * t8.i + + sum.i * t8.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 9; + i__3 = j * c_dim1 + 9; + z__2.r = sum.r * t9.r - sum.i * t9.i, z__2.i = sum.r * t9.i + + sum.i * t9.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; +/* L180: */ + } + goto L410; +L190: + +/* Special code for 10 x 10 Householder */ + + d_cnjg(&z__1, &v[1]); + v1.r = z__1.r, v1.i = z__1.i; + d_cnjg(&z__2, &v1); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t1.r = z__1.r, t1.i = z__1.i; + d_cnjg(&z__1, &v[2]); + v2.r = z__1.r, v2.i = z__1.i; + d_cnjg(&z__2, &v2); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t2.r = z__1.r, t2.i = z__1.i; + d_cnjg(&z__1, &v[3]); + v3.r = z__1.r, v3.i = z__1.i; + d_cnjg(&z__2, &v3); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t3.r = z__1.r, t3.i = z__1.i; + d_cnjg(&z__1, &v[4]); + v4.r = z__1.r, v4.i = z__1.i; + d_cnjg(&z__2, &v4); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t4.r = z__1.r, t4.i = z__1.i; + d_cnjg(&z__1, &v[5]); + v5.r = z__1.r, v5.i = z__1.i; + d_cnjg(&z__2, &v5); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t5.r = z__1.r, t5.i = z__1.i; + d_cnjg(&z__1, &v[6]); + v6.r = z__1.r, v6.i = z__1.i; + d_cnjg(&z__2, &v6); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t6.r = z__1.r, t6.i = z__1.i; + d_cnjg(&z__1, &v[7]); + v7.r = z__1.r, v7.i = z__1.i; + d_cnjg(&z__2, &v7); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t7.r = z__1.r, t7.i = z__1.i; + d_cnjg(&z__1, &v[8]); + v8.r = z__1.r, v8.i = z__1.i; + d_cnjg(&z__2, &v8); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t8.r = z__1.r, t8.i = z__1.i; + d_cnjg(&z__1, &v[9]); + v9.r = z__1.r, v9.i = z__1.i; + d_cnjg(&z__2, &v9); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t9.r = z__1.r, t9.i = z__1.i; + d_cnjg(&z__1, &v[10]); + v10.r = z__1.r, v10.i = z__1.i; + d_cnjg(&z__2, &v10); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t10.r = z__1.r, t10.i = z__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; + z__10.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__10.i = v1.r + * c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j * c_dim1 + 2; + z__11.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__11.i = v2.r + * c__[i__3].i + v2.i * c__[i__3].r; + z__9.r = z__10.r + z__11.r, z__9.i = z__10.i + z__11.i; + i__4 = j * c_dim1 + 3; + z__12.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__12.i = v3.r + * c__[i__4].i + v3.i * c__[i__4].r; + z__8.r = z__9.r + z__12.r, z__8.i = z__9.i + z__12.i; + i__5 = j * c_dim1 + 4; + z__13.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__13.i = v4.r + * c__[i__5].i + v4.i * c__[i__5].r; + z__7.r = z__8.r + z__13.r, z__7.i = z__8.i + z__13.i; + i__6 = j * c_dim1 + 5; + z__14.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__14.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + z__6.r = z__7.r + z__14.r, z__6.i = z__7.i + z__14.i; + i__7 = j * c_dim1 + 6; + z__15.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__15.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + z__5.r = z__6.r + z__15.r, z__5.i = z__6.i + z__15.i; + i__8 = j * c_dim1 + 7; + z__16.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__16.i = v7.r + * c__[i__8].i + v7.i * c__[i__8].r; + z__4.r = z__5.r + z__16.r, z__4.i = z__5.i + z__16.i; + i__9 = j * c_dim1 + 8; + z__17.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, z__17.i = v8.r + * c__[i__9].i + v8.i * c__[i__9].r; + z__3.r = z__4.r + z__17.r, z__3.i = z__4.i + z__17.i; + i__10 = j * c_dim1 + 9; + z__18.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, z__18.i = + v9.r * c__[i__10].i + v9.i * c__[i__10].r; + z__2.r = z__3.r + z__18.r, z__2.i = z__3.i + z__18.i; + i__11 = j * c_dim1 + 10; + z__19.r = v10.r * c__[i__11].r - v10.i * c__[i__11].i, z__19.i = + v10.r * c__[i__11].i + v10.i * c__[i__11].r; + z__1.r = z__2.r + z__19.r, z__1.i = z__2.i + z__19.i; + sum.r = z__1.r, sum.i = z__1.i; + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + + sum.i * t1.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 2; + i__3 = j * c_dim1 + 2; + z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + + sum.i * t2.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 3; + i__3 = j * c_dim1 + 3; + z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + + sum.i * t3.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 4; + i__3 = j * c_dim1 + 4; + z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + + sum.i * t4.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 5; + i__3 = j * c_dim1 + 5; + z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + + sum.i * t5.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 6; + i__3 = j * c_dim1 + 6; + z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + + sum.i * t6.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 7; + i__3 = j * c_dim1 + 7; + z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i + + sum.i * t7.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 8; + i__3 = j * c_dim1 + 8; + z__2.r = sum.r * t8.r - sum.i * t8.i, z__2.i = sum.r * t8.i + + sum.i * t8.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 9; + i__3 = j * c_dim1 + 9; + z__2.r = sum.r * t9.r - sum.i * t9.i, z__2.i = sum.r * t9.i + + sum.i * t9.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j * c_dim1 + 10; + i__3 = j * c_dim1 + 10; + z__2.r = sum.r * t10.r - sum.i * t10.i, z__2.i = sum.r * t10.i + + sum.i * t10.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; +/* L200: */ + } + goto L410; + } else { + +/* Form C * H, where H has order n. */ + + switch (*n) { + case 1: goto L210; + case 2: goto L230; + case 3: goto L250; + case 4: goto L270; + case 5: goto L290; + case 6: goto L310; + case 7: goto L330; + case 8: goto L350; + case 9: goto L370; + case 10: goto L390; + } + +/* + Code for general N + + w := C * v +*/ + + zgemv_("No transpose", m, n, &c_b60, &c__[c_offset], ldc, &v[1], & + c__1, &c_b59, &work[1], &c__1); + +/* C := C - tau * w * v' */ + + z__1.r = -tau->r, z__1.i = -tau->i; + zgerc_(m, n, &z__1, &work[1], &c__1, &v[1], &c__1, &c__[c_offset], + ldc); + goto L410; +L210: + +/* Special code for 1 x 1 Householder */ + + z__3.r = tau->r * v[1].r - tau->i * v[1].i, z__3.i = tau->r * v[1].i + + tau->i * v[1].r; + d_cnjg(&z__4, &v[1]); + z__2.r = z__3.r * z__4.r - z__3.i * z__4.i, z__2.i = z__3.r * z__4.i + + z__3.i * z__4.r; + z__1.r = 1. - z__2.r, z__1.i = 0. - z__2.i; + t1.r = z__1.r, t1.i = z__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + i__3 = j + c_dim1; + z__1.r = t1.r * c__[i__3].r - t1.i * c__[i__3].i, z__1.i = t1.r * + c__[i__3].i + t1.i * c__[i__3].r; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; +/* L220: */ + } + goto L410; +L230: + +/* Special code for 2 x 2 Householder */ + + v1.r = v[1].r, v1.i = v[1].i; + d_cnjg(&z__2, &v1); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t1.r = z__1.r, t1.i = z__1.i; + v2.r = v[2].r, v2.i = v[2].i; + d_cnjg(&z__2, &v2); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t2.r = z__1.r, t2.i = z__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + z__2.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__2.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j + (c_dim1 << 1); + z__3.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__3.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + sum.r = z__1.r, sum.i = z__1.i; + i__2 = j + c_dim1; + i__3 = j + c_dim1; + z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + + sum.i * t1.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); + z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + + sum.i * t2.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; +/* L240: */ + } + goto L410; +L250: + +/* Special code for 3 x 3 Householder */ + + v1.r = v[1].r, v1.i = v[1].i; + d_cnjg(&z__2, &v1); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t1.r = z__1.r, t1.i = z__1.i; + v2.r = v[2].r, v2.i = v[2].i; + d_cnjg(&z__2, &v2); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t2.r = z__1.r, t2.i = z__1.i; + v3.r = v[3].r, v3.i = v[3].i; + d_cnjg(&z__2, &v3); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t3.r = z__1.r, t3.i = z__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + z__3.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__3.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j + (c_dim1 << 1); + z__4.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__4.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + z__4.i; + i__4 = j + c_dim1 * 3; + z__5.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__5.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i; + sum.r = z__1.r, sum.i = z__1.i; + i__2 = j + c_dim1; + i__3 = j + c_dim1; + z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + + sum.i * t1.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); + z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + + sum.i * t2.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + c_dim1 * 3; + i__3 = j + c_dim1 * 3; + z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + + sum.i * t3.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; +/* L260: */ + } + goto L410; +L270: + +/* Special code for 4 x 4 Householder */ + + v1.r = v[1].r, v1.i = v[1].i; + d_cnjg(&z__2, &v1); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t1.r = z__1.r, t1.i = z__1.i; + v2.r = v[2].r, v2.i = v[2].i; + d_cnjg(&z__2, &v2); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t2.r = z__1.r, t2.i = z__1.i; + v3.r = v[3].r, v3.i = v[3].i; + d_cnjg(&z__2, &v3); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t3.r = z__1.r, t3.i = z__1.i; + v4.r = v[4].r, v4.i = v[4].i; + d_cnjg(&z__2, &v4); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t4.r = z__1.r, t4.i = z__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + z__4.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__4.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j + (c_dim1 << 1); + z__5.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__5.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + z__3.r = z__4.r + z__5.r, z__3.i = z__4.i + z__5.i; + i__4 = j + c_dim1 * 3; + z__6.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__6.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + z__2.r = z__3.r + z__6.r, z__2.i = z__3.i + z__6.i; + i__5 = j + (c_dim1 << 2); + z__7.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__7.i = v4.r * + c__[i__5].i + v4.i * c__[i__5].r; + z__1.r = z__2.r + z__7.r, z__1.i = z__2.i + z__7.i; + sum.r = z__1.r, sum.i = z__1.i; + i__2 = j + c_dim1; + i__3 = j + c_dim1; + z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + + sum.i * t1.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); + z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + + sum.i * t2.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + c_dim1 * 3; + i__3 = j + c_dim1 * 3; + z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + + sum.i * t3.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); + z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + + sum.i * t4.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; +/* L280: */ + } + goto L410; +L290: + +/* Special code for 5 x 5 Householder */ + + v1.r = v[1].r, v1.i = v[1].i; + d_cnjg(&z__2, &v1); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t1.r = z__1.r, t1.i = z__1.i; + v2.r = v[2].r, v2.i = v[2].i; + d_cnjg(&z__2, &v2); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t2.r = z__1.r, t2.i = z__1.i; + v3.r = v[3].r, v3.i = v[3].i; + d_cnjg(&z__2, &v3); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t3.r = z__1.r, t3.i = z__1.i; + v4.r = v[4].r, v4.i = v[4].i; + d_cnjg(&z__2, &v4); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t4.r = z__1.r, t4.i = z__1.i; + v5.r = v[5].r, v5.i = v[5].i; + d_cnjg(&z__2, &v5); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t5.r = z__1.r, t5.i = z__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + z__5.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__5.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j + (c_dim1 << 1); + z__6.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__6.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + z__4.r = z__5.r + z__6.r, z__4.i = z__5.i + z__6.i; + i__4 = j + c_dim1 * 3; + z__7.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__7.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + z__3.r = z__4.r + z__7.r, z__3.i = z__4.i + z__7.i; + i__5 = j + (c_dim1 << 2); + z__8.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__8.i = v4.r * + c__[i__5].i + v4.i * c__[i__5].r; + z__2.r = z__3.r + z__8.r, z__2.i = z__3.i + z__8.i; + i__6 = j + c_dim1 * 5; + z__9.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__9.i = v5.r * + c__[i__6].i + v5.i * c__[i__6].r; + z__1.r = z__2.r + z__9.r, z__1.i = z__2.i + z__9.i; + sum.r = z__1.r, sum.i = z__1.i; + i__2 = j + c_dim1; + i__3 = j + c_dim1; + z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + + sum.i * t1.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); + z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + + sum.i * t2.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + c_dim1 * 3; + i__3 = j + c_dim1 * 3; + z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + + sum.i * t3.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); + z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + + sum.i * t4.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + c_dim1 * 5; + i__3 = j + c_dim1 * 5; + z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + + sum.i * t5.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; +/* L300: */ + } + goto L410; +L310: + +/* Special code for 6 x 6 Householder */ + + v1.r = v[1].r, v1.i = v[1].i; + d_cnjg(&z__2, &v1); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t1.r = z__1.r, t1.i = z__1.i; + v2.r = v[2].r, v2.i = v[2].i; + d_cnjg(&z__2, &v2); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t2.r = z__1.r, t2.i = z__1.i; + v3.r = v[3].r, v3.i = v[3].i; + d_cnjg(&z__2, &v3); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t3.r = z__1.r, t3.i = z__1.i; + v4.r = v[4].r, v4.i = v[4].i; + d_cnjg(&z__2, &v4); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t4.r = z__1.r, t4.i = z__1.i; + v5.r = v[5].r, v5.i = v[5].i; + d_cnjg(&z__2, &v5); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t5.r = z__1.r, t5.i = z__1.i; + v6.r = v[6].r, v6.i = v[6].i; + d_cnjg(&z__2, &v6); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t6.r = z__1.r, t6.i = z__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + z__6.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__6.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j + (c_dim1 << 1); + z__7.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__7.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + z__5.r = z__6.r + z__7.r, z__5.i = z__6.i + z__7.i; + i__4 = j + c_dim1 * 3; + z__8.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__8.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + z__4.r = z__5.r + z__8.r, z__4.i = z__5.i + z__8.i; + i__5 = j + (c_dim1 << 2); + z__9.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__9.i = v4.r * + c__[i__5].i + v4.i * c__[i__5].r; + z__3.r = z__4.r + z__9.r, z__3.i = z__4.i + z__9.i; + i__6 = j + c_dim1 * 5; + z__10.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__10.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + z__2.r = z__3.r + z__10.r, z__2.i = z__3.i + z__10.i; + i__7 = j + c_dim1 * 6; + z__11.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__11.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + z__1.r = z__2.r + z__11.r, z__1.i = z__2.i + z__11.i; + sum.r = z__1.r, sum.i = z__1.i; + i__2 = j + c_dim1; + i__3 = j + c_dim1; + z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + + sum.i * t1.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); + z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + + sum.i * t2.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + c_dim1 * 3; + i__3 = j + c_dim1 * 3; + z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + + sum.i * t3.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); + z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + + sum.i * t4.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + c_dim1 * 5; + i__3 = j + c_dim1 * 5; + z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + + sum.i * t5.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + c_dim1 * 6; + i__3 = j + c_dim1 * 6; + z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + + sum.i * t6.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; +/* L320: */ + } + goto L410; +L330: + +/* Special code for 7 x 7 Householder */ + + v1.r = v[1].r, v1.i = v[1].i; + d_cnjg(&z__2, &v1); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t1.r = z__1.r, t1.i = z__1.i; + v2.r = v[2].r, v2.i = v[2].i; + d_cnjg(&z__2, &v2); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t2.r = z__1.r, t2.i = z__1.i; + v3.r = v[3].r, v3.i = v[3].i; + d_cnjg(&z__2, &v3); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t3.r = z__1.r, t3.i = z__1.i; + v4.r = v[4].r, v4.i = v[4].i; + d_cnjg(&z__2, &v4); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t4.r = z__1.r, t4.i = z__1.i; + v5.r = v[5].r, v5.i = v[5].i; + d_cnjg(&z__2, &v5); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t5.r = z__1.r, t5.i = z__1.i; + v6.r = v[6].r, v6.i = v[6].i; + d_cnjg(&z__2, &v6); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t6.r = z__1.r, t6.i = z__1.i; + v7.r = v[7].r, v7.i = v[7].i; + d_cnjg(&z__2, &v7); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t7.r = z__1.r, t7.i = z__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + z__7.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__7.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j + (c_dim1 << 1); + z__8.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__8.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + z__6.r = z__7.r + z__8.r, z__6.i = z__7.i + z__8.i; + i__4 = j + c_dim1 * 3; + z__9.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__9.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + z__5.r = z__6.r + z__9.r, z__5.i = z__6.i + z__9.i; + i__5 = j + (c_dim1 << 2); + z__10.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__10.i = v4.r + * c__[i__5].i + v4.i * c__[i__5].r; + z__4.r = z__5.r + z__10.r, z__4.i = z__5.i + z__10.i; + i__6 = j + c_dim1 * 5; + z__11.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__11.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + z__3.r = z__4.r + z__11.r, z__3.i = z__4.i + z__11.i; + i__7 = j + c_dim1 * 6; + z__12.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__12.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + z__2.r = z__3.r + z__12.r, z__2.i = z__3.i + z__12.i; + i__8 = j + c_dim1 * 7; + z__13.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__13.i = v7.r + * c__[i__8].i + v7.i * c__[i__8].r; + z__1.r = z__2.r + z__13.r, z__1.i = z__2.i + z__13.i; + sum.r = z__1.r, sum.i = z__1.i; + i__2 = j + c_dim1; + i__3 = j + c_dim1; + z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + + sum.i * t1.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); + z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + + sum.i * t2.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + c_dim1 * 3; + i__3 = j + c_dim1 * 3; + z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + + sum.i * t3.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); + z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + + sum.i * t4.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + c_dim1 * 5; + i__3 = j + c_dim1 * 5; + z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + + sum.i * t5.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + c_dim1 * 6; + i__3 = j + c_dim1 * 6; + z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + + sum.i * t6.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + c_dim1 * 7; + i__3 = j + c_dim1 * 7; + z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i + + sum.i * t7.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; +/* L340: */ + } + goto L410; +L350: + +/* Special code for 8 x 8 Householder */ + + v1.r = v[1].r, v1.i = v[1].i; + d_cnjg(&z__2, &v1); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t1.r = z__1.r, t1.i = z__1.i; + v2.r = v[2].r, v2.i = v[2].i; + d_cnjg(&z__2, &v2); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t2.r = z__1.r, t2.i = z__1.i; + v3.r = v[3].r, v3.i = v[3].i; + d_cnjg(&z__2, &v3); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t3.r = z__1.r, t3.i = z__1.i; + v4.r = v[4].r, v4.i = v[4].i; + d_cnjg(&z__2, &v4); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t4.r = z__1.r, t4.i = z__1.i; + v5.r = v[5].r, v5.i = v[5].i; + d_cnjg(&z__2, &v5); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t5.r = z__1.r, t5.i = z__1.i; + v6.r = v[6].r, v6.i = v[6].i; + d_cnjg(&z__2, &v6); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t6.r = z__1.r, t6.i = z__1.i; + v7.r = v[7].r, v7.i = v[7].i; + d_cnjg(&z__2, &v7); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t7.r = z__1.r, t7.i = z__1.i; + v8.r = v[8].r, v8.i = v[8].i; + d_cnjg(&z__2, &v8); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t8.r = z__1.r, t8.i = z__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + z__8.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__8.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j + (c_dim1 << 1); + z__9.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__9.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + z__7.r = z__8.r + z__9.r, z__7.i = z__8.i + z__9.i; + i__4 = j + c_dim1 * 3; + z__10.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__10.i = v3.r + * c__[i__4].i + v3.i * c__[i__4].r; + z__6.r = z__7.r + z__10.r, z__6.i = z__7.i + z__10.i; + i__5 = j + (c_dim1 << 2); + z__11.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__11.i = v4.r + * c__[i__5].i + v4.i * c__[i__5].r; + z__5.r = z__6.r + z__11.r, z__5.i = z__6.i + z__11.i; + i__6 = j + c_dim1 * 5; + z__12.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__12.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + z__4.r = z__5.r + z__12.r, z__4.i = z__5.i + z__12.i; + i__7 = j + c_dim1 * 6; + z__13.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__13.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + z__3.r = z__4.r + z__13.r, z__3.i = z__4.i + z__13.i; + i__8 = j + c_dim1 * 7; + z__14.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__14.i = v7.r + * c__[i__8].i + v7.i * c__[i__8].r; + z__2.r = z__3.r + z__14.r, z__2.i = z__3.i + z__14.i; + i__9 = j + (c_dim1 << 3); + z__15.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, z__15.i = v8.r + * c__[i__9].i + v8.i * c__[i__9].r; + z__1.r = z__2.r + z__15.r, z__1.i = z__2.i + z__15.i; + sum.r = z__1.r, sum.i = z__1.i; + i__2 = j + c_dim1; + i__3 = j + c_dim1; + z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + + sum.i * t1.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); + z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + + sum.i * t2.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + c_dim1 * 3; + i__3 = j + c_dim1 * 3; + z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + + sum.i * t3.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); + z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + + sum.i * t4.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + c_dim1 * 5; + i__3 = j + c_dim1 * 5; + z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + + sum.i * t5.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + c_dim1 * 6; + i__3 = j + c_dim1 * 6; + z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + + sum.i * t6.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + c_dim1 * 7; + i__3 = j + c_dim1 * 7; + z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i + + sum.i * t7.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + (c_dim1 << 3); + i__3 = j + (c_dim1 << 3); + z__2.r = sum.r * t8.r - sum.i * t8.i, z__2.i = sum.r * t8.i + + sum.i * t8.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; +/* L360: */ + } + goto L410; +L370: + +/* Special code for 9 x 9 Householder */ + + v1.r = v[1].r, v1.i = v[1].i; + d_cnjg(&z__2, &v1); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t1.r = z__1.r, t1.i = z__1.i; + v2.r = v[2].r, v2.i = v[2].i; + d_cnjg(&z__2, &v2); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t2.r = z__1.r, t2.i = z__1.i; + v3.r = v[3].r, v3.i = v[3].i; + d_cnjg(&z__2, &v3); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t3.r = z__1.r, t3.i = z__1.i; + v4.r = v[4].r, v4.i = v[4].i; + d_cnjg(&z__2, &v4); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t4.r = z__1.r, t4.i = z__1.i; + v5.r = v[5].r, v5.i = v[5].i; + d_cnjg(&z__2, &v5); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t5.r = z__1.r, t5.i = z__1.i; + v6.r = v[6].r, v6.i = v[6].i; + d_cnjg(&z__2, &v6); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t6.r = z__1.r, t6.i = z__1.i; + v7.r = v[7].r, v7.i = v[7].i; + d_cnjg(&z__2, &v7); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t7.r = z__1.r, t7.i = z__1.i; + v8.r = v[8].r, v8.i = v[8].i; + d_cnjg(&z__2, &v8); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t8.r = z__1.r, t8.i = z__1.i; + v9.r = v[9].r, v9.i = v[9].i; + d_cnjg(&z__2, &v9); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t9.r = z__1.r, t9.i = z__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + z__9.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__9.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j + (c_dim1 << 1); + z__10.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__10.i = v2.r + * c__[i__3].i + v2.i * c__[i__3].r; + z__8.r = z__9.r + z__10.r, z__8.i = z__9.i + z__10.i; + i__4 = j + c_dim1 * 3; + z__11.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__11.i = v3.r + * c__[i__4].i + v3.i * c__[i__4].r; + z__7.r = z__8.r + z__11.r, z__7.i = z__8.i + z__11.i; + i__5 = j + (c_dim1 << 2); + z__12.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__12.i = v4.r + * c__[i__5].i + v4.i * c__[i__5].r; + z__6.r = z__7.r + z__12.r, z__6.i = z__7.i + z__12.i; + i__6 = j + c_dim1 * 5; + z__13.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__13.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + z__5.r = z__6.r + z__13.r, z__5.i = z__6.i + z__13.i; + i__7 = j + c_dim1 * 6; + z__14.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__14.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + z__4.r = z__5.r + z__14.r, z__4.i = z__5.i + z__14.i; + i__8 = j + c_dim1 * 7; + z__15.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__15.i = v7.r + * c__[i__8].i + v7.i * c__[i__8].r; + z__3.r = z__4.r + z__15.r, z__3.i = z__4.i + z__15.i; + i__9 = j + (c_dim1 << 3); + z__16.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, z__16.i = v8.r + * c__[i__9].i + v8.i * c__[i__9].r; + z__2.r = z__3.r + z__16.r, z__2.i = z__3.i + z__16.i; + i__10 = j + c_dim1 * 9; + z__17.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, z__17.i = + v9.r * c__[i__10].i + v9.i * c__[i__10].r; + z__1.r = z__2.r + z__17.r, z__1.i = z__2.i + z__17.i; + sum.r = z__1.r, sum.i = z__1.i; + i__2 = j + c_dim1; + i__3 = j + c_dim1; + z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + + sum.i * t1.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); + z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + + sum.i * t2.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + c_dim1 * 3; + i__3 = j + c_dim1 * 3; + z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + + sum.i * t3.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); + z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + + sum.i * t4.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + c_dim1 * 5; + i__3 = j + c_dim1 * 5; + z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + + sum.i * t5.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + c_dim1 * 6; + i__3 = j + c_dim1 * 6; + z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + + sum.i * t6.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + c_dim1 * 7; + i__3 = j + c_dim1 * 7; + z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i + + sum.i * t7.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + (c_dim1 << 3); + i__3 = j + (c_dim1 << 3); + z__2.r = sum.r * t8.r - sum.i * t8.i, z__2.i = sum.r * t8.i + + sum.i * t8.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + c_dim1 * 9; + i__3 = j + c_dim1 * 9; + z__2.r = sum.r * t9.r - sum.i * t9.i, z__2.i = sum.r * t9.i + + sum.i * t9.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; +/* L380: */ + } + goto L410; +L390: + +/* Special code for 10 x 10 Householder */ + + v1.r = v[1].r, v1.i = v[1].i; + d_cnjg(&z__2, &v1); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t1.r = z__1.r, t1.i = z__1.i; + v2.r = v[2].r, v2.i = v[2].i; + d_cnjg(&z__2, &v2); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t2.r = z__1.r, t2.i = z__1.i; + v3.r = v[3].r, v3.i = v[3].i; + d_cnjg(&z__2, &v3); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t3.r = z__1.r, t3.i = z__1.i; + v4.r = v[4].r, v4.i = v[4].i; + d_cnjg(&z__2, &v4); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t4.r = z__1.r, t4.i = z__1.i; + v5.r = v[5].r, v5.i = v[5].i; + d_cnjg(&z__2, &v5); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t5.r = z__1.r, t5.i = z__1.i; + v6.r = v[6].r, v6.i = v[6].i; + d_cnjg(&z__2, &v6); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t6.r = z__1.r, t6.i = z__1.i; + v7.r = v[7].r, v7.i = v[7].i; + d_cnjg(&z__2, &v7); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t7.r = z__1.r, t7.i = z__1.i; + v8.r = v[8].r, v8.i = v[8].i; + d_cnjg(&z__2, &v8); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t8.r = z__1.r, t8.i = z__1.i; + v9.r = v[9].r, v9.i = v[9].i; + d_cnjg(&z__2, &v9); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t9.r = z__1.r, t9.i = z__1.i; + v10.r = v[10].r, v10.i = v[10].i; + d_cnjg(&z__2, &v10); + z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i + + tau->i * z__2.r; + t10.r = z__1.r, t10.i = z__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + z__10.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__10.i = v1.r + * c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j + (c_dim1 << 1); + z__11.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__11.i = v2.r + * c__[i__3].i + v2.i * c__[i__3].r; + z__9.r = z__10.r + z__11.r, z__9.i = z__10.i + z__11.i; + i__4 = j + c_dim1 * 3; + z__12.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__12.i = v3.r + * c__[i__4].i + v3.i * c__[i__4].r; + z__8.r = z__9.r + z__12.r, z__8.i = z__9.i + z__12.i; + i__5 = j + (c_dim1 << 2); + z__13.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__13.i = v4.r + * c__[i__5].i + v4.i * c__[i__5].r; + z__7.r = z__8.r + z__13.r, z__7.i = z__8.i + z__13.i; + i__6 = j + c_dim1 * 5; + z__14.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__14.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + z__6.r = z__7.r + z__14.r, z__6.i = z__7.i + z__14.i; + i__7 = j + c_dim1 * 6; + z__15.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__15.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + z__5.r = z__6.r + z__15.r, z__5.i = z__6.i + z__15.i; + i__8 = j + c_dim1 * 7; + z__16.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__16.i = v7.r + * c__[i__8].i + v7.i * c__[i__8].r; + z__4.r = z__5.r + z__16.r, z__4.i = z__5.i + z__16.i; + i__9 = j + (c_dim1 << 3); + z__17.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, z__17.i = v8.r + * c__[i__9].i + v8.i * c__[i__9].r; + z__3.r = z__4.r + z__17.r, z__3.i = z__4.i + z__17.i; + i__10 = j + c_dim1 * 9; + z__18.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, z__18.i = + v9.r * c__[i__10].i + v9.i * c__[i__10].r; + z__2.r = z__3.r + z__18.r, z__2.i = z__3.i + z__18.i; + i__11 = j + c_dim1 * 10; + z__19.r = v10.r * c__[i__11].r - v10.i * c__[i__11].i, z__19.i = + v10.r * c__[i__11].i + v10.i * c__[i__11].r; + z__1.r = z__2.r + z__19.r, z__1.i = z__2.i + z__19.i; + sum.r = z__1.r, sum.i = z__1.i; + i__2 = j + c_dim1; + i__3 = j + c_dim1; + z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + + sum.i * t1.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); + z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + + sum.i * t2.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + c_dim1 * 3; + i__3 = j + c_dim1 * 3; + z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + + sum.i * t3.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); + z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + + sum.i * t4.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + c_dim1 * 5; + i__3 = j + c_dim1 * 5; + z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + + sum.i * t5.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + c_dim1 * 6; + i__3 = j + c_dim1 * 6; + z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + + sum.i * t6.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + c_dim1 * 7; + i__3 = j + c_dim1 * 7; + z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i + + sum.i * t7.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + (c_dim1 << 3); + i__3 = j + (c_dim1 << 3); + z__2.r = sum.r * t8.r - sum.i * t8.i, z__2.i = sum.r * t8.i + + sum.i * t8.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + c_dim1 * 9; + i__3 = j + c_dim1 * 9; + z__2.r = sum.r * t9.r - sum.i * t9.i, z__2.i = sum.r * t9.i + + sum.i * t9.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + i__2 = j + c_dim1 * 10; + i__3 = j + c_dim1 * 10; + z__2.r = sum.r * t10.r - sum.i * t10.i, z__2.i = sum.r * t10.i + + sum.i * t10.r; + z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; +/* L400: */ + } + goto L410; + } +L410: + return 0; + +/* End of ZLARFX */ + +} /* zlarfx_ */ + +/* Subroutine */ int zlascl_(char *type__, integer *kl, integer *ku, + doublereal *cfrom, doublereal *cto, integer *m, integer *n, + doublecomplex *a, integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1; + + /* Local variables */ + static integer i__, j, k1, k2, k3, k4; + static doublereal mul, cto1; + static logical done; + static doublereal ctoc; + extern logical lsame_(char *, char *); + static integer itype; + static doublereal cfrom1; + + static doublereal cfromc; + extern /* Subroutine */ int xerbla_(char *, integer *); + static doublereal bignum, smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + ZLASCL multiplies the M by N complex matrix A by the real scalar + CTO/CFROM. This is done without over/underflow as long as the final + result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that + A may be full, upper triangular, lower triangular, upper Hessenberg, + or banded. + + Arguments + ========= + + TYPE (input) CHARACTER*1 + TYPE indices the storage type of the input matrix. + = 'G': A is a full matrix. + = 'L': A is a lower triangular matrix. + = 'U': A is an upper triangular matrix. + = 'H': A is an upper Hessenberg matrix. + = 'B': A is a symmetric band matrix with lower bandwidth KL + and upper bandwidth KU and with the only the lower + half stored. + = 'Q': A is a symmetric band matrix with lower bandwidth KL + and upper bandwidth KU and with the only the upper + half stored. + = 'Z': A is a band matrix with lower bandwidth KL and upper + bandwidth KU. + + KL (input) INTEGER + The lower bandwidth of A. Referenced only if TYPE = 'B', + 'Q' or 'Z'. + + KU (input) INTEGER + The upper bandwidth of A. Referenced only if TYPE = 'B', + 'Q' or 'Z'. + + CFROM (input) DOUBLE PRECISION + CTO (input) DOUBLE PRECISION + The matrix A is multiplied by CTO/CFROM. A(I,J) is computed + without over/underflow if the final result CTO*A(I,J)/CFROM + can be represented without over/underflow. CFROM must be + nonzero. + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,M) + The matrix to be multiplied by CTO/CFROM. See TYPE for the + storage type. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + INFO (output) INTEGER + 0 - successful exit + <0 - if INFO = -i, the i-th argument had an illegal value. + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + + if (lsame_(type__, "G")) { + itype = 0; + } else if (lsame_(type__, "L")) { + itype = 1; + } else if (lsame_(type__, "U")) { + itype = 2; + } else if (lsame_(type__, "H")) { + itype = 3; + } else if (lsame_(type__, "B")) { + itype = 4; + } else if (lsame_(type__, "Q")) { + itype = 5; + } else if (lsame_(type__, "Z")) { + itype = 6; + } else { + itype = -1; + } + + if (itype == -1) { + *info = -1; + } else if (*cfrom == 0.) { + *info = -4; + } else if (*m < 0) { + *info = -6; + } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { + *info = -7; + } else if (itype <= 3 && *lda < max(1,*m)) { + *info = -9; + } else if (itype >= 4) { +/* Computing MAX */ + i__1 = *m - 1; + if (*kl < 0 || *kl > max(i__1,0)) { + *info = -2; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *n - 1; + if (*ku < 0 || *ku > max(i__1,0) || (itype == 4 || itype == 5) && + *kl != *ku) { + *info = -3; + } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < * + ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) { + *info = -9; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZLASCL", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *m == 0) { + return 0; + } + +/* Get machine parameters */ + + smlnum = SAFEMINIMUM; + bignum = 1. / smlnum; + + cfromc = *cfrom; + ctoc = *cto; + +L10: + cfrom1 = cfromc * smlnum; + cto1 = ctoc / bignum; + if (abs(cfrom1) > abs(ctoc) && ctoc != 0.) { + mul = smlnum; + done = FALSE_; + cfromc = cfrom1; + } else if (abs(cto1) > abs(cfromc)) { + mul = bignum; + done = FALSE_; + ctoc = cto1; + } else { + mul = ctoc / cfromc; + done = TRUE_; + } + + if (itype == 0) { + +/* Full matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L20: */ + } +/* L30: */ + } + + } else if (itype == 1) { + +/* Lower triangular matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L40: */ + } +/* L50: */ + } + + } else if (itype == 2) { + +/* Upper triangular matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L60: */ + } +/* L70: */ + } + + } else if (itype == 3) { + +/* Upper Hessenberg matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j + 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L80: */ + } +/* L90: */ + } + + } else if (itype == 4) { + +/* Lower half of a symmetric band matrix */ + + k3 = *kl + 1; + k4 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = k3, i__4 = k4 - j; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L100: */ + } +/* L110: */ + } + + } else if (itype == 5) { + +/* Upper half of a symmetric band matrix */ + + k1 = *ku + 2; + k3 = *ku + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = k1 - j; + i__3 = k3; + for (i__ = max(i__2,1); i__ <= i__3; ++i__) { + i__2 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* L120: */ + } +/* L130: */ + } + + } else if (itype == 6) { + +/* Band matrix */ + + k1 = *kl + *ku + 2; + k2 = *kl + 1; + k3 = (*kl << 1) + *ku + 1; + k4 = *kl + *ku + 1 + *m; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__3 = k1 - j; +/* Computing MIN */ + i__4 = k3, i__5 = k4 - j; + i__2 = min(i__4,i__5); + for (i__ = max(i__3,k2); i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L140: */ + } +/* L150: */ + } + + } + + if (! done) { + goto L10; + } + + return 0; + +/* End of ZLASCL */ + +} /* zlascl_ */ + +/* Subroutine */ int zlaset_(char *uplo, integer *m, integer *n, + doublecomplex *alpha, doublecomplex *beta, doublecomplex *a, integer * + lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + ZLASET initializes a 2-D array A to BETA on the diagonal and + ALPHA on the offdiagonals. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies the part of the matrix A to be set. + = 'U': Upper triangular part is set. The lower triangle + is unchanged. + = 'L': Lower triangular part is set. The upper triangle + is unchanged. + Otherwise: All of the matrix A is set. + + M (input) INTEGER + On entry, M specifies the number of rows of A. + + N (input) INTEGER + On entry, N specifies the number of columns of A. + + ALPHA (input) COMPLEX*16 + All the offdiagonal array elements are set to ALPHA. + + BETA (input) COMPLEX*16 + All the diagonal array elements are set to BETA. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the m by n matrix A. + On exit, A(i,j) = ALPHA, 1 <= i <= m, 1 <= j <= n, i.ne.j; + A(i,i) = BETA , 1 <= i <= min(m,n) + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (lsame_(uplo, "U")) { + +/* + Set the diagonal to BETA and the strictly upper triangular + part of the array to ALPHA. +*/ + + i__1 = *n; + for (j = 2; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j - 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = alpha->r, a[i__3].i = alpha->i; +/* L10: */ + } +/* L20: */ + } + i__1 = min(*n,*m); + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = beta->r, a[i__2].i = beta->i; +/* L30: */ + } + + } else if (lsame_(uplo, "L")) { + +/* + Set the diagonal to BETA and the strictly lower triangular + part of the array to ALPHA. +*/ + + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = alpha->r, a[i__3].i = alpha->i; +/* L40: */ + } +/* L50: */ + } + i__1 = min(*n,*m); + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = beta->r, a[i__2].i = beta->i; +/* L60: */ + } + + } else { + +/* + Set the array to BETA on the diagonal and ALPHA on the + offdiagonal. +*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = alpha->r, a[i__3].i = alpha->i; +/* L70: */ + } +/* L80: */ + } + i__1 = min(*m,*n); + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = beta->r, a[i__2].i = beta->i; +/* L90: */ + } + } + + return 0; + +/* End of ZLASET */ + +} /* zlaset_ */ + +/* Subroutine */ int zlasr_(char *side, char *pivot, char *direct, integer *m, + integer *n, doublereal *c__, doublereal *s, doublecomplex *a, + integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + doublecomplex z__1, z__2, z__3; + + /* Local variables */ + static integer i__, j, info; + static doublecomplex temp; + extern logical lsame_(char *, char *); + static doublereal ctemp, stemp; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + ZLASR performs the transformation + + A := P*A, when SIDE = 'L' or 'l' ( Left-hand side ) + + A := A*P', when SIDE = 'R' or 'r' ( Right-hand side ) + + where A is an m by n complex matrix and P is an orthogonal matrix, + consisting of a sequence of plane rotations determined by the + parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l' + and z = n when SIDE = 'R' or 'r' ): + + When DIRECT = 'F' or 'f' ( Forward sequence ) then + + P = P( z - 1 )*...*P( 2 )*P( 1 ), + + and when DIRECT = 'B' or 'b' ( Backward sequence ) then + + P = P( 1 )*P( 2 )*...*P( z - 1 ), + + where P( k ) is a plane rotation matrix for the following planes: + + when PIVOT = 'V' or 'v' ( Variable pivot ), + the plane ( k, k + 1 ) + + when PIVOT = 'T' or 't' ( Top pivot ), + the plane ( 1, k + 1 ) + + when PIVOT = 'B' or 'b' ( Bottom pivot ), + the plane ( k, z ) + + c( k ) and s( k ) must contain the cosine and sine that define the + matrix P( k ). The two by two plane rotation part of the matrix + P( k ), R( k ), is assumed to be of the form + + R( k ) = ( c( k ) s( k ) ). + ( -s( k ) c( k ) ) + + Arguments + ========= + + SIDE (input) CHARACTER*1 + Specifies whether the plane rotation matrix P is applied to + A on the left or the right. + = 'L': Left, compute A := P*A + = 'R': Right, compute A:= A*P' + + DIRECT (input) CHARACTER*1 + Specifies whether P is a forward or backward sequence of + plane rotations. + = 'F': Forward, P = P( z - 1 )*...*P( 2 )*P( 1 ) + = 'B': Backward, P = P( 1 )*P( 2 )*...*P( z - 1 ) + + PIVOT (input) CHARACTER*1 + Specifies the plane for which P(k) is a plane rotation + matrix. + = 'V': Variable pivot, the plane (k,k+1) + = 'T': Top pivot, the plane (1,k+1) + = 'B': Bottom pivot, the plane (k,z) + + M (input) INTEGER + The number of rows of the matrix A. If m <= 1, an immediate + return is effected. + + N (input) INTEGER + The number of columns of the matrix A. If n <= 1, an + immediate return is effected. + + C, S (input) DOUBLE PRECISION arrays, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + c(k) and s(k) contain the cosine and sine that define the + matrix P(k). The two by two plane rotation part of the + matrix P(k), R(k), is assumed to be of the form + R( k ) = ( c( k ) s( k ) ). + ( -s( k ) c( k ) ) + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + The m by n matrix A. On exit, A is overwritten by P*A if + SIDE = 'R' or by A*P' if SIDE = 'L'. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + --c__; + --s; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (! (lsame_(side, "L") || lsame_(side, "R"))) { + info = 1; + } else if (! (lsame_(pivot, "V") || lsame_(pivot, + "T") || lsame_(pivot, "B"))) { + info = 2; + } else if (! (lsame_(direct, "F") || lsame_(direct, + "B"))) { + info = 3; + } else if (*m < 0) { + info = 4; + } else if (*n < 0) { + info = 5; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_("ZLASR ", &info); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + if (lsame_(side, "L")) { + +/* Form P * A */ + + if (lsame_(pivot, "V")) { + if (lsame_(direct, "F")) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + 1 + i__ * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = j + 1 + i__ * a_dim1; + z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; + i__4 = j + i__ * a_dim1; + z__3.r = stemp * a[i__4].r, z__3.i = stemp * a[ + i__4].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + i__3 = j + i__ * a_dim1; + z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; + i__4 = j + i__ * a_dim1; + z__3.r = ctemp * a[i__4].r, z__3.i = ctemp * a[ + i__4].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L10: */ + } + } +/* L20: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = j + 1 + i__ * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = j + 1 + i__ * a_dim1; + z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; + i__3 = j + i__ * a_dim1; + z__3.r = stemp * a[i__3].r, z__3.i = stemp * a[ + i__3].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = j + i__ * a_dim1; + z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; + i__3 = j + i__ * a_dim1; + z__3.r = ctemp * a[i__3].r, z__3.i = ctemp * a[ + i__3].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* L30: */ + } + } +/* L40: */ + } + } + } else if (lsame_(pivot, "T")) { + if (lsame_(direct, "F")) { + i__1 = *m; + for (j = 2; j <= i__1; ++j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = j + i__ * a_dim1; + z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; + i__4 = i__ * a_dim1 + 1; + z__3.r = stemp * a[i__4].r, z__3.i = stemp * a[ + i__4].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + i__3 = i__ * a_dim1 + 1; + z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; + i__4 = i__ * a_dim1 + 1; + z__3.r = ctemp * a[i__4].r, z__3.i = ctemp * a[ + i__4].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L50: */ + } + } +/* L60: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m; j >= 2; --j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = j + i__ * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = j + i__ * a_dim1; + z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; + i__3 = i__ * a_dim1 + 1; + z__3.r = stemp * a[i__3].r, z__3.i = stemp * a[ + i__3].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = i__ * a_dim1 + 1; + z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; + i__3 = i__ * a_dim1 + 1; + z__3.r = ctemp * a[i__3].r, z__3.i = ctemp * a[ + i__3].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* L70: */ + } + } +/* L80: */ + } + } + } else if (lsame_(pivot, "B")) { + if (lsame_(direct, "F")) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = j + i__ * a_dim1; + i__4 = *m + i__ * a_dim1; + z__2.r = stemp * a[i__4].r, z__2.i = stemp * a[ + i__4].i; + z__3.r = ctemp * temp.r, z__3.i = ctemp * temp.i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + i__3 = *m + i__ * a_dim1; + i__4 = *m + i__ * a_dim1; + z__2.r = ctemp * a[i__4].r, z__2.i = ctemp * a[ + i__4].i; + z__3.r = stemp * temp.r, z__3.i = stemp * temp.i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L90: */ + } + } +/* L100: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = j + i__ * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = j + i__ * a_dim1; + i__3 = *m + i__ * a_dim1; + z__2.r = stemp * a[i__3].r, z__2.i = stemp * a[ + i__3].i; + z__3.r = ctemp * temp.r, z__3.i = ctemp * temp.i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = *m + i__ * a_dim1; + i__3 = *m + i__ * a_dim1; + z__2.r = ctemp * a[i__3].r, z__2.i = ctemp * a[ + i__3].i; + z__3.r = stemp * temp.r, z__3.i = stemp * temp.i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* L110: */ + } + } +/* L120: */ + } + } + } + } else if (lsame_(side, "R")) { + +/* Form A * P' */ + + if (lsame_(pivot, "V")) { + if (lsame_(direct, "F")) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + (j + 1) * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = i__ + (j + 1) * a_dim1; + z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; + i__4 = i__ + j * a_dim1; + z__3.r = stemp * a[i__4].r, z__3.i = stemp * a[ + i__4].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + i__3 = i__ + j * a_dim1; + z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; + i__4 = i__ + j * a_dim1; + z__3.r = ctemp * a[i__4].r, z__3.i = ctemp * a[ + i__4].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L130: */ + } + } +/* L140: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + (j + 1) * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = i__ + (j + 1) * a_dim1; + z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; + i__3 = i__ + j * a_dim1; + z__3.r = stemp * a[i__3].r, z__3.i = stemp * a[ + i__3].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = i__ + j * a_dim1; + z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; + i__3 = i__ + j * a_dim1; + z__3.r = ctemp * a[i__3].r, z__3.i = ctemp * a[ + i__3].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* L150: */ + } + } +/* L160: */ + } + } + } else if (lsame_(pivot, "T")) { + if (lsame_(direct, "F")) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = i__ + j * a_dim1; + z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; + i__4 = i__ + a_dim1; + z__3.r = stemp * a[i__4].r, z__3.i = stemp * a[ + i__4].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + i__3 = i__ + a_dim1; + z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; + i__4 = i__ + a_dim1; + z__3.r = ctemp * a[i__4].r, z__3.i = ctemp * a[ + i__4].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L170: */ + } + } +/* L180: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n; j >= 2; --j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = i__ + j * a_dim1; + z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; + i__3 = i__ + a_dim1; + z__3.r = stemp * a[i__3].r, z__3.i = stemp * a[ + i__3].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = i__ + a_dim1; + z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; + i__3 = i__ + a_dim1; + z__3.r = ctemp * a[i__3].r, z__3.i = ctemp * a[ + i__3].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* L190: */ + } + } +/* L200: */ + } + } + } else if (lsame_(pivot, "B")) { + if (lsame_(direct, "F")) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = i__ + j * a_dim1; + i__4 = i__ + *n * a_dim1; + z__2.r = stemp * a[i__4].r, z__2.i = stemp * a[ + i__4].i; + z__3.r = ctemp * temp.r, z__3.i = ctemp * temp.i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + i__3 = i__ + *n * a_dim1; + i__4 = i__ + *n * a_dim1; + z__2.r = ctemp * a[i__4].r, z__2.i = ctemp * a[ + i__4].i; + z__3.r = stemp * temp.r, z__3.i = stemp * temp.i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L210: */ + } + } +/* L220: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = i__ + j * a_dim1; + i__3 = i__ + *n * a_dim1; + z__2.r = stemp * a[i__3].r, z__2.i = stemp * a[ + i__3].i; + z__3.r = ctemp * temp.r, z__3.i = ctemp * temp.i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = i__ + *n * a_dim1; + i__3 = i__ + *n * a_dim1; + z__2.r = ctemp * a[i__3].r, z__2.i = ctemp * a[ + i__3].i; + z__3.r = stemp * temp.r, z__3.i = stemp * temp.i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* L230: */ + } + } +/* L240: */ + } + } + } + } + + return 0; + +/* End of ZLASR */ + +} /* zlasr_ */ + +/* Subroutine */ int zlassq_(integer *n, doublecomplex *x, integer *incx, + doublereal *scale, doublereal *sumsq) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + doublereal d__1; + + /* Builtin functions */ + double d_imag(doublecomplex *); + + /* Local variables */ + static integer ix; + static doublereal temp1; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZLASSQ returns the values scl and ssq such that + + ( scl**2 )*ssq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, + + where x( i ) = abs( X( 1 + ( i - 1 )*INCX ) ). The value of sumsq is + assumed to be at least unity and the value of ssq will then satisfy + + 1.0 .le. ssq .le. ( sumsq + 2*n ). + + scale is assumed to be non-negative and scl returns the value + + scl = max( scale, abs( real( x( i ) ) ), abs( aimag( x( i ) ) ) ), + i + + scale and sumsq must be supplied in SCALE and SUMSQ respectively. + SCALE and SUMSQ are overwritten by scl and ssq respectively. + + The routine makes only one pass through the vector X. + + Arguments + ========= + + N (input) INTEGER + The number of elements to be used from the vector X. + + X (input) COMPLEX*16 array, dimension (N) + The vector x as described above. + x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. + + INCX (input) INTEGER + The increment between successive values of the vector X. + INCX > 0. + + SCALE (input/output) DOUBLE PRECISION + On entry, the value scale in the equation above. + On exit, SCALE is overwritten with the value scl . + + SUMSQ (input/output) DOUBLE PRECISION + On entry, the value sumsq in the equation above. + On exit, SUMSQ is overwritten with the value ssq . + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n > 0) { + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + i__3 = ix; + if (x[i__3].r != 0.) { + i__3 = ix; + temp1 = (d__1 = x[i__3].r, abs(d__1)); + if (*scale < temp1) { +/* Computing 2nd power */ + d__1 = *scale / temp1; + *sumsq = *sumsq * (d__1 * d__1) + 1; + *scale = temp1; + } else { +/* Computing 2nd power */ + d__1 = temp1 / *scale; + *sumsq += d__1 * d__1; + } + } + if (d_imag(&x[ix]) != 0.) { + temp1 = (d__1 = d_imag(&x[ix]), abs(d__1)); + if (*scale < temp1) { +/* Computing 2nd power */ + d__1 = *scale / temp1; + *sumsq = *sumsq * (d__1 * d__1) + 1; + *scale = temp1; + } else { +/* Computing 2nd power */ + d__1 = temp1 / *scale; + *sumsq += d__1 * d__1; + } + } +/* L10: */ + } + } + + return 0; + +/* End of ZLASSQ */ + +} /* zlassq_ */ + +/* Subroutine */ int zlaswp_(integer *n, doublecomplex *a, integer *lda, + integer *k1, integer *k2, integer *ipiv, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + + /* Local variables */ + static integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc; + static doublecomplex temp; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZLASWP performs a series of row interchanges on the matrix A. + One row interchange is initiated for each of rows K1 through K2 of A. + + Arguments + ========= + + N (input) INTEGER + The number of columns of the matrix A. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the matrix of column dimension N to which the row + interchanges will be applied. + On exit, the permuted matrix. + + LDA (input) INTEGER + The leading dimension of the array A. + + K1 (input) INTEGER + The first element of IPIV for which a row interchange will + be done. + + K2 (input) INTEGER + The last element of IPIV for which a row interchange will + be done. + + IPIV (input) INTEGER array, dimension (M*abs(INCX)) + The vector of pivot indices. Only the elements in positions + K1 through K2 of IPIV are accessed. + IPIV(K) = L implies rows K and L are to be interchanged. + + INCX (input) INTEGER + The increment between successive values of IPIV. If IPIV + is negative, the pivots are applied in reverse order. + + Further Details + =============== + + Modified by + R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA + + ===================================================================== + + + Interchange row I with row IPIV(I) for each of rows K1 through K2. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + if (*incx > 0) { + ix0 = *k1; + i1 = *k1; + i2 = *k2; + inc = 1; + } else if (*incx < 0) { + ix0 = (1 - *k2) * *incx + 1; + i1 = *k2; + i2 = *k1; + inc = -1; + } else { + return 0; + } + + n32 = *n / 32 << 5; + if (n32 != 0) { + i__1 = n32; + for (j = 1; j <= i__1; j += 32) { + ix = ix0; + i__2 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) + { + ip = ipiv[ix]; + if (ip != i__) { + i__4 = j + 31; + for (k = j; k <= i__4; ++k) { + i__5 = i__ + k * a_dim1; + temp.r = a[i__5].r, temp.i = a[i__5].i; + i__5 = i__ + k * a_dim1; + i__6 = ip + k * a_dim1; + a[i__5].r = a[i__6].r, a[i__5].i = a[i__6].i; + i__5 = ip + k * a_dim1; + a[i__5].r = temp.r, a[i__5].i = temp.i; +/* L10: */ + } + } + ix += *incx; +/* L20: */ + } +/* L30: */ + } + } + if (n32 != *n) { + ++n32; + ix = ix0; + i__1 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__3) { + ip = ipiv[ix]; + if (ip != i__) { + i__2 = *n; + for (k = n32; k <= i__2; ++k) { + i__4 = i__ + k * a_dim1; + temp.r = a[i__4].r, temp.i = a[i__4].i; + i__4 = i__ + k * a_dim1; + i__5 = ip + k * a_dim1; + a[i__4].r = a[i__5].r, a[i__4].i = a[i__5].i; + i__4 = ip + k * a_dim1; + a[i__4].r = temp.r, a[i__4].i = temp.i; +/* L40: */ + } + } + ix += *incx; +/* L50: */ + } + } + + return 0; + +/* End of ZLASWP */ + +} /* zlaswp_ */ + +/* Subroutine */ int zlatrd_(char *uplo, integer *n, integer *nb, + doublecomplex *a, integer *lda, doublereal *e, doublecomplex *tau, + doublecomplex *w, integer *ldw) +{ + /* System generated locals */ + integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3; + doublereal d__1; + doublecomplex z__1, z__2, z__3, z__4; + + /* Local variables */ + static integer i__, iw; + static doublecomplex alpha; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *); + extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + extern /* Subroutine */ int zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *), + zhemv_(char *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *), zaxpy_(integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *), zlarfg_(integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), zlacgv_(integer *, doublecomplex *, + integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZLATRD reduces NB rows and columns of a complex Hermitian matrix A to + Hermitian tridiagonal form by a unitary similarity + transformation Q' * A * Q, and returns the matrices V and W which are + needed to apply the transformation to the unreduced part of A. + + If UPLO = 'U', ZLATRD reduces the last NB rows and columns of a + matrix, of which the upper triangle is supplied; + if UPLO = 'L', ZLATRD reduces the first NB rows and columns of a + matrix, of which the lower triangle is supplied. + + This is an auxiliary routine called by ZHETRD. + + Arguments + ========= + + UPLO (input) CHARACTER + Specifies whether the upper or lower triangular part of the + Hermitian matrix A is stored: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. + + NB (input) INTEGER + The number of rows and columns to be reduced. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the Hermitian matrix A. If UPLO = 'U', the leading + n-by-n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n-by-n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit: + if UPLO = 'U', the last NB columns have been reduced to + tridiagonal form, with the diagonal elements overwriting + the diagonal elements of A; the elements above the diagonal + with the array TAU, represent the unitary matrix Q as a + product of elementary reflectors; + if UPLO = 'L', the first NB columns have been reduced to + tridiagonal form, with the diagonal elements overwriting + the diagonal elements of A; the elements below the diagonal + with the array TAU, represent the unitary matrix Q as a + product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + E (output) DOUBLE PRECISION array, dimension (N-1) + If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal + elements of the last NB columns of the reduced matrix; + if UPLO = 'L', E(1:nb) contains the subdiagonal elements of + the first NB columns of the reduced matrix. + + TAU (output) COMPLEX*16 array, dimension (N-1) + The scalar factors of the elementary reflectors, stored in + TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. + See Further Details. + + W (output) COMPLEX*16 array, dimension (LDW,NB) + The n-by-nb matrix W required to update the unreduced part + of A. + + LDW (input) INTEGER + The leading dimension of the array W. LDW >= max(1,N). + + Further Details + =============== + + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors + + Q = H(n) H(n-1) . . . H(n-nb+1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), + and tau in TAU(i-1). + + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors + + Q = H(1) H(2) . . . H(nb). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), + and tau in TAU(i). + + The elements of the vectors v together form the n-by-nb matrix V + which is needed, with W, to apply the transformation to the unreduced + part of the matrix, using a Hermitian rank-2k update of the form: + A := A - V*W' - W*V'. + + The contents of A on exit are illustrated by the following examples + with n = 5 and nb = 2: + + if UPLO = 'U': if UPLO = 'L': + + ( a a a v4 v5 ) ( d ) + ( a a v4 v5 ) ( 1 d ) + ( a 1 v5 ) ( v1 1 a ) + ( d 1 ) ( v1 v2 a a ) + ( d ) ( v1 v2 a a a ) + + where d denotes a diagonal element of the reduced matrix, a denotes + an element of the original matrix that is unchanged, and vi denotes + an element of the vector defining H(i). + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --e; + --tau; + w_dim1 = *ldw; + w_offset = 1 + w_dim1; + w -= w_offset; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + + if (lsame_(uplo, "U")) { + +/* Reduce last NB columns of upper triangle */ + + i__1 = *n - *nb + 1; + for (i__ = *n; i__ >= i__1; --i__) { + iw = i__ - *n + *nb; + if (i__ < *n) { + +/* Update A(1:i,i) */ + + i__2 = i__ + i__ * a_dim1; + i__3 = i__ + i__ * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + i__2 = *n - i__; + zlacgv_(&i__2, &w[i__ + (iw + 1) * w_dim1], ldw); + i__2 = *n - i__; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__, &i__2, &z__1, &a[(i__ + 1) * + a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, & + c_b60, &a[i__ * a_dim1 + 1], &c__1); + i__2 = *n - i__; + zlacgv_(&i__2, &w[i__ + (iw + 1) * w_dim1], ldw); + i__2 = *n - i__; + zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = *n - i__; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__, &i__2, &z__1, &w[(iw + 1) * + w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b60, &a[i__ * a_dim1 + 1], &c__1); + i__2 = *n - i__; + zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + i__3 = i__ + i__ * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } + if (i__ > 1) { + +/* + Generate elementary reflector H(i) to annihilate + A(1:i-2,i) +*/ + + i__2 = i__ - 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = i__ - 1; + zlarfg_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &tau[i__ + - 1]); + i__2 = i__ - 1; + e[i__2] = alpha.r; + i__2 = i__ - 1 + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Compute W(1:i-1,i) */ + + i__2 = i__ - 1; + zhemv_("Upper", &i__2, &c_b60, &a[a_offset], lda, &a[i__ * + a_dim1 + 1], &c__1, &c_b59, &w[iw * w_dim1 + 1], & + c__1); + if (i__ < *n) { + i__2 = i__ - 1; + i__3 = *n - i__; + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &w[( + iw + 1) * w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], + &c__1, &c_b59, &w[i__ + 1 + iw * w_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &a[(i__ + 1) * + a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b60, &w[iw * w_dim1 + 1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[( + i__ + 1) * a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], + &c__1, &c_b59, &w[i__ + 1 + iw * w_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &w[(iw + 1) * + w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b60, &w[iw * w_dim1 + 1], &c__1); + } + i__2 = i__ - 1; + zscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1); + z__3.r = -.5, z__3.i = -0.; + i__2 = i__ - 1; + z__2.r = z__3.r * tau[i__2].r - z__3.i * tau[i__2].i, z__2.i = + z__3.r * tau[i__2].i + z__3.i * tau[i__2].r; + i__3 = i__ - 1; + zdotc_(&z__4, &i__3, &w[iw * w_dim1 + 1], &c__1, &a[i__ * + a_dim1 + 1], &c__1); + z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r * + z__4.i + z__2.i * z__4.r; + alpha.r = z__1.r, alpha.i = z__1.i; + i__2 = i__ - 1; + zaxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw * + w_dim1 + 1], &c__1); + } + +/* L10: */ + } + } else { + +/* Reduce first NB columns of lower triangle */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i:n,i) */ + + i__2 = i__ + i__ * a_dim1; + i__3 = i__ + i__ * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + i__2 = i__ - 1; + zlacgv_(&i__2, &w[i__ + w_dim1], ldw); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + a_dim1], lda, + &w[i__ + w_dim1], ldw, &c_b60, &a[i__ + i__ * a_dim1], & + c__1); + i__2 = i__ - 1; + zlacgv_(&i__2, &w[i__ + w_dim1], ldw); + i__2 = i__ - 1; + zlacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &w[i__ + w_dim1], ldw, + &a[i__ + a_dim1], lda, &c_b60, &a[i__ + i__ * a_dim1], & + c__1); + i__2 = i__ - 1; + zlacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + i__3 = i__ + i__ * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + if (i__ < *n) { + +/* + Generate elementary reflector H(i) to annihilate + A(i+2:n,i) +*/ + + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + zlarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, + &tau[i__]); + i__2 = i__; + e[i__2] = alpha.r; + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Compute W(i+1:n,i) */ + + i__2 = *n - i__; + zhemv_("Lower", &i__2, &c_b60, &a[i__ + 1 + (i__ + 1) * + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b59, &w[i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &w[i__ + + 1 + w_dim1], ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b59, &w[i__ * w_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + 1 + + a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b60, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[i__ + + 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b59, &w[i__ * w_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &w[i__ + 1 + + w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b60, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + zscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1); + z__3.r = -.5, z__3.i = -0.; + i__2 = i__; + z__2.r = z__3.r * tau[i__2].r - z__3.i * tau[i__2].i, z__2.i = + z__3.r * tau[i__2].i + z__3.i * tau[i__2].r; + i__3 = *n - i__; + zdotc_(&z__4, &i__3, &w[i__ + 1 + i__ * w_dim1], &c__1, &a[ + i__ + 1 + i__ * a_dim1], &c__1); + z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r * + z__4.i + z__2.i * z__4.r; + alpha.r = z__1.r, alpha.i = z__1.i; + i__2 = *n - i__; + zaxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + } + +/* L20: */ + } + } + + return 0; + +/* End of ZLATRD */ + +} /* zlatrd_ */ + +/* Subroutine */ int zlatrs_(char *uplo, char *trans, char *diag, char * + normin, integer *n, doublecomplex *a, integer *lda, doublecomplex *x, + doublereal *scale, doublereal *cnorm, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1, z__2, z__3, z__4; + + /* Builtin functions */ + double d_imag(doublecomplex *); + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__, j; + static doublereal xj, rec, tjj; + static integer jinc; + static doublereal xbnd; + static integer imax; + static doublereal tmax; + static doublecomplex tjjs; + static doublereal xmax, grow; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + static doublereal tscal; + static doublecomplex uscal; + static integer jlast; + static doublecomplex csumj; + extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + static logical upper; + extern /* Double Complex */ VOID zdotu_(doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + extern /* Subroutine */ int zaxpy_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *), ztrsv_( + char *, char *, char *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *), dlabad_( + doublereal *, doublereal *); + + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_( + integer *, doublereal *, doublecomplex *, integer *); + static doublereal bignum; + extern integer izamax_(integer *, doublecomplex *, integer *); + extern /* Double Complex */ VOID zladiv_(doublecomplex *, doublecomplex *, + doublecomplex *); + static logical notran; + static integer jfirst; + extern doublereal dzasum_(integer *, doublecomplex *, integer *); + static doublereal smlnum; + static logical nounit; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1992 + + + Purpose + ======= + + ZLATRS solves one of the triangular systems + + A * x = s*b, A**T * x = s*b, or A**H * x = s*b, + + with scaling to prevent overflow. Here A is an upper or lower + triangular matrix, A**T denotes the transpose of A, A**H denotes the + conjugate transpose of A, x and b are n-element vectors, and s is a + scaling factor, usually less than or equal to 1, chosen so that the + components of x will be less than the overflow threshold. If the + unscaled problem will not cause overflow, the Level 2 BLAS routine + ZTRSV is called. If the matrix A is singular (A(j,j) = 0 for some j), + then s is set to 0 and a non-trivial solution to A*x = 0 is returned. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the matrix A is upper or lower triangular. + = 'U': Upper triangular + = 'L': Lower triangular + + TRANS (input) CHARACTER*1 + Specifies the operation applied to A. + = 'N': Solve A * x = s*b (No transpose) + = 'T': Solve A**T * x = s*b (Transpose) + = 'C': Solve A**H * x = s*b (Conjugate transpose) + + DIAG (input) CHARACTER*1 + Specifies whether or not the matrix A is unit triangular. + = 'N': Non-unit triangular + = 'U': Unit triangular + + NORMIN (input) CHARACTER*1 + Specifies whether CNORM has been set or not. + = 'Y': CNORM contains the column norms on entry + = 'N': CNORM is not set on entry. On exit, the norms will + be computed and stored in CNORM. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input) COMPLEX*16 array, dimension (LDA,N) + The triangular matrix A. If UPLO = 'U', the leading n by n + upper triangular part of the array A contains the upper + triangular matrix, and the strictly lower triangular part of + A is not referenced. If UPLO = 'L', the leading n by n lower + triangular part of the array A contains the lower triangular + matrix, and the strictly upper triangular part of A is not + referenced. If DIAG = 'U', the diagonal elements of A are + also not referenced and are assumed to be 1. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max (1,N). + + X (input/output) COMPLEX*16 array, dimension (N) + On entry, the right hand side b of the triangular system. + On exit, X is overwritten by the solution vector x. + + SCALE (output) DOUBLE PRECISION + The scaling factor s for the triangular system + A * x = s*b, A**T * x = s*b, or A**H * x = s*b. + If SCALE = 0, the matrix A is singular or badly scaled, and + the vector x is an exact or approximate solution to A*x = 0. + + CNORM (input or output) DOUBLE PRECISION array, dimension (N) + + If NORMIN = 'Y', CNORM is an input argument and CNORM(j) + contains the norm of the off-diagonal part of the j-th column + of A. If TRANS = 'N', CNORM(j) must be greater than or equal + to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) + must be greater than or equal to the 1-norm. + + If NORMIN = 'N', CNORM is an output argument and CNORM(j) + returns the 1-norm of the offdiagonal part of the j-th column + of A. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + Further Details + ======= ======= + + A rough bound on x is computed; if that is less than overflow, ZTRSV + is called, otherwise, specific code is used which checks for possible + overflow or divide-by-zero at every operation. + + A columnwise scheme is used for solving A*x = b. The basic algorithm + if A is lower triangular is + + x[1:n] := b[1:n] + for j = 1, ..., n + x(j) := x(j) / A(j,j) + x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] + end + + Define bounds on the components of x after j iterations of the loop: + M(j) = bound on x[1:j] + G(j) = bound on x[j+1:n] + Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. + + Then for iteration j+1 we have + M(j+1) <= G(j) / | A(j+1,j+1) | + G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | + <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) + + where CNORM(j+1) is greater than or equal to the infinity-norm of + column j+1 of A, not counting the diagonal. Hence + + G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) + 1<=i<=j + and + + |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) + 1<=i< j + + Since |x(j)| <= M(j), we use the Level 2 BLAS routine ZTRSV if the + reciprocal of the largest M(j), j=1,..,n, is larger than + max(underflow, 1/overflow). + + The bound on x(j) is also used to determine when a step in the + columnwise method can be performed without fear of overflow. If + the computed bound is greater than a large constant, x is scaled to + prevent overflow, but if the bound overflows, x is set to 0, x(j) to + 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. + + Similarly, a row-wise scheme is used to solve A**T *x = b or + A**H *x = b. The basic algorithm for A upper triangular is + + for j = 1, ..., n + x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) + end + + We simultaneously compute two bounds + G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j + M(j) = bound on x(i), 1<=i<=j + + The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we + add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. + Then the bound on x(j) is + + M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | + + <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) + 1<=i<=j + + and we can safely call ZTRSV if 1/M(n) and 1/G(n) are both greater + than max(underflow, 1/overflow). + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --cnorm; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + notran = lsame_(trans, "N"); + nounit = lsame_(diag, "N"); + +/* Test the input parameters. */ + + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T") && ! + lsame_(trans, "C")) { + *info = -2; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -3; + } else if (! lsame_(normin, "Y") && ! lsame_(normin, + "N")) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZLATRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine machine dependent parameters to control overflow. */ + + smlnum = SAFEMINIMUM; + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + smlnum /= PRECISION; + bignum = 1. / smlnum; + *scale = 1.; + + if (lsame_(normin, "N")) { + +/* Compute the 1-norm of each column, not including the diagonal. */ + + if (upper) { + +/* A is upper triangular. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + cnorm[j] = dzasum_(&i__2, &a[j * a_dim1 + 1], &c__1); +/* L10: */ + } + } else { + +/* A is lower triangular. */ + + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j; + cnorm[j] = dzasum_(&i__2, &a[j + 1 + j * a_dim1], &c__1); +/* L20: */ + } + cnorm[*n] = 0.; + } + } + +/* + Scale the column norms by TSCAL if the maximum element in CNORM is + greater than BIGNUM/2. +*/ + + imax = idamax_(n, &cnorm[1], &c__1); + tmax = cnorm[imax]; + if (tmax <= bignum * .5) { + tscal = 1.; + } else { + tscal = .5 / (smlnum * tmax); + dscal_(n, &tscal, &cnorm[1], &c__1); + } + +/* + Compute a bound on the computed solution vector to see if the + Level 2 BLAS routine ZTRSV can be used. +*/ + + xmax = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = j; + d__3 = xmax, d__4 = (d__1 = x[i__2].r / 2., abs(d__1)) + (d__2 = + d_imag(&x[j]) / 2., abs(d__2)); + xmax = max(d__3,d__4); +/* L30: */ + } + xbnd = xmax; + + if (notran) { + +/* Compute the growth in A * x = b. */ + + if (upper) { + jfirst = *n; + jlast = 1; + jinc = -1; + } else { + jfirst = 1; + jlast = *n; + jinc = 1; + } + + if (tscal != 1.) { + grow = 0.; + goto L60; + } + + if (nounit) { + +/* + A is non-unit triangular. + + Compute GROW = 1/G(j) and XBND = 1/M(j). + Initially, G(0) = max{x(i), i=1,...,n}. +*/ + + grow = .5 / max(xbnd,smlnum); + xbnd = grow; + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L60; + } + + i__3 = j + j * a_dim1; + tjjs.r = a[i__3].r, tjjs.i = a[i__3].i; + tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), abs( + d__2)); + + if (tjj >= smlnum) { + +/* + M(j) = G(j-1) / abs(A(j,j)) + + Computing MIN +*/ + d__1 = xbnd, d__2 = min(1.,tjj) * grow; + xbnd = min(d__1,d__2); + } else { + +/* M(j) could overflow, set XBND to 0. */ + + xbnd = 0.; + } + + if (tjj + cnorm[j] >= smlnum) { + +/* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */ + + grow *= tjj / (tjj + cnorm[j]); + } else { + +/* G(j) could overflow, set GROW to 0. */ + + grow = 0.; + } +/* L40: */ + } + grow = xbnd; + } else { + +/* + A is unit triangular. + + Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. + + Computing MIN +*/ + d__1 = 1., d__2 = .5 / max(xbnd,smlnum); + grow = min(d__1,d__2); + i__2 = jlast; + i__1 = jinc; + for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L60; + } + +/* G(j) = G(j-1)*( 1 + CNORM(j) ) */ + + grow *= 1. / (cnorm[j] + 1.); +/* L50: */ + } + } +L60: + + ; + } else { + +/* Compute the growth in A**T * x = b or A**H * x = b. */ + + if (upper) { + jfirst = 1; + jlast = *n; + jinc = 1; + } else { + jfirst = *n; + jlast = 1; + jinc = -1; + } + + if (tscal != 1.) { + grow = 0.; + goto L90; + } + + if (nounit) { + +/* + A is non-unit triangular. + + Compute GROW = 1/G(j) and XBND = 1/M(j). + Initially, M(0) = max{x(i), i=1,...,n}. +*/ + + grow = .5 / max(xbnd,smlnum); + xbnd = grow; + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L90; + } + +/* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */ + + xj = cnorm[j] + 1.; +/* Computing MIN */ + d__1 = grow, d__2 = xbnd / xj; + grow = min(d__1,d__2); + + i__3 = j + j * a_dim1; + tjjs.r = a[i__3].r, tjjs.i = a[i__3].i; + tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), abs( + d__2)); + + if (tjj >= smlnum) { + +/* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */ + + if (xj > tjj) { + xbnd *= tjj / xj; + } + } else { + +/* M(j) could overflow, set XBND to 0. */ + + xbnd = 0.; + } +/* L70: */ + } + grow = min(grow,xbnd); + } else { + +/* + A is unit triangular. + + Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. + + Computing MIN +*/ + d__1 = 1., d__2 = .5 / max(xbnd,smlnum); + grow = min(d__1,d__2); + i__2 = jlast; + i__1 = jinc; + for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L90; + } + +/* G(j) = ( 1 + CNORM(j) )*G(j-1) */ + + xj = cnorm[j] + 1.; + grow /= xj; +/* L80: */ + } + } +L90: + ; + } + + if (grow * tscal > smlnum) { + +/* + Use the Level 2 BLAS solve if the reciprocal of the bound on + elements of X is not too small. +*/ + + ztrsv_(uplo, trans, diag, n, &a[a_offset], lda, &x[1], &c__1); + } else { + +/* Use a Level 1 BLAS solve, scaling intermediate results. */ + + if (xmax > bignum * .5) { + +/* + Scale X so that its components are less than or equal to + BIGNUM in absolute value. +*/ + + *scale = bignum * .5 / xmax; + zdscal_(n, scale, &x[1], &c__1); + xmax = bignum; + } else { + xmax *= 2.; + } + + if (notran) { + +/* Solve A * x = b */ + + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* Compute x(j) = b(j) / A(j,j), scaling x if necessary. */ + + i__3 = j; + xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j]), + abs(d__2)); + if (nounit) { + i__3 = j + j * a_dim1; + z__1.r = tscal * a[i__3].r, z__1.i = tscal * a[i__3].i; + tjjs.r = z__1.r, tjjs.i = z__1.i; + } else { + tjjs.r = tscal, tjjs.i = 0.; + if (tscal == 1.) { + goto L110; + } + } + tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), abs( + d__2)); + if (tjj > smlnum) { + +/* abs(A(j,j)) > SMLNUM: */ + + if (tjj < 1.) { + if (xj > tjj * bignum) { + +/* Scale x by 1/b(j). */ + + rec = 1. / xj; + zdscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + i__3 = j; + zladiv_(&z__1, &x[j], &tjjs); + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + i__3 = j; + xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j]) + , abs(d__2)); + } else if (tjj > 0.) { + +/* 0 < abs(A(j,j)) <= SMLNUM: */ + + if (xj > tjj * bignum) { + +/* + Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM + to avoid overflow when dividing by A(j,j). +*/ + + rec = tjj * bignum / xj; + if (cnorm[j] > 1.) { + +/* + Scale by 1/CNORM(j) to avoid overflow when + multiplying x(j) times column j. +*/ + + rec /= cnorm[j]; + } + zdscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + i__3 = j; + zladiv_(&z__1, &x[j], &tjjs); + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + i__3 = j; + xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j]) + , abs(d__2)); + } else { + +/* + A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and + scale = 0, and compute a solution to A*x = 0. +*/ + + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__; + x[i__4].r = 0., x[i__4].i = 0.; +/* L100: */ + } + i__3 = j; + x[i__3].r = 1., x[i__3].i = 0.; + xj = 1.; + *scale = 0.; + xmax = 0.; + } +L110: + +/* + Scale x if necessary to avoid overflow when adding a + multiple of column j of A. +*/ + + if (xj > 1.) { + rec = 1. / xj; + if (cnorm[j] > (bignum - xmax) * rec) { + +/* Scale x by 1/(2*abs(x(j))). */ + + rec *= .5; + zdscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + } + } else if (xj * cnorm[j] > bignum - xmax) { + +/* Scale x by 1/2. */ + + zdscal_(n, &c_b2210, &x[1], &c__1); + *scale *= .5; + } + + if (upper) { + if (j > 1) { + +/* + Compute the update + x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) +*/ + + i__3 = j - 1; + i__4 = j; + z__2.r = -x[i__4].r, z__2.i = -x[i__4].i; + z__1.r = tscal * z__2.r, z__1.i = tscal * z__2.i; + zaxpy_(&i__3, &z__1, &a[j * a_dim1 + 1], &c__1, &x[1], + &c__1); + i__3 = j - 1; + i__ = izamax_(&i__3, &x[1], &c__1); + i__3 = i__; + xmax = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag( + &x[i__]), abs(d__2)); + } + } else { + if (j < *n) { + +/* + Compute the update + x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) +*/ + + i__3 = *n - j; + i__4 = j; + z__2.r = -x[i__4].r, z__2.i = -x[i__4].i; + z__1.r = tscal * z__2.r, z__1.i = tscal * z__2.i; + zaxpy_(&i__3, &z__1, &a[j + 1 + j * a_dim1], &c__1, & + x[j + 1], &c__1); + i__3 = *n - j; + i__ = j + izamax_(&i__3, &x[j + 1], &c__1); + i__3 = i__; + xmax = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag( + &x[i__]), abs(d__2)); + } + } +/* L120: */ + } + + } else if (lsame_(trans, "T")) { + +/* Solve A**T * x = b */ + + i__2 = jlast; + i__1 = jinc; + for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* + Compute x(j) = b(j) - sum A(k,j)*x(k). + k<>j +*/ + + i__3 = j; + xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j]), + abs(d__2)); + uscal.r = tscal, uscal.i = 0.; + rec = 1. / max(xmax,1.); + if (cnorm[j] > (bignum - xj) * rec) { + +/* If x(j) could overflow, scale x by 1/(2*XMAX). */ + + rec *= .5; + if (nounit) { + i__3 = j + j * a_dim1; + z__1.r = tscal * a[i__3].r, z__1.i = tscal * a[i__3] + .i; + tjjs.r = z__1.r, tjjs.i = z__1.i; + } else { + tjjs.r = tscal, tjjs.i = 0.; + } + tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), + abs(d__2)); + if (tjj > 1.) { + +/* + Divide by A(j,j) when scaling x if A(j,j) > 1. + + Computing MIN +*/ + d__1 = 1., d__2 = rec * tjj; + rec = min(d__1,d__2); + zladiv_(&z__1, &uscal, &tjjs); + uscal.r = z__1.r, uscal.i = z__1.i; + } + if (rec < 1.) { + zdscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + + csumj.r = 0., csumj.i = 0.; + if (uscal.r == 1. && uscal.i == 0.) { + +/* + If the scaling needed for A in the dot product is 1, + call ZDOTU to perform the dot product. +*/ + + if (upper) { + i__3 = j - 1; + zdotu_(&z__1, &i__3, &a[j * a_dim1 + 1], &c__1, &x[1], + &c__1); + csumj.r = z__1.r, csumj.i = z__1.i; + } else if (j < *n) { + i__3 = *n - j; + zdotu_(&z__1, &i__3, &a[j + 1 + j * a_dim1], &c__1, & + x[j + 1], &c__1); + csumj.r = z__1.r, csumj.i = z__1.i; + } + } else { + +/* Otherwise, use in-line code for the dot product. */ + + if (upper) { + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * a_dim1; + z__3.r = a[i__4].r * uscal.r - a[i__4].i * + uscal.i, z__3.i = a[i__4].r * uscal.i + a[ + i__4].i * uscal.r; + i__5 = i__; + z__2.r = z__3.r * x[i__5].r - z__3.i * x[i__5].i, + z__2.i = z__3.r * x[i__5].i + z__3.i * x[ + i__5].r; + z__1.r = csumj.r + z__2.r, z__1.i = csumj.i + + z__2.i; + csumj.r = z__1.r, csumj.i = z__1.i; +/* L130: */ + } + } else if (j < *n) { + i__3 = *n; + for (i__ = j + 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * a_dim1; + z__3.r = a[i__4].r * uscal.r - a[i__4].i * + uscal.i, z__3.i = a[i__4].r * uscal.i + a[ + i__4].i * uscal.r; + i__5 = i__; + z__2.r = z__3.r * x[i__5].r - z__3.i * x[i__5].i, + z__2.i = z__3.r * x[i__5].i + z__3.i * x[ + i__5].r; + z__1.r = csumj.r + z__2.r, z__1.i = csumj.i + + z__2.i; + csumj.r = z__1.r, csumj.i = z__1.i; +/* L140: */ + } + } + } + + z__1.r = tscal, z__1.i = 0.; + if (uscal.r == z__1.r && uscal.i == z__1.i) { + +/* + Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) + was not used to scale the dotproduct. +*/ + + i__3 = j; + i__4 = j; + z__1.r = x[i__4].r - csumj.r, z__1.i = x[i__4].i - + csumj.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + i__3 = j; + xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j]) + , abs(d__2)); + if (nounit) { + i__3 = j + j * a_dim1; + z__1.r = tscal * a[i__3].r, z__1.i = tscal * a[i__3] + .i; + tjjs.r = z__1.r, tjjs.i = z__1.i; + } else { + tjjs.r = tscal, tjjs.i = 0.; + if (tscal == 1.) { + goto L160; + } + } + +/* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ + + tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), + abs(d__2)); + if (tjj > smlnum) { + +/* abs(A(j,j)) > SMLNUM: */ + + if (tjj < 1.) { + if (xj > tjj * bignum) { + +/* Scale X by 1/abs(x(j)). */ + + rec = 1. / xj; + zdscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + i__3 = j; + zladiv_(&z__1, &x[j], &tjjs); + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + } else if (tjj > 0.) { + +/* 0 < abs(A(j,j)) <= SMLNUM: */ + + if (xj > tjj * bignum) { + +/* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ + + rec = tjj * bignum / xj; + zdscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + i__3 = j; + zladiv_(&z__1, &x[j], &tjjs); + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + } else { + +/* + A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and + scale = 0 and compute a solution to A**T *x = 0. +*/ + + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__; + x[i__4].r = 0., x[i__4].i = 0.; +/* L150: */ + } + i__3 = j; + x[i__3].r = 1., x[i__3].i = 0.; + *scale = 0.; + xmax = 0.; + } +L160: + ; + } else { + +/* + Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot + product has already been divided by 1/A(j,j). +*/ + + i__3 = j; + zladiv_(&z__2, &x[j], &tjjs); + z__1.r = z__2.r - csumj.r, z__1.i = z__2.i - csumj.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + } +/* Computing MAX */ + i__3 = j; + d__3 = xmax, d__4 = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = + d_imag(&x[j]), abs(d__2)); + xmax = max(d__3,d__4); +/* L170: */ + } + + } else { + +/* Solve A**H * x = b */ + + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* + Compute x(j) = b(j) - sum A(k,j)*x(k). + k<>j +*/ + + i__3 = j; + xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j]), + abs(d__2)); + uscal.r = tscal, uscal.i = 0.; + rec = 1. / max(xmax,1.); + if (cnorm[j] > (bignum - xj) * rec) { + +/* If x(j) could overflow, scale x by 1/(2*XMAX). */ + + rec *= .5; + if (nounit) { + d_cnjg(&z__2, &a[j + j * a_dim1]); + z__1.r = tscal * z__2.r, z__1.i = tscal * z__2.i; + tjjs.r = z__1.r, tjjs.i = z__1.i; + } else { + tjjs.r = tscal, tjjs.i = 0.; + } + tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), + abs(d__2)); + if (tjj > 1.) { + +/* + Divide by A(j,j) when scaling x if A(j,j) > 1. + + Computing MIN +*/ + d__1 = 1., d__2 = rec * tjj; + rec = min(d__1,d__2); + zladiv_(&z__1, &uscal, &tjjs); + uscal.r = z__1.r, uscal.i = z__1.i; + } + if (rec < 1.) { + zdscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + + csumj.r = 0., csumj.i = 0.; + if (uscal.r == 1. && uscal.i == 0.) { + +/* + If the scaling needed for A in the dot product is 1, + call ZDOTC to perform the dot product. +*/ + + if (upper) { + i__3 = j - 1; + zdotc_(&z__1, &i__3, &a[j * a_dim1 + 1], &c__1, &x[1], + &c__1); + csumj.r = z__1.r, csumj.i = z__1.i; + } else if (j < *n) { + i__3 = *n - j; + zdotc_(&z__1, &i__3, &a[j + 1 + j * a_dim1], &c__1, & + x[j + 1], &c__1); + csumj.r = z__1.r, csumj.i = z__1.i; + } + } else { + +/* Otherwise, use in-line code for the dot product. */ + + if (upper) { + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + d_cnjg(&z__4, &a[i__ + j * a_dim1]); + z__3.r = z__4.r * uscal.r - z__4.i * uscal.i, + z__3.i = z__4.r * uscal.i + z__4.i * + uscal.r; + i__4 = i__; + z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, + z__2.i = z__3.r * x[i__4].i + z__3.i * x[ + i__4].r; + z__1.r = csumj.r + z__2.r, z__1.i = csumj.i + + z__2.i; + csumj.r = z__1.r, csumj.i = z__1.i; +/* L180: */ + } + } else if (j < *n) { + i__3 = *n; + for (i__ = j + 1; i__ <= i__3; ++i__) { + d_cnjg(&z__4, &a[i__ + j * a_dim1]); + z__3.r = z__4.r * uscal.r - z__4.i * uscal.i, + z__3.i = z__4.r * uscal.i + z__4.i * + uscal.r; + i__4 = i__; + z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, + z__2.i = z__3.r * x[i__4].i + z__3.i * x[ + i__4].r; + z__1.r = csumj.r + z__2.r, z__1.i = csumj.i + + z__2.i; + csumj.r = z__1.r, csumj.i = z__1.i; +/* L190: */ + } + } + } + + z__1.r = tscal, z__1.i = 0.; + if (uscal.r == z__1.r && uscal.i == z__1.i) { + +/* + Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) + was not used to scale the dotproduct. +*/ + + i__3 = j; + i__4 = j; + z__1.r = x[i__4].r - csumj.r, z__1.i = x[i__4].i - + csumj.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + i__3 = j; + xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j]) + , abs(d__2)); + if (nounit) { + d_cnjg(&z__2, &a[j + j * a_dim1]); + z__1.r = tscal * z__2.r, z__1.i = tscal * z__2.i; + tjjs.r = z__1.r, tjjs.i = z__1.i; + } else { + tjjs.r = tscal, tjjs.i = 0.; + if (tscal == 1.) { + goto L210; + } + } + +/* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ + + tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), + abs(d__2)); + if (tjj > smlnum) { + +/* abs(A(j,j)) > SMLNUM: */ + + if (tjj < 1.) { + if (xj > tjj * bignum) { + +/* Scale X by 1/abs(x(j)). */ + + rec = 1. / xj; + zdscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + i__3 = j; + zladiv_(&z__1, &x[j], &tjjs); + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + } else if (tjj > 0.) { + +/* 0 < abs(A(j,j)) <= SMLNUM: */ + + if (xj > tjj * bignum) { + +/* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ + + rec = tjj * bignum / xj; + zdscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + i__3 = j; + zladiv_(&z__1, &x[j], &tjjs); + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + } else { + +/* + A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and + scale = 0 and compute a solution to A**H *x = 0. +*/ + + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__; + x[i__4].r = 0., x[i__4].i = 0.; +/* L200: */ + } + i__3 = j; + x[i__3].r = 1., x[i__3].i = 0.; + *scale = 0.; + xmax = 0.; + } +L210: + ; + } else { + +/* + Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot + product has already been divided by 1/A(j,j). +*/ + + i__3 = j; + zladiv_(&z__2, &x[j], &tjjs); + z__1.r = z__2.r - csumj.r, z__1.i = z__2.i - csumj.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + } +/* Computing MAX */ + i__3 = j; + d__3 = xmax, d__4 = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = + d_imag(&x[j]), abs(d__2)); + xmax = max(d__3,d__4); +/* L220: */ + } + } + *scale /= tscal; + } + +/* Scale the column norms by 1/TSCAL for return. */ + + if (tscal != 1.) { + d__1 = 1. / tscal; + dscal_(n, &d__1, &cnorm[1], &c__1); + } + + return 0; + +/* End of ZLATRS */ + +} /* zlatrs_ */ + +/* Subroutine */ int zlauu2_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + doublecomplex z__1; + + /* Local variables */ + static integer i__; + static doublereal aii; + extern logical lsame_(char *, char *); + extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + extern /* Subroutine */ int zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *); + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_( + integer *, doublereal *, doublecomplex *, integer *), zlacgv_( + integer *, doublecomplex *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZLAUU2 computes the product U * U' or L' * L, where the triangular + factor U or L is stored in the upper or lower triangular part of + the array A. + + If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + overwriting the factor U in A. + If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + overwriting the factor L in A. + + This is the unblocked form of the algorithm, calling Level 2 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the triangular factor stored in the array A + is upper or lower triangular: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the triangular factor U or L. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the triangular factor U or L. + On exit, if UPLO = 'U', the upper triangle of A is + overwritten with the upper triangle of the product U * U'; + if UPLO = 'L', the lower triangle of A is overwritten with + the lower triangle of the product L' * L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZLAUU2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (upper) { + +/* Compute the product U * U'. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + aii = a[i__2].r; + if (i__ < *n) { + i__2 = i__ + i__ * a_dim1; + i__3 = *n - i__; + zdotc_(&z__1, &i__3, &a[i__ + (i__ + 1) * a_dim1], lda, &a[ + i__ + (i__ + 1) * a_dim1], lda); + d__1 = aii * aii + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + i__2 = *n - i__; + zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ - 1; + i__3 = *n - i__; + z__1.r = aii, z__1.i = 0.; + zgemv_("No transpose", &i__2, &i__3, &c_b60, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & + z__1, &a[i__ * a_dim1 + 1], &c__1); + i__2 = *n - i__; + zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + } else { + zdscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1); + } +/* L10: */ + } + + } else { + +/* Compute the product L' * L. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + aii = a[i__2].r; + if (i__ < *n) { + i__2 = i__ + i__ * a_dim1; + i__3 = *n - i__; + zdotc_(&z__1, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &a[ + i__ + 1 + i__ * a_dim1], &c__1); + d__1 = aii * aii + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + i__2 = i__ - 1; + zlacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = *n - i__; + i__3 = i__ - 1; + z__1.r = aii, z__1.i = 0.; + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[i__ + + 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + z__1, &a[i__ + a_dim1], lda); + i__2 = i__ - 1; + zlacgv_(&i__2, &a[i__ + a_dim1], lda); + } else { + zdscal_(&i__, &aii, &a[i__ + a_dim1], lda); + } +/* L20: */ + } + } + + return 0; + +/* End of ZLAUU2 */ + +} /* zlauu2_ */ + +/* Subroutine */ int zlauum_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, ib, nb; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), zherk_(char *, char *, integer *, + integer *, doublereal *, doublecomplex *, integer *, doublereal *, + doublecomplex *, integer *); + static logical upper; + extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), + zlauu2_(char *, integer *, doublecomplex *, integer *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZLAUUM computes the product U * U' or L' * L, where the triangular + factor U or L is stored in the upper or lower triangular part of + the array A. + + If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + overwriting the factor U in A. + If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + overwriting the factor L in A. + + This is the blocked form of the algorithm, calling Level 3 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the triangular factor stored in the array A + is upper or lower triangular: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the triangular factor U or L. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the triangular factor U or L. + On exit, if UPLO = 'U', the upper triangle of A is + overwritten with the upper triangle of the product U * U'; + if UPLO = 'L', the lower triangle of A is overwritten with + the lower triangle of the product L' * L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZLAUUM", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "ZLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code */ + + zlauu2_(uplo, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code */ + + if (upper) { + +/* Compute the product U * U'. */ + + i__1 = *n; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + i__3 = i__ - 1; + ztrmm_("Right", "Upper", "Conjugate transpose", "Non-unit", & + i__3, &ib, &c_b60, &a[i__ + i__ * a_dim1], lda, &a[ + i__ * a_dim1 + 1], lda); + zlauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + zgemm_("No transpose", "Conjugate transpose", &i__3, &ib, + &i__4, &c_b60, &a[(i__ + ib) * a_dim1 + 1], lda, & + a[i__ + (i__ + ib) * a_dim1], lda, &c_b60, &a[i__ + * a_dim1 + 1], lda); + i__3 = *n - i__ - ib + 1; + zherk_("Upper", "No transpose", &ib, &i__3, &c_b1015, &a[ + i__ + (i__ + ib) * a_dim1], lda, &c_b1015, &a[i__ + + i__ * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute the product L' * L. */ + + i__2 = *n; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + i__3 = i__ - 1; + ztrmm_("Left", "Lower", "Conjugate transpose", "Non-unit", & + ib, &i__3, &c_b60, &a[i__ + i__ * a_dim1], lda, &a[ + i__ + a_dim1], lda); + zlauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + zgemm_("Conjugate transpose", "No transpose", &ib, &i__3, + &i__4, &c_b60, &a[i__ + ib + i__ * a_dim1], lda, & + a[i__ + ib + a_dim1], lda, &c_b60, &a[i__ + + a_dim1], lda); + i__3 = *n - i__ - ib + 1; + zherk_("Lower", "Conjugate transpose", &ib, &i__3, & + c_b1015, &a[i__ + ib + i__ * a_dim1], lda, & + c_b1015, &a[i__ + i__ * a_dim1], lda); + } +/* L20: */ + } + } + } + + return 0; + +/* End of ZLAUUM */ + +} /* zlauum_ */ + +/* Subroutine */ int zpotf2_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + doublecomplex z__1, z__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer j; + static doublereal ajj; + extern logical lsame_(char *, char *); + extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + extern /* Subroutine */ int zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *); + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_( + integer *, doublereal *, doublecomplex *, integer *), zlacgv_( + integer *, doublecomplex *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZPOTF2 computes the Cholesky factorization of a complex Hermitian + positive definite matrix A. + + The factorization has the form + A = U' * U , if UPLO = 'U', or + A = L * L', if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. + + This is the unblocked version of the algorithm, calling Level 2 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + Hermitian matrix A is stored. + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the Hermitian matrix A. If UPLO = 'U', the leading + n by n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n by n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U'*U or A = L*L'. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + > 0: if INFO = k, the leading minor of order k is not + positive definite, and the factorization could not be + completed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZPOTF2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute U(J,J) and test for non-positive-definiteness. */ + + i__2 = j + j * a_dim1; + d__1 = a[i__2].r; + i__3 = j - 1; + zdotc_(&z__2, &i__3, &a[j * a_dim1 + 1], &c__1, &a[j * a_dim1 + 1] + , &c__1); + z__1.r = d__1 - z__2.r, z__1.i = -z__2.i; + ajj = z__1.r; + if (ajj <= 0.) { + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.; + goto L30; + } + ajj = sqrt(ajj); + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.; + +/* Compute elements J+1:N of row J. */ + + if (j < *n) { + i__2 = j - 1; + zlacgv_(&i__2, &a[j * a_dim1 + 1], &c__1); + i__2 = j - 1; + i__3 = *n - j; + z__1.r = -1., z__1.i = -0.; + zgemv_("Transpose", &i__2, &i__3, &z__1, &a[(j + 1) * a_dim1 + + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b60, &a[j + ( + j + 1) * a_dim1], lda); + i__2 = j - 1; + zlacgv_(&i__2, &a[j * a_dim1 + 1], &c__1); + i__2 = *n - j; + d__1 = 1. / ajj; + zdscal_(&i__2, &d__1, &a[j + (j + 1) * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute L(J,J) and test for non-positive-definiteness. */ + + i__2 = j + j * a_dim1; + d__1 = a[i__2].r; + i__3 = j - 1; + zdotc_(&z__2, &i__3, &a[j + a_dim1], lda, &a[j + a_dim1], lda); + z__1.r = d__1 - z__2.r, z__1.i = -z__2.i; + ajj = z__1.r; + if (ajj <= 0.) { + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.; + goto L30; + } + ajj = sqrt(ajj); + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.; + +/* Compute elements J+1:N of column J. */ + + if (j < *n) { + i__2 = j - 1; + zlacgv_(&i__2, &a[j + a_dim1], lda); + i__2 = *n - j; + i__3 = j - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &a[j + 1 + a_dim1] + , lda, &a[j + a_dim1], lda, &c_b60, &a[j + 1 + j * + a_dim1], &c__1); + i__2 = j - 1; + zlacgv_(&i__2, &a[j + a_dim1], lda); + i__2 = *n - j; + d__1 = 1. / ajj; + zdscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); + } +/* L20: */ + } + } + goto L40; + +L30: + *info = j; + +L40: + return 0; + +/* End of ZPOTF2 */ + +} /* zpotf2_ */ + +/* Subroutine */ int zpotrf_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + doublecomplex z__1; + + /* Local variables */ + static integer j, jb, nb; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), zherk_(char *, char *, integer *, + integer *, doublereal *, doublecomplex *, integer *, doublereal *, + doublecomplex *, integer *); + static logical upper; + extern /* Subroutine */ int ztrsm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), + zpotf2_(char *, integer *, doublecomplex *, integer *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZPOTRF computes the Cholesky factorization of a complex Hermitian + positive definite matrix A. + + The factorization has the form + A = U**H * U, if UPLO = 'U', or + A = L * L**H, if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. + + This is the block version of the algorithm, calling Level 3 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the Hermitian matrix A. If UPLO = 'U', the leading + N-by-N upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U**H*U or A = L*L**H. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the leading minor of order i is not + positive definite, and the factorization could not be + completed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZPOTRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "ZPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code. */ + + zpotf2_(uplo, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code. */ + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. */ + + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* + Update and factorize the current diagonal block and test + for non-positive-definiteness. + + Computing MIN +*/ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + zherk_("Upper", "Conjugate transpose", &jb, &i__3, &c_b1294, & + a[j * a_dim1 + 1], lda, &c_b1015, &a[j + j * a_dim1], + lda); + zpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Compute the current block row. */ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + z__1.r = -1., z__1.i = -0.; + zgemm_("Conjugate transpose", "No transpose", &jb, &i__3, + &i__4, &z__1, &a[j * a_dim1 + 1], lda, &a[(j + jb) + * a_dim1 + 1], lda, &c_b60, &a[j + (j + jb) * + a_dim1], lda); + i__3 = *n - j - jb + 1; + ztrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", + &jb, &i__3, &c_b60, &a[j + j * a_dim1], lda, &a[ + j + (j + jb) * a_dim1], lda); + } +/* L10: */ + } + + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + i__2 = *n; + i__1 = nb; + for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* + Update and factorize the current diagonal block and test + for non-positive-definiteness. + + Computing MIN +*/ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + zherk_("Lower", "No transpose", &jb, &i__3, &c_b1294, &a[j + + a_dim1], lda, &c_b1015, &a[j + j * a_dim1], lda); + zpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Compute the current block column. */ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + z__1.r = -1., z__1.i = -0.; + zgemm_("No transpose", "Conjugate transpose", &i__3, &jb, + &i__4, &z__1, &a[j + jb + a_dim1], lda, &a[j + + a_dim1], lda, &c_b60, &a[j + jb + j * a_dim1], + lda); + i__3 = *n - j - jb + 1; + ztrsm_("Right", "Lower", "Conjugate transpose", "Non-unit" + , &i__3, &jb, &c_b60, &a[j + j * a_dim1], lda, &a[ + j + jb + j * a_dim1], lda); + } +/* L20: */ + } + } + } + goto L40; + +L30: + *info = *info + j - 1; + +L40: + return 0; + +/* End of ZPOTRF */ + +} /* zpotrf_ */ + +/* Subroutine */ int zpotri_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *), zlauum_( + char *, integer *, doublecomplex *, integer *, integer *), + ztrtri_(char *, char *, integer *, doublecomplex *, integer *, + integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + ZPOTRI computes the inverse of a complex Hermitian positive definite + matrix A using the Cholesky factorization A = U**H*U or A = L*L**H + computed by ZPOTRF. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the triangular factor U or L from the Cholesky + factorization A = U**H*U or A = L*L**H, as computed by + ZPOTRF. + On exit, the upper or lower triangle of the (Hermitian) + inverse of A, overwriting the input factor U or L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the (i,i) element of the factor U or L is + zero, and the inverse could not be computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZPOTRI", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Invert the triangular Cholesky factor U or L. */ + + ztrtri_(uplo, "Non-unit", n, &a[a_offset], lda, info); + if (*info > 0) { + return 0; + } + +/* Form inv(U)*inv(U)' or inv(L)'*inv(L). */ + + zlauum_(uplo, n, &a[a_offset], lda, info); + + return 0; + +/* End of ZPOTRI */ + +} /* zpotri_ */ + +/* Subroutine */ int zpotrs_(char *uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + static logical upper; + extern /* Subroutine */ int ztrsm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), + xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZPOTRS solves a system of linear equations A*X = B with a Hermitian + positive definite matrix A using the Cholesky factorization + A = U**H*U or A = L*L**H computed by ZPOTRF. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input) COMPLEX*16 array, dimension (LDA,N) + The triangular factor U or L from the Cholesky factorization + A = U**H*U or A = L*L**H, as computed by ZPOTRF. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) + On entry, the right hand side matrix B. + On exit, the solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZPOTRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (upper) { + +/* + Solve A*X = B where A = U'*U. + + Solve U'*X = B, overwriting B with X. +*/ + + ztrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", n, nrhs, & + c_b60, &a[a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. */ + + ztrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b60, & + a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* + Solve A*X = B where A = L*L'. + + Solve L*X = B, overwriting B with X. +*/ + + ztrsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b60, & + a[a_offset], lda, &b[b_offset], ldb); + +/* Solve L'*X = B, overwriting B with X. */ + + ztrsm_("Left", "Lower", "Conjugate transpose", "Non-unit", n, nrhs, & + c_b60, &a[a_offset], lda, &b[b_offset], ldb); + } + + return 0; + +/* End of ZPOTRS */ + +} /* zpotrs_ */ + +/* Subroutine */ int zstedc_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublecomplex *z__, integer *ldz, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork, + integer *liwork, integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2; + + /* Builtin functions */ + double log(doublereal); + integer pow_ii(integer *, integer *); + double sqrt(doublereal); + + /* Local variables */ + static integer i__, j, k, m; + static doublereal p; + static integer ii, ll, end, lgn; + static doublereal eps, tiny; + extern logical lsame_(char *, char *); + static integer lwmin, start; + extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zlaed0_(integer *, integer *, + doublereal *, doublereal *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, integer *, integer *); + + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dstedc_(char *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, integer *), dlaset_( + char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *), zlacrm_(integer *, integer *, doublecomplex *, + integer *, doublereal *, integer *, doublecomplex *, integer *, + doublereal *); + static integer liwmin, icompz; + extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *), zlacpy_(char *, integer *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *); + static doublereal orgnrm; + static integer lrwmin; + static logical lquery; + static integer smlsiz; + extern /* Subroutine */ int zsteqr_(char *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublereal *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZSTEDC computes all eigenvalues and, optionally, eigenvectors of a + symmetric tridiagonal matrix using the divide and conquer method. + The eigenvectors of a full or band complex Hermitian matrix can also + be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this + matrix to tridiagonal form. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. See DLAED3 for details. + + Arguments + ========= + + COMPZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only. + = 'I': Compute eigenvectors of tridiagonal matrix also. + = 'V': Compute eigenvectors of original Hermitian matrix + also. On entry, Z contains the unitary matrix used + to reduce the original matrix to tridiagonal form. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. + + E (input/output) DOUBLE PRECISION array, dimension (N-1) + On entry, the subdiagonal elements of the tridiagonal matrix. + On exit, E has been destroyed. + + Z (input/output) COMPLEX*16 array, dimension (LDZ,N) + On entry, if COMPZ = 'V', then Z contains the unitary + matrix used in the reduction to tridiagonal form. + On exit, if INFO = 0, then if COMPZ = 'V', Z contains the + orthonormal eigenvectors of the original Hermitian matrix, + and if COMPZ = 'I', Z contains the orthonormal eigenvectors + of the symmetric tridiagonal matrix. + If COMPZ = 'N', then Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= 1. + If eigenvectors are desired, then LDZ >= max(1,N). + + WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If COMPZ = 'N' or 'I', or N <= 1, LWORK must be at least 1. + If COMPZ = 'V' and N > 1, LWORK must be at least N*N. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + RWORK (workspace/output) DOUBLE PRECISION array, + dimension (LRWORK) + On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. + + LRWORK (input) INTEGER + The dimension of the array RWORK. + If COMPZ = 'N' or N <= 1, LRWORK must be at least 1. + If COMPZ = 'V' and N > 1, LRWORK must be at least + 1 + 3*N + 2*N*lg N + 3*N**2 , + where lg( N ) = smallest integer k such + that 2**k >= N. + If COMPZ = 'I' and N > 1, LRWORK must be at least + 1 + 4*N + 2*N**2 . + + If LRWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal size of the RWORK array, + returns this value as the first entry of the RWORK array, and + no error message related to LRWORK is issued by XERBLA. + + IWORK (workspace/output) INTEGER array, dimension (LIWORK) + On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. + + LIWORK (input) INTEGER + The dimension of the array IWORK. + If COMPZ = 'N' or N <= 1, LIWORK must be at least 1. + If COMPZ = 'V' or N > 1, LIWORK must be at least + 6 + 6*N + 5*N*lg N. + If COMPZ = 'I' or N > 1, LIWORK must be at least + 3 + 5*N . + + If LIWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal size of the IWORK array, + returns this value as the first entry of the IWORK array, and + no error message related to LIWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an eigenvalue while + working on the submatrix lying in rows and columns + INFO/(N+1) through mod(INFO,N+1). + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; + + if (lsame_(compz, "N")) { + icompz = 0; + } else if (lsame_(compz, "V")) { + icompz = 1; + } else if (lsame_(compz, "I")) { + icompz = 2; + } else { + icompz = -1; + } + if (*n <= 1 || icompz <= 0) { + lwmin = 1; + liwmin = 1; + lrwmin = 1; + } else { + lgn = (integer) (log((doublereal) (*n)) / log(2.)); + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (icompz == 1) { + lwmin = *n * *n; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3; + liwmin = *n * 6 + 6 + *n * 5 * lgn; + } else if (icompz == 2) { + lwmin = 1; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = (*n << 2) + 1 + (i__1 * i__1 << 1); + liwmin = *n * 5 + 3; + } + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } else if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -10; + } else if (*liwork < liwmin && ! lquery) { + *info = -12; + } + + if (*info == 0) { + work[1].r = (doublereal) lwmin, work[1].i = 0.; + rwork[1] = (doublereal) lrwmin; + iwork[1] = liwmin; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZSTEDC", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*n == 1) { + if (icompz != 0) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1., z__[i__1].i = 0.; + } + return 0; + } + + smlsiz = ilaenv_(&c__9, "ZSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + +/* + If the following conditional clause is removed, then the routine + will use the Divide and Conquer routine to compute only the + eigenvalues, which requires (3N + 3N**2) real workspace and + (2 + 5N + 2N lg(N)) integer workspace. + Since on many architectures DSTERF is much faster than any other + algorithm for finding eigenvalues only, it is used here + as the default. + + If COMPZ = 'N', use DSTERF to compute the eigenvalues. +*/ + + if (icompz == 0) { + dsterf_(n, &d__[1], &e[1], info); + return 0; + } + +/* + If N is smaller than the minimum divide size (SMLSIZ+1), then + solve the problem with another solver. +*/ + + if (*n <= smlsiz) { + if (icompz == 0) { + dsterf_(n, &d__[1], &e[1], info); + return 0; + } else if (icompz == 2) { + zsteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], + info); + return 0; + } else { + zsteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], + info); + return 0; + } + } + +/* If COMPZ = 'I', we simply call DSTEDC instead. */ + + if (icompz == 2) { + dlaset_("Full", n, n, &c_b324, &c_b1015, &rwork[1], n); + ll = *n * *n + 1; + i__1 = *lrwork - ll + 1; + dstedc_("I", n, &d__[1], &e[1], &rwork[1], n, &rwork[ll], &i__1, & + iwork[1], liwork, info); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * z_dim1; + i__4 = (j - 1) * *n + i__; + z__[i__3].r = rwork[i__4], z__[i__3].i = 0.; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* + From now on, only option left to be handled is COMPZ = 'V', + i.e. ICOMPZ = 1. + + Scale. +*/ + + orgnrm = dlanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.) { + return 0; + } + + eps = EPSILON; + + start = 1; + +/* while ( START <= N ) */ + +L30: + if (start <= *n) { + +/* + Let END be the position of the next subdiagonal entry such that + E( END ) <= TINY or END = N if no such subdiagonal exists. The + matrix identified by the elements between START and END + constitutes an independent sub-problem. +*/ + + end = start; +L40: + if (end < *n) { + tiny = eps * sqrt((d__1 = d__[end], abs(d__1))) * sqrt((d__2 = + d__[end + 1], abs(d__2))); + if ((d__1 = e[end], abs(d__1)) > tiny) { + ++end; + goto L40; + } + } + +/* (Sub) Problem determined. Compute its size and solve it. */ + + m = end - start + 1; + if (m > smlsiz) { + *info = smlsiz; + +/* Scale. */ + + orgnrm = dlanst_("M", &m, &d__[start], &e[start]); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, &m, &c__1, &d__[ + start], &m, info); + i__1 = m - 1; + i__2 = m - 1; + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, &i__1, &c__1, &e[ + start], &i__2, info); + + zlaed0_(n, &m, &d__[start], &e[start], &z__[start * z_dim1 + 1], + ldz, &work[1], n, &rwork[1], &iwork[1], info); + if (*info > 0) { + *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m + + 1) + start - 1; + return 0; + } + +/* Scale back. */ + + dlascl_("G", &c__0, &c__0, &c_b1015, &orgnrm, &m, &c__1, &d__[ + start], &m, info); + + } else { + dsteqr_("I", &m, &d__[start], &e[start], &rwork[1], &m, &rwork[m * + m + 1], info); + zlacrm_(n, &m, &z__[start * z_dim1 + 1], ldz, &rwork[1], &m, & + work[1], n, &rwork[m * m + 1]); + zlacpy_("A", n, &m, &work[1], n, &z__[start * z_dim1 + 1], ldz); + if (*info > 0) { + *info = start * (*n + 1) + end; + return 0; + } + } + + start = end + 1; + goto L30; + } + +/* + endwhile + + If the problem split any number of times, then the eigenvalues + will not be properly ordered. Here we permute the eigenvalues + (and the associated eigenvectors) into ascending order. +*/ + + if (m != *n) { + +/* Use Selection Sort to minimize swaps of eigenvectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L50: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], + &c__1); + } +/* L60: */ + } + } + + work[1].r = (doublereal) lwmin, work[1].i = 0.; + rwork[1] = (doublereal) lrwmin; + iwork[1] = liwmin; + + return 0; + +/* End of ZSTEDC */ + +} /* zstedc_ */ + +/* Subroutine */ int zsteqr_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublecomplex *z__, integer *ldz, doublereal *work, + integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + static doublereal b, c__, f, g; + static integer i__, j, k, l, m; + static doublereal p, r__, s; + static integer l1, ii, mm, lm1, mm1, nm1; + static doublereal rt1, rt2, eps; + static integer lsv; + static doublereal tst, eps2; + static integer lend, jtot; + extern /* Subroutine */ int dlae2_(doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *); + extern logical lsame_(char *, char *); + static doublereal anorm; + extern /* Subroutine */ int zlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublecomplex *, integer *), zswap_(integer *, doublecomplex *, + integer *, doublecomplex *, integer *), dlaev2_(doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *); + static integer lendm1, lendp1; + + static integer iscale; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + static doublereal safmin; + extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + static doublereal safmax; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, + integer *); + static integer lendsv; + static doublereal ssfmin; + static integer nmaxit, icompz; + static doublereal ssfmax; + extern /* Subroutine */ int zlaset_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZSTEQR computes all eigenvalues and, optionally, eigenvectors of a + symmetric tridiagonal matrix using the implicit QL or QR method. + The eigenvectors of a full or band complex Hermitian matrix can also + be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this + matrix to tridiagonal form. + + Arguments + ========= + + COMPZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only. + = 'V': Compute eigenvalues and eigenvectors of the original + Hermitian matrix. On entry, Z must contain the + unitary matrix used to reduce the original matrix + to tridiagonal form. + = 'I': Compute eigenvalues and eigenvectors of the + tridiagonal matrix. Z is initialized to the identity + matrix. + + N (input) INTEGER + The order of the matrix. N >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. + + E (input/output) DOUBLE PRECISION array, dimension (N-1) + On entry, the (n-1) subdiagonal elements of the tridiagonal + matrix. + On exit, E has been destroyed. + + Z (input/output) COMPLEX*16 array, dimension (LDZ, N) + On entry, if COMPZ = 'V', then Z contains the unitary + matrix used in the reduction to tridiagonal form. + On exit, if INFO = 0, then if COMPZ = 'V', Z contains the + orthonormal eigenvectors of the original Hermitian matrix, + and if COMPZ = 'I', Z contains the orthonormal eigenvectors + of the symmetric tridiagonal matrix. + If COMPZ = 'N', then Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= 1, and if + eigenvectors are desired, then LDZ >= max(1,N). + + WORK (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2)) + If COMPZ = 'N', then WORK is not referenced. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: the algorithm has failed to find all the eigenvalues in + a total of 30*N iterations; if INFO = i, then i + elements of E have not converged to zero; on exit, D + and E contain the elements of a symmetric tridiagonal + matrix which is unitarily similar to the original + matrix. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + + if (lsame_(compz, "N")) { + icompz = 0; + } else if (lsame_(compz, "V")) { + icompz = 1; + } else if (lsame_(compz, "I")) { + icompz = 2; + } else { + icompz = -1; + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZSTEQR", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (icompz == 2) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1., z__[i__1].i = 0.; + } + return 0; + } + +/* Determine the unit roundoff and over/underflow thresholds. */ + + eps = EPSILON; +/* Computing 2nd power */ + d__1 = eps; + eps2 = d__1 * d__1; + safmin = SAFEMINIMUM; + safmax = 1. / safmin; + ssfmax = sqrt(safmax) / 3.; + ssfmin = sqrt(safmin) / eps2; + +/* + Compute the eigenvalues and eigenvectors of the tridiagonal + matrix. +*/ + + if (icompz == 2) { + zlaset_("Full", n, n, &c_b59, &c_b60, &z__[z_offset], ldz); + } + + nmaxit = *n * 30; + jtot = 0; + +/* + Determine where the matrix splits and choose QL or QR iteration + for each block, according to whether top or bottom diagonal + element is smaller. +*/ + + l1 = 1; + nm1 = *n - 1; + +L10: + if (l1 > *n) { + goto L160; + } + if (l1 > 1) { + e[l1 - 1] = 0.; + } + if (l1 <= nm1) { + i__1 = nm1; + for (m = l1; m <= i__1; ++m) { + tst = (d__1 = e[m], abs(d__1)); + if (tst == 0.) { + goto L30; + } + if (tst <= sqrt((d__1 = d__[m], abs(d__1))) * sqrt((d__2 = d__[m + + 1], abs(d__2))) * eps) { + e[m] = 0.; + goto L30; + } +/* L20: */ + } + } + m = *n; + +L30: + l = l1; + lsv = l; + lend = m; + lendsv = lend; + l1 = m + 1; + if (lend == l) { + goto L10; + } + +/* Scale submatrix in rows and columns L to LEND */ + + i__1 = lend - l + 1; + anorm = dlanst_("I", &i__1, &d__[l], &e[l]); + iscale = 0; + if (anorm == 0.) { + goto L10; + } + if (anorm > ssfmax) { + iscale = 1; + i__1 = lend - l + 1; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, + info); + } else if (anorm < ssfmin) { + iscale = 2; + i__1 = lend - l + 1; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, + info); + } + +/* Choose between QL and QR iteration */ + + if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { + lend = lsv; + l = lendsv; + } + + if (lend > l) { + +/* + QL Iteration + + Look for small subdiagonal element. +*/ + +L40: + if (l != lend) { + lendm1 = lend - 1; + i__1 = lendm1; + for (m = l; m <= i__1; ++m) { +/* Computing 2nd power */ + d__2 = (d__1 = e[m], abs(d__1)); + tst = d__2 * d__2; + if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m + + 1], abs(d__2)) + safmin) { + goto L60; + } +/* L50: */ + } + } + + m = lend; + +L60: + if (m < lend) { + e[m] = 0.; + } + p = d__[l]; + if (m == l) { + goto L80; + } + +/* + If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 + to compute its eigensystem. +*/ + + if (m == l + 1) { + if (icompz > 0) { + dlaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); + work[l] = c__; + work[*n - 1 + l] = s; + zlasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & + z__[l * z_dim1 + 1], ldz); + } else { + dlae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); + } + d__[l] = rt1; + d__[l + 1] = rt2; + e[l] = 0.; + l += 2; + if (l <= lend) { + goto L40; + } + goto L140; + } + + if (jtot == nmaxit) { + goto L140; + } + ++jtot; + +/* Form shift. */ + + g = (d__[l + 1] - p) / (e[l] * 2.); + r__ = dlapy2_(&g, &c_b1015); + g = d__[m] - p + e[l] / (g + d_sign(&r__, &g)); + + s = 1.; + c__ = 1.; + p = 0.; + +/* Inner loop */ + + mm1 = m - 1; + i__1 = l; + for (i__ = mm1; i__ >= i__1; --i__) { + f = s * e[i__]; + b = c__ * e[i__]; + dlartg_(&g, &f, &c__, &s, &r__); + if (i__ != m - 1) { + e[i__ + 1] = r__; + } + g = d__[i__ + 1] - p; + r__ = (d__[i__] - g) * s + c__ * 2. * b; + p = s * r__; + d__[i__ + 1] = g + p; + g = c__ * r__ - b; + +/* If eigenvectors are desired, then save rotations. */ + + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = -s; + } + +/* L70: */ + } + +/* If eigenvectors are desired, then apply saved rotations. */ + + if (icompz > 0) { + mm = m - l + 1; + zlasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l + * z_dim1 + 1], ldz); + } + + d__[l] -= p; + e[l] = g; + goto L40; + +/* Eigenvalue found. */ + +L80: + d__[l] = p; + + ++l; + if (l <= lend) { + goto L40; + } + goto L140; + + } else { + +/* + QR Iteration + + Look for small superdiagonal element. +*/ + +L90: + if (l != lend) { + lendp1 = lend + 1; + i__1 = lendp1; + for (m = l; m >= i__1; --m) { +/* Computing 2nd power */ + d__2 = (d__1 = e[m - 1], abs(d__1)); + tst = d__2 * d__2; + if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m + - 1], abs(d__2)) + safmin) { + goto L110; + } +/* L100: */ + } + } + + m = lend; + +L110: + if (m > lend) { + e[m - 1] = 0.; + } + p = d__[l]; + if (m == l) { + goto L130; + } + +/* + If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 + to compute its eigensystem. +*/ + + if (m == l - 1) { + if (icompz > 0) { + dlaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) + ; + work[m] = c__; + work[*n - 1 + m] = s; + zlasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & + z__[(l - 1) * z_dim1 + 1], ldz); + } else { + dlae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); + } + d__[l - 1] = rt1; + d__[l] = rt2; + e[l - 1] = 0.; + l += -2; + if (l >= lend) { + goto L90; + } + goto L140; + } + + if (jtot == nmaxit) { + goto L140; + } + ++jtot; + +/* Form shift. */ + + g = (d__[l - 1] - p) / (e[l - 1] * 2.); + r__ = dlapy2_(&g, &c_b1015); + g = d__[m] - p + e[l - 1] / (g + d_sign(&r__, &g)); + + s = 1.; + c__ = 1.; + p = 0.; + +/* Inner loop */ + + lm1 = l - 1; + i__1 = lm1; + for (i__ = m; i__ <= i__1; ++i__) { + f = s * e[i__]; + b = c__ * e[i__]; + dlartg_(&g, &f, &c__, &s, &r__); + if (i__ != m) { + e[i__ - 1] = r__; + } + g = d__[i__] - p; + r__ = (d__[i__ + 1] - g) * s + c__ * 2. * b; + p = s * r__; + d__[i__] = g + p; + g = c__ * r__ - b; + +/* If eigenvectors are desired, then save rotations. */ + + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = s; + } + +/* L120: */ + } + +/* If eigenvectors are desired, then apply saved rotations. */ + + if (icompz > 0) { + mm = l - m + 1; + zlasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m + * z_dim1 + 1], ldz); + } + + d__[l] -= p; + e[lm1] = g; + goto L90; + +/* Eigenvalue found. */ + +L130: + d__[l] = p; + + --l; + if (l >= lend) { + goto L90; + } + goto L140; + + } + +/* Undo scaling if necessary */ + +L140: + if (iscale == 1) { + i__1 = lendsv - lsv + 1; + dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + i__1 = lendsv - lsv; + dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, + info); + } else if (iscale == 2) { + i__1 = lendsv - lsv + 1; + dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + i__1 = lendsv - lsv; + dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, + info); + } + +/* + Check for no convergence to an eigenvalue after a total + of N*MAXIT iterations. +*/ + + if (jtot == nmaxit) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.) { + ++(*info); + } +/* L150: */ + } + return 0; + } + goto L10; + +/* Order eigenvalues and eigenvectors. */ + +L160: + if (icompz == 0) { + +/* Use Quick Sort */ + + dlasrt_("I", n, &d__[1], info); + + } else { + +/* Use Selection Sort to minimize swaps of eigenvectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L170: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], + &c__1); + } +/* L180: */ + } + } + return 0; + +/* End of ZSTEQR */ + +} /* zsteqr_ */ + +/* Subroutine */ int ztrevc_(char *side, char *howmny, logical *select, + integer *n, doublecomplex *t, integer *ldt, doublecomplex *vl, + integer *ldvl, doublecomplex *vr, integer *ldvr, integer *mm, integer + *m, doublecomplex *work, doublereal *rwork, integer *info) +{ + /* System generated locals */ + integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3; + doublecomplex z__1, z__2; + + /* Builtin functions */ + double d_imag(doublecomplex *); + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__, j, k, ii, ki, is; + static doublereal ulp; + static logical allv; + static doublereal unfl, ovfl, smin; + static logical over; + static doublereal scale; + extern logical lsame_(char *, char *); + static doublereal remax; + static logical leftv, bothv; + extern /* Subroutine */ int zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *); + static logical somev; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), dlabad_(doublereal *, doublereal *); + + extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_( + integer *, doublereal *, doublecomplex *, integer *); + extern integer izamax_(integer *, doublecomplex *, integer *); + static logical rightv; + extern doublereal dzasum_(integer *, doublecomplex *, integer *); + static doublereal smlnum; + extern /* Subroutine */ int zlatrs_(char *, char *, char *, char *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublereal *, doublereal *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZTREVC computes some or all of the right and/or left eigenvectors of + a complex upper triangular matrix T. + + The right eigenvector x and the left eigenvector y of T corresponding + to an eigenvalue w are defined by: + + T*x = w*x, y'*T = w*y' + + where y' denotes the conjugate transpose of the vector y. + + If all eigenvectors are requested, the routine may either return the + matrices X and/or Y of right or left eigenvectors of T, or the + products Q*X and/or Q*Y, where Q is an input unitary + matrix. If T was obtained from the Schur factorization of an + original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of + right or left eigenvectors of A. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'R': compute right eigenvectors only; + = 'L': compute left eigenvectors only; + = 'B': compute both right and left eigenvectors. + + HOWMNY (input) CHARACTER*1 + = 'A': compute all right and/or left eigenvectors; + = 'B': compute all right and/or left eigenvectors, + and backtransform them using the input matrices + supplied in VR and/or VL; + = 'S': compute selected right and/or left eigenvectors, + specified by the logical array SELECT. + + SELECT (input) LOGICAL array, dimension (N) + If HOWMNY = 'S', SELECT specifies the eigenvectors to be + computed. + If HOWMNY = 'A' or 'B', SELECT is not referenced. + To select the eigenvector corresponding to the j-th + eigenvalue, SELECT(j) must be set to .TRUE.. + + N (input) INTEGER + The order of the matrix T. N >= 0. + + T (input/output) COMPLEX*16 array, dimension (LDT,N) + The upper triangular matrix T. T is modified, but restored + on exit. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= max(1,N). + + VL (input/output) COMPLEX*16 array, dimension (LDVL,MM) + On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must + contain an N-by-N matrix Q (usually the unitary matrix Q of + Schur vectors returned by ZHSEQR). + On exit, if SIDE = 'L' or 'B', VL contains: + if HOWMNY = 'A', the matrix Y of left eigenvectors of T; + VL is lower triangular. The i-th column + VL(i) of VL is the eigenvector corresponding + to T(i,i). + if HOWMNY = 'B', the matrix Q*Y; + if HOWMNY = 'S', the left eigenvectors of T specified by + SELECT, stored consecutively in the columns + of VL, in the same order as their + eigenvalues. + If SIDE = 'R', VL is not referenced. + + LDVL (input) INTEGER + The leading dimension of the array VL. LDVL >= max(1,N) if + SIDE = 'L' or 'B'; LDVL >= 1 otherwise. + + VR (input/output) COMPLEX*16 array, dimension (LDVR,MM) + On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must + contain an N-by-N matrix Q (usually the unitary matrix Q of + Schur vectors returned by ZHSEQR). + On exit, if SIDE = 'R' or 'B', VR contains: + if HOWMNY = 'A', the matrix X of right eigenvectors of T; + VR is upper triangular. The i-th column + VR(i) of VR is the eigenvector corresponding + to T(i,i). + if HOWMNY = 'B', the matrix Q*X; + if HOWMNY = 'S', the right eigenvectors of T specified by + SELECT, stored consecutively in the columns + of VR, in the same order as their + eigenvalues. + If SIDE = 'L', VR is not referenced. + + LDVR (input) INTEGER + The leading dimension of the array VR. LDVR >= max(1,N) if + SIDE = 'R' or 'B'; LDVR >= 1 otherwise. + + MM (input) INTEGER + The number of columns in the arrays VL and/or VR. MM >= M. + + M (output) INTEGER + The number of columns in the arrays VL and/or VR actually + used to store the eigenvectors. If HOWMNY = 'A' or 'B', M + is set to N. Each selected eigenvector occupies one + column. + + WORK (workspace) COMPLEX*16 array, dimension (2*N) + + RWORK (workspace) DOUBLE PRECISION array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The algorithm used in this program is basically backward (forward) + substitution, with scaling to make the the code robust against + possible overflow. + + Each eigenvector is normalized so that the element of largest + magnitude has magnitude 1; here the magnitude of a complex number + (x,y) is taken to be |x| + |y|. + + ===================================================================== + + + Decode and test the input parameters +*/ + + /* Parameter adjustments */ + --select; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + --rwork; + + /* Function Body */ + bothv = lsame_(side, "B"); + rightv = lsame_(side, "R") || bothv; + leftv = lsame_(side, "L") || bothv; + + allv = lsame_(howmny, "A"); + over = lsame_(howmny, "B"); + somev = lsame_(howmny, "S"); + +/* + Set M to the number of columns required to store the selected + eigenvectors. +*/ + + if (somev) { + *m = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (select[j]) { + ++(*m); + } +/* L10: */ + } + } else { + *m = *n; + } + + *info = 0; + if (! rightv && ! leftv) { + *info = -1; + } else if (! allv && ! over && ! somev) { + *info = -2; + } else if (*n < 0) { + *info = -4; + } else if (*ldt < max(1,*n)) { + *info = -6; + } else if (*ldvl < 1 || leftv && *ldvl < *n) { + *info = -8; + } else if (*ldvr < 1 || rightv && *ldvr < *n) { + *info = -10; + } else if (*mm < *m) { + *info = -11; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZTREVC", &i__1); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + +/* Set the constants to control overflow. */ + + unfl = SAFEMINIMUM; + ovfl = 1. / unfl; + dlabad_(&unfl, &ovfl); + ulp = PRECISION; + smlnum = unfl * (*n / ulp); + +/* Store the diagonal elements of T in working array WORK. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + *n; + i__3 = i__ + i__ * t_dim1; + work[i__2].r = t[i__3].r, work[i__2].i = t[i__3].i; +/* L20: */ + } + +/* + Compute 1-norm of each column of strictly upper triangular + part of T to control overflow in triangular solver. +*/ + + rwork[1] = 0.; + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + i__2 = j - 1; + rwork[j] = dzasum_(&i__2, &t[j * t_dim1 + 1], &c__1); +/* L30: */ + } + + if (rightv) { + +/* Compute right eigenvectors. */ + + is = *m; + for (ki = *n; ki >= 1; --ki) { + + if (somev) { + if (! select[ki]) { + goto L80; + } + } +/* Computing MAX */ + i__1 = ki + ki * t_dim1; + d__3 = ulp * ((d__1 = t[i__1].r, abs(d__1)) + (d__2 = d_imag(&t[ + ki + ki * t_dim1]), abs(d__2))); + smin = max(d__3,smlnum); + + work[1].r = 1., work[1].i = 0.; + +/* Form right-hand side. */ + + i__1 = ki - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k; + i__3 = k + ki * t_dim1; + z__1.r = -t[i__3].r, z__1.i = -t[i__3].i; + work[i__2].r = z__1.r, work[i__2].i = z__1.i; +/* L40: */ + } + +/* + Solve the triangular system: + (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK. +*/ + + i__1 = ki - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k + k * t_dim1; + i__3 = k + k * t_dim1; + i__4 = ki + ki * t_dim1; + z__1.r = t[i__3].r - t[i__4].r, z__1.i = t[i__3].i - t[i__4] + .i; + t[i__2].r = z__1.r, t[i__2].i = z__1.i; + i__2 = k + k * t_dim1; + if ((d__1 = t[i__2].r, abs(d__1)) + (d__2 = d_imag(&t[k + k * + t_dim1]), abs(d__2)) < smin) { + i__3 = k + k * t_dim1; + t[i__3].r = smin, t[i__3].i = 0.; + } +/* L50: */ + } + + if (ki > 1) { + i__1 = ki - 1; + zlatrs_("Upper", "No transpose", "Non-unit", "Y", &i__1, &t[ + t_offset], ldt, &work[1], &scale, &rwork[1], info); + i__1 = ki; + work[i__1].r = scale, work[i__1].i = 0.; + } + +/* Copy the vector x or Q*x to VR and normalize. */ + + if (! over) { + zcopy_(&ki, &work[1], &c__1, &vr[is * vr_dim1 + 1], &c__1); + + ii = izamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); + i__1 = ii + is * vr_dim1; + remax = 1. / ((d__1 = vr[i__1].r, abs(d__1)) + (d__2 = d_imag( + &vr[ii + is * vr_dim1]), abs(d__2))); + zdscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); + + i__1 = *n; + for (k = ki + 1; k <= i__1; ++k) { + i__2 = k + is * vr_dim1; + vr[i__2].r = 0., vr[i__2].i = 0.; +/* L60: */ + } + } else { + if (ki > 1) { + i__1 = ki - 1; + z__1.r = scale, z__1.i = 0.; + zgemv_("N", n, &i__1, &c_b60, &vr[vr_offset], ldvr, &work[ + 1], &c__1, &z__1, &vr[ki * vr_dim1 + 1], &c__1); + } + + ii = izamax_(n, &vr[ki * vr_dim1 + 1], &c__1); + i__1 = ii + ki * vr_dim1; + remax = 1. / ((d__1 = vr[i__1].r, abs(d__1)) + (d__2 = d_imag( + &vr[ii + ki * vr_dim1]), abs(d__2))); + zdscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); + } + +/* Set back the original diagonal elements of T. */ + + i__1 = ki - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k + k * t_dim1; + i__3 = k + *n; + t[i__2].r = work[i__3].r, t[i__2].i = work[i__3].i; +/* L70: */ + } + + --is; +L80: + ; + } + } + + if (leftv) { + +/* Compute left eigenvectors. */ + + is = 1; + i__1 = *n; + for (ki = 1; ki <= i__1; ++ki) { + + if (somev) { + if (! select[ki]) { + goto L130; + } + } +/* Computing MAX */ + i__2 = ki + ki * t_dim1; + d__3 = ulp * ((d__1 = t[i__2].r, abs(d__1)) + (d__2 = d_imag(&t[ + ki + ki * t_dim1]), abs(d__2))); + smin = max(d__3,smlnum); + + i__2 = *n; + work[i__2].r = 1., work[i__2].i = 0.; + +/* Form right-hand side. */ + + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + i__3 = k; + d_cnjg(&z__2, &t[ki + k * t_dim1]); + z__1.r = -z__2.r, z__1.i = -z__2.i; + work[i__3].r = z__1.r, work[i__3].i = z__1.i; +/* L90: */ + } + +/* + Solve the triangular system: + (T(KI+1:N,KI+1:N) - T(KI,KI))'*X = SCALE*WORK. +*/ + + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + i__3 = k + k * t_dim1; + i__4 = k + k * t_dim1; + i__5 = ki + ki * t_dim1; + z__1.r = t[i__4].r - t[i__5].r, z__1.i = t[i__4].i - t[i__5] + .i; + t[i__3].r = z__1.r, t[i__3].i = z__1.i; + i__3 = k + k * t_dim1; + if ((d__1 = t[i__3].r, abs(d__1)) + (d__2 = d_imag(&t[k + k * + t_dim1]), abs(d__2)) < smin) { + i__4 = k + k * t_dim1; + t[i__4].r = smin, t[i__4].i = 0.; + } +/* L100: */ + } + + if (ki < *n) { + i__2 = *n - ki; + zlatrs_("Upper", "Conjugate transpose", "Non-unit", "Y", & + i__2, &t[ki + 1 + (ki + 1) * t_dim1], ldt, &work[ki + + 1], &scale, &rwork[1], info); + i__2 = ki; + work[i__2].r = scale, work[i__2].i = 0.; + } + +/* Copy the vector x or Q*x to VL and normalize. */ + + if (! over) { + i__2 = *n - ki + 1; + zcopy_(&i__2, &work[ki], &c__1, &vl[ki + is * vl_dim1], &c__1) + ; + + i__2 = *n - ki + 1; + ii = izamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - 1; + i__2 = ii + is * vl_dim1; + remax = 1. / ((d__1 = vl[i__2].r, abs(d__1)) + (d__2 = d_imag( + &vl[ii + is * vl_dim1]), abs(d__2))); + i__2 = *n - ki + 1; + zdscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); + + i__2 = ki - 1; + for (k = 1; k <= i__2; ++k) { + i__3 = k + is * vl_dim1; + vl[i__3].r = 0., vl[i__3].i = 0.; +/* L110: */ + } + } else { + if (ki < *n) { + i__2 = *n - ki; + z__1.r = scale, z__1.i = 0.; + zgemv_("N", n, &i__2, &c_b60, &vl[(ki + 1) * vl_dim1 + 1], + ldvl, &work[ki + 1], &c__1, &z__1, &vl[ki * + vl_dim1 + 1], &c__1); + } + + ii = izamax_(n, &vl[ki * vl_dim1 + 1], &c__1); + i__2 = ii + ki * vl_dim1; + remax = 1. / ((d__1 = vl[i__2].r, abs(d__1)) + (d__2 = d_imag( + &vl[ii + ki * vl_dim1]), abs(d__2))); + zdscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + } + +/* Set back the original diagonal elements of T. */ + + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + i__3 = k + k * t_dim1; + i__4 = k + *n; + t[i__3].r = work[i__4].r, t[i__3].i = work[i__4].i; +/* L120: */ + } + + ++is; +L130: + ; + } + } + + return 0; + +/* End of ZTREVC */ + +} /* ztrevc_ */ + +/* Subroutine */ int ztrti2_(char *uplo, char *diag, integer *n, + doublecomplex *a, integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublecomplex z__1; + + /* Builtin functions */ + void z_div(doublecomplex *, doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer j; + static doublecomplex ajj; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *); + static logical upper; + extern /* Subroutine */ int ztrmv_(char *, char *, char *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *), xerbla_(char *, integer *); + static logical nounit; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZTRTI2 computes the inverse of a complex upper or lower triangular + matrix. + + This is the Level 2 BLAS version of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the matrix A is upper or lower triangular. + = 'U': Upper triangular + = 'L': Lower triangular + + DIAG (input) CHARACTER*1 + Specifies whether or not the matrix A is unit triangular. + = 'N': Non-unit triangular + = 'U': Unit triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the triangular matrix A. If UPLO = 'U', the + leading n by n upper triangular part of the array A contains + the upper triangular matrix, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n by n lower triangular part of the array A contains + the lower triangular matrix, and the strictly upper + triangular part of A is not referenced. If DIAG = 'U', the + diagonal elements of A are also not referenced and are + assumed to be 1. + + On exit, the (triangular) inverse of the original matrix, in + the same storage format. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + nounit = lsame_(diag, "N"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZTRTI2", &i__1); + return 0; + } + + if (upper) { + +/* Compute inverse of upper triangular matrix. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (nounit) { + i__2 = j + j * a_dim1; + z_div(&z__1, &c_b60, &a[j + j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = j + j * a_dim1; + z__1.r = -a[i__2].r, z__1.i = -a[i__2].i; + ajj.r = z__1.r, ajj.i = z__1.i; + } else { + z__1.r = -1., z__1.i = -0.; + ajj.r = z__1.r, ajj.i = z__1.i; + } + +/* Compute elements 1:j-1 of j-th column. */ + + i__2 = j - 1; + ztrmv_("Upper", "No transpose", diag, &i__2, &a[a_offset], lda, & + a[j * a_dim1 + 1], &c__1); + i__2 = j - 1; + zscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1); +/* L10: */ + } + } else { + +/* Compute inverse of lower triangular matrix. */ + + for (j = *n; j >= 1; --j) { + if (nounit) { + i__1 = j + j * a_dim1; + z_div(&z__1, &c_b60, &a[j + j * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = j + j * a_dim1; + z__1.r = -a[i__1].r, z__1.i = -a[i__1].i; + ajj.r = z__1.r, ajj.i = z__1.i; + } else { + z__1.r = -1., z__1.i = -0.; + ajj.r = z__1.r, ajj.i = z__1.i; + } + if (j < *n) { + +/* Compute elements j+1:n of j-th column. */ + + i__1 = *n - j; + ztrmv_("Lower", "No transpose", diag, &i__1, &a[j + 1 + (j + + 1) * a_dim1], lda, &a[j + 1 + j * a_dim1], &c__1); + i__1 = *n - j; + zscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1); + } +/* L20: */ + } + } + + return 0; + +/* End of ZTRTI2 */ + +} /* ztrti2_ */ + +/* Subroutine */ int ztrtri_(char *uplo, char *diag, integer *n, + doublecomplex *a, integer *lda, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, i__1, i__2, i__3[2], i__4, i__5; + doublecomplex z__1; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer j, jb, nb, nn; + extern logical lsame_(char *, char *); + static logical upper; + extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), + ztrsm_(char *, char *, char *, char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *), ztrti2_(char *, char * + , integer *, doublecomplex *, integer *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical nounit; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZTRTRI computes the inverse of a complex upper or lower triangular + matrix A. + + This is the Level 3 BLAS version of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': A is upper triangular; + = 'L': A is lower triangular. + + DIAG (input) CHARACTER*1 + = 'N': A is non-unit triangular; + = 'U': A is unit triangular. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the triangular matrix A. If UPLO = 'U', the + leading N-by-N upper triangular part of the array A contains + the upper triangular matrix, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of the array A contains + the lower triangular matrix, and the strictly upper + triangular part of A is not referenced. If DIAG = 'U', the + diagonal elements of A are also not referenced and are + assumed to be 1. + On exit, the (triangular) inverse of the original matrix, in + the same storage format. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, A(i,i) is exactly zero. The triangular + matrix is singular and its inverse can not be computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + nounit = lsame_(diag, "N"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZTRTRI", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Check for singularity if non-unit. */ + + if (nounit) { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + i__2 = *info + *info * a_dim1; + if (a[i__2].r == 0. && a[i__2].i == 0.) { + return 0; + } +/* L10: */ + } + *info = 0; + } + +/* + Determine the block size for this environment. + + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = uplo; + i__3[1] = 1, a__1[1] = diag; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "ZTRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)2); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code */ + + ztrti2_(uplo, diag, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code */ + + if (upper) { + +/* Compute inverse of upper triangular matrix */ + + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *n - j + 1; + jb = min(i__4,i__5); + +/* Compute rows 1:j-1 of current block column */ + + i__4 = j - 1; + ztrmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, & + c_b60, &a[a_offset], lda, &a[j * a_dim1 + 1], lda); + i__4 = j - 1; + z__1.r = -1., z__1.i = -0.; + ztrsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, & + z__1, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], + lda); + +/* Compute inverse of current diagonal block */ + + ztrti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info); +/* L20: */ + } + } else { + +/* Compute inverse of lower triangular matrix */ + + nn = (*n - 1) / nb * nb + 1; + i__2 = -nb; + for (j = nn; i__2 < 0 ? j >= 1 : j <= 1; j += i__2) { +/* Computing MIN */ + i__1 = nb, i__4 = *n - j + 1; + jb = min(i__1,i__4); + if (j + jb <= *n) { + +/* Compute rows j+jb:n of current block column */ + + i__1 = *n - j - jb + 1; + ztrmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, + &c_b60, &a[j + jb + (j + jb) * a_dim1], lda, &a[j + + jb + j * a_dim1], lda); + i__1 = *n - j - jb + 1; + z__1.r = -1., z__1.i = -0.; + ztrsm_("Right", "Lower", "No transpose", diag, &i__1, &jb, + &z__1, &a[j + j * a_dim1], lda, &a[j + jb + j * + a_dim1], lda); + } + +/* Compute inverse of current diagonal block */ + + ztrti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info); +/* L30: */ + } + } + } + + return 0; + +/* End of ZTRTRI */ + +} /* ztrtri_ */ + +/* Subroutine */ int zung2r_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublecomplex z__1; + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *), zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZUNG2R generates an m by n complex matrix Q with orthonormal columns, + which is defined as the first n columns of a product of k elementary + reflectors of order m + + Q = H(1) H(2) . . . H(k) + + as returned by ZGEQRF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. M >= N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. N >= K >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the i-th column must contain the vector which + defines the elementary reflector H(i), for i = 1,2,...,k, as + returned by ZGEQRF in the first k columns of its array + argument A. + On exit, the m by n matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) COMPLEX*16 array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by ZGEQRF. + + WORK (workspace) COMPLEX*16 array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZUNG2R", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + +/* Initialise columns k+1:n to columns of the unit matrix */ + + i__1 = *n; + for (j = *k + 1; j <= i__1; ++j) { + i__2 = *m; + for (l = 1; l <= i__2; ++l) { + i__3 = l + j * a_dim1; + a[i__3].r = 0., a[i__3].i = 0.; +/* L10: */ + } + i__2 = j + j * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; +/* L20: */ + } + + for (i__ = *k; i__ >= 1; --i__) { + +/* Apply H(i) to A(i:m,i:n) from the left */ + + if (i__ < *n) { + i__1 = i__ + i__ * a_dim1; + a[i__1].r = 1., a[i__1].i = 0.; + i__1 = *m - i__ + 1; + i__2 = *n - i__; + zlarf_("Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + } + if (i__ < *m) { + i__1 = *m - i__; + i__2 = i__; + z__1.r = -tau[i__2].r, z__1.i = -tau[i__2].i; + zscal_(&i__1, &z__1, &a[i__ + 1 + i__ * a_dim1], &c__1); + } + i__1 = i__ + i__ * a_dim1; + i__2 = i__; + z__1.r = 1. - tau[i__2].r, z__1.i = 0. - tau[i__2].i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + +/* Set A(1:i-1,i) to zero */ + + i__1 = i__ - 1; + for (l = 1; l <= i__1; ++l) { + i__2 = l + i__ * a_dim1; + a[i__2].r = 0., a[i__2].i = 0.; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of ZUNG2R */ + +} /* zung2r_ */ + +/* Subroutine */ int zungbr_(char *vect, integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, nb, mn; + extern logical lsame_(char *, char *); + static integer iinfo; + static logical wantq; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer lwkopt; + static logical lquery; + extern /* Subroutine */ int zunglq_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *), zungqr_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZUNGBR generates one of the complex unitary matrices Q or P**H + determined by ZGEBRD when reducing a complex matrix A to bidiagonal + form: A = Q * B * P**H. Q and P**H are defined as products of + elementary reflectors H(i) or G(i) respectively. + + If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q + is of order M: + if m >= k, Q = H(1) H(2) . . . H(k) and ZUNGBR returns the first n + columns of Q, where m >= n >= k; + if m < k, Q = H(1) H(2) . . . H(m-1) and ZUNGBR returns Q as an + M-by-M matrix. + + If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H + is of order N: + if k < n, P**H = G(k) . . . G(2) G(1) and ZUNGBR returns the first m + rows of P**H, where n >= m >= k; + if k >= n, P**H = G(n-1) . . . G(2) G(1) and ZUNGBR returns P**H as + an N-by-N matrix. + + Arguments + ========= + + VECT (input) CHARACTER*1 + Specifies whether the matrix Q or the matrix P**H is + required, as defined in the transformation applied by ZGEBRD: + = 'Q': generate Q; + = 'P': generate P**H. + + M (input) INTEGER + The number of rows of the matrix Q or P**H to be returned. + M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q or P**H to be returned. + N >= 0. + If VECT = 'Q', M >= N >= min(M,K); + if VECT = 'P', N >= M >= min(N,K). + + K (input) INTEGER + If VECT = 'Q', the number of columns in the original M-by-K + matrix reduced by ZGEBRD. + If VECT = 'P', the number of rows in the original K-by-N + matrix reduced by ZGEBRD. + K >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the vectors which define the elementary reflectors, + as returned by ZGEBRD. + On exit, the M-by-N matrix Q or P**H. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= M. + + TAU (input) COMPLEX*16 array, dimension + (min(M,K)) if VECT = 'Q' + (min(N,K)) if VECT = 'P' + TAU(i) must contain the scalar factor of the elementary + reflector H(i) or G(i), which determines Q or P**H, as + returned by ZGEBRD in its array argument TAUQ or TAUP. + + WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,min(M,N)). + For optimum performance LWORK >= min(M,N)*NB, where NB + is the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + wantq = lsame_(vect, "Q"); + mn = min(*m,*n); + lquery = *lwork == -1; + if (! wantq && ! lsame_(vect, "P")) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0 || wantq && (*n > *m || *n < min(*m,*k)) || ! wantq && ( + *m > *n || *m < min(*n,*k))) { + *info = -3; + } else if (*k < 0) { + *info = -4; + } else if (*lda < max(1,*m)) { + *info = -6; + } else if (*lwork < max(1,mn) && ! lquery) { + *info = -9; + } + + if (*info == 0) { + if (wantq) { + nb = ilaenv_(&c__1, "ZUNGQR", " ", m, n, k, &c_n1, (ftnlen)6, ( + ftnlen)1); + } else { + nb = ilaenv_(&c__1, "ZUNGLQ", " ", m, n, k, &c_n1, (ftnlen)6, ( + ftnlen)1); + } + lwkopt = max(1,mn) * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZUNGBR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + if (wantq) { + +/* + Form Q, determined by a call to ZGEBRD to reduce an m-by-k + matrix +*/ + + if (*m >= *k) { + +/* If m >= k, assume m >= n >= k */ + + zungqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + iinfo); + + } else { + +/* + If m < k, assume m = n + + Shift the vectors which define the elementary reflectors one + column to the right, and set the first row and column of Q + to those of the unit matrix +*/ + + for (j = *m; j >= 2; --j) { + i__1 = j * a_dim1 + 1; + a[i__1].r = 0., a[i__1].i = 0.; + i__1 = *m; + for (i__ = j + 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * a_dim1; + i__3 = i__ + (j - 1) * a_dim1; + a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i; +/* L10: */ + } +/* L20: */ + } + i__1 = a_dim1 + 1; + a[i__1].r = 1., a[i__1].i = 0.; + i__1 = *m; + for (i__ = 2; i__ <= i__1; ++i__) { + i__2 = i__ + a_dim1; + a[i__2].r = 0., a[i__2].i = 0.; +/* L30: */ + } + if (*m > 1) { + +/* Form Q(2:m,2:m) */ + + i__1 = *m - 1; + i__2 = *m - 1; + i__3 = *m - 1; + zungqr_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); + } + } + } else { + +/* + Form P', determined by a call to ZGEBRD to reduce a k-by-n + matrix +*/ + + if (*k < *n) { + +/* If k < n, assume k <= m <= n */ + + zunglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + iinfo); + + } else { + +/* + If k >= n, assume m = n + + Shift the vectors which define the elementary reflectors one + row downward, and set the first row and column of P' to + those of the unit matrix +*/ + + i__1 = a_dim1 + 1; + a[i__1].r = 1., a[i__1].i = 0.; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + i__2 = i__ + a_dim1; + a[i__2].r = 0., a[i__2].i = 0.; +/* L40: */ + } + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + for (i__ = j - 1; i__ >= 2; --i__) { + i__2 = i__ + j * a_dim1; + i__3 = i__ - 1 + j * a_dim1; + a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i; +/* L50: */ + } + i__2 = j * a_dim1 + 1; + a[i__2].r = 0., a[i__2].i = 0.; +/* L60: */ + } + if (*n > 1) { + +/* Form P'(2:n,2:n) */ + + i__1 = *n - 1; + i__2 = *n - 1; + i__3 = *n - 1; + zunglq_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); + } + } + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + return 0; + +/* End of ZUNGBR */ + +} /* zungbr_ */ + +/* Subroutine */ int zunghr_(integer *n, integer *ilo, integer *ihi, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j, nb, nh, iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer lwkopt; + static logical lquery; + extern /* Subroutine */ int zungqr_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZUNGHR generates a complex unitary matrix Q which is defined as the + product of IHI-ILO elementary reflectors of order N, as returned by + ZGEHRD: + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Arguments + ========= + + N (input) INTEGER + The order of the matrix Q. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + ILO and IHI must have the same values as in the previous call + of ZGEHRD. Q is equal to the unit matrix except in the + submatrix Q(ilo+1:ihi,ilo+1:ihi). + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the vectors which define the elementary reflectors, + as returned by ZGEHRD. + On exit, the N-by-N unitary matrix Q. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (input) COMPLEX*16 array, dimension (N-1) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by ZGEHRD. + + WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= IHI-ILO. + For optimum performance LWORK >= (IHI-ILO)*NB, where NB is + the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nh = *ihi - *ilo; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < max(1,nh) && ! lquery) { + *info = -8; + } + + if (*info == 0) { + nb = ilaenv_(&c__1, "ZUNGQR", " ", &nh, &nh, &nh, &c_n1, (ftnlen)6, ( + ftnlen)1); + lwkopt = max(1,nh) * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZUNGHR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + +/* + Shift the vectors which define the elementary reflectors one + column to the right, and set the first ilo and the last n-ihi + rows and columns to those of the unit matrix +*/ + + i__1 = *ilo + 1; + for (j = *ihi; j >= i__1; --j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0., a[i__3].i = 0.; +/* L10: */ + } + i__2 = *ihi; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + (j - 1) * a_dim1; + a[i__3].r = a[i__4].r, a[i__3].i = a[i__4].i; +/* L20: */ + } + i__2 = *n; + for (i__ = *ihi + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0., a[i__3].i = 0.; +/* L30: */ + } +/* L40: */ + } + i__1 = *ilo; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0., a[i__3].i = 0.; +/* L50: */ + } + i__2 = j + j * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; +/* L60: */ + } + i__1 = *n; + for (j = *ihi + 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0., a[i__3].i = 0.; +/* L70: */ + } + i__2 = j + j * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; +/* L80: */ + } + + if (nh > 0) { + +/* Generate Q(ilo+1:ihi,ilo+1:ihi) */ + + zungqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[* + ilo], &work[1], lwork, &iinfo); + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + return 0; + +/* End of ZUNGHR */ + +} /* zunghr_ */ + +/* Subroutine */ int zungl2_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublecomplex z__1, z__2; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *), zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), xerbla_(char *, integer *), zlacgv_(integer *, doublecomplex *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZUNGL2 generates an m-by-n complex matrix Q with orthonormal rows, + which is defined as the first m rows of a product of k elementary + reflectors of order n + + Q = H(k)' . . . H(2)' H(1)' + + as returned by ZGELQF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. N >= M. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. M >= K >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the i-th row must contain the vector which defines + the elementary reflector H(i), for i = 1,2,...,k, as returned + by ZGELQF in the first k rows of its array argument A. + On exit, the m by n matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) COMPLEX*16 array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by ZGELQF. + + WORK (workspace) COMPLEX*16 array, dimension (M) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*k < 0 || *k > *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZUNGL2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m <= 0) { + return 0; + } + + if (*k < *m) { + +/* Initialise rows k+1:m to rows of the unit matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (l = *k + 1; l <= i__2; ++l) { + i__3 = l + j * a_dim1; + a[i__3].r = 0., a[i__3].i = 0.; +/* L10: */ + } + if (j > *k && j <= *m) { + i__2 = j + j * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + } +/* L20: */ + } + } + + for (i__ = *k; i__ >= 1; --i__) { + +/* Apply H(i)' to A(i:m,i:n) from the right */ + + if (i__ < *n) { + i__1 = *n - i__; + zlacgv_(&i__1, &a[i__ + (i__ + 1) * a_dim1], lda); + if (i__ < *m) { + i__1 = i__ + i__ * a_dim1; + a[i__1].r = 1., a[i__1].i = 0.; + i__1 = *m - i__; + i__2 = *n - i__ + 1; + d_cnjg(&z__1, &tau[i__]); + zlarf_("Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, & + z__1, &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + } + i__1 = *n - i__; + i__2 = i__; + z__1.r = -tau[i__2].r, z__1.i = -tau[i__2].i; + zscal_(&i__1, &z__1, &a[i__ + (i__ + 1) * a_dim1], lda); + i__1 = *n - i__; + zlacgv_(&i__1, &a[i__ + (i__ + 1) * a_dim1], lda); + } + i__1 = i__ + i__ * a_dim1; + d_cnjg(&z__2, &tau[i__]); + z__1.r = 1. - z__2.r, z__1.i = 0. - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + +/* Set A(i,1:i-1) to zero */ + + i__1 = i__ - 1; + for (l = 1; l <= i__1; ++l) { + i__2 = i__ + l * a_dim1; + a[i__2].r = 0., a[i__2].i = 0.; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of ZUNGL2 */ + +} /* zungl2_ */ + +/* Subroutine */ int zunglq_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int zungl2_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + static integer ldwork; + extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + static logical lquery; + static integer lwkopt; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZUNGLQ generates an M-by-N complex matrix Q with orthonormal rows, + which is defined as the first M rows of a product of K elementary + reflectors of order N + + Q = H(k)' . . . H(2)' H(1)' + + as returned by ZGELQF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. N >= M. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. M >= K >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the i-th row must contain the vector which defines + the elementary reflector H(i), for i = 1,2,...,k, as returned + by ZGELQF in the first k rows of its array argument A. + On exit, the M-by-N matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) COMPLEX*16 array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by ZGELQF. + + WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,M). + For optimum performance LWORK >= M*NB, where NB is + the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit; + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "ZUNGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = max(1,*m) * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*k < 0 || *k > *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*lwork < max(1,*m) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZUNGLQ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m <= 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < *k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "ZUNGLQ", " ", m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "ZUNGLQ", " ", m, n, k, &c_n1, + (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* + Use blocked code after the last block. + The first kk rows are handled by the block method. +*/ + + ki = (*k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = *k, i__2 = ki + nb; + kk = min(i__1,i__2); + +/* Set A(kk+1:m,1:kk) to zero. */ + + i__1 = kk; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = kk + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0., a[i__3].i = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + kk = 0; + } + +/* Use unblocked code for the last or only block. */ + + if (kk < *m) { + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + zungl2_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + tau[kk + 1], &work[1], &iinfo); + } + + if (kk > 0) { + +/* Use blocked code */ + + i__1 = -nb; + for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *k - i__ + 1; + ib = min(i__2,i__3); + if (i__ + ib <= *m) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__2 = *n - i__ + 1; + zlarft_("Forward", "Rowwise", &i__2, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H' to A(i+ib:m,i:n) from the right */ + + i__2 = *m - i__ - ib + 1; + i__3 = *n - i__ + 1; + zlarfb_("Right", "Conjugate transpose", "Forward", "Rowwise", + &i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ + 1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ + ib + 1], &ldwork); + } + +/* Apply H' to columns i:n of current block */ + + i__2 = *n - i__ + 1; + zungl2_(&ib, &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); + +/* Set columns 1:i-1 of current block to zero */ + + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + i__3 = i__ + ib - 1; + for (l = i__; l <= i__3; ++l) { + i__4 = l + j * a_dim1; + a[i__4].r = 0., a[i__4].i = 0.; +/* L30: */ + } +/* L40: */ + } +/* L50: */ + } + } + + work[1].r = (doublereal) iws, work[1].i = 0.; + return 0; + +/* End of ZUNGLQ */ + +} /* zunglq_ */ + +/* Subroutine */ int zungqr_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int zung2r_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + static integer ldwork; + extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZUNGQR generates an M-by-N complex matrix Q with orthonormal columns, + which is defined as the first N columns of a product of K elementary + reflectors of order M + + Q = H(1) H(2) . . . H(k) + + as returned by ZGEQRF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. M >= N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. N >= K >= 0. + + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the i-th column must contain the vector which + defines the elementary reflector H(i), for i = 1,2,...,k, as + returned by ZGEQRF in the first k columns of its array + argument A. + On exit, the M-by-N matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) COMPLEX*16 array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by ZGEQRF. + + WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "ZUNGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = max(1,*n) * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZUNGQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < *k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "ZUNGQR", " ", m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "ZUNGQR", " ", m, n, k, &c_n1, + (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* + Use blocked code after the last block. + The first kk columns are handled by the block method. +*/ + + ki = (*k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = *k, i__2 = ki + nb; + kk = min(i__1,i__2); + +/* Set A(1:kk,kk+1:n) to zero. */ + + i__1 = *n; + for (j = kk + 1; j <= i__1; ++j) { + i__2 = kk; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0., a[i__3].i = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + kk = 0; + } + +/* Use unblocked code for the last or only block. */ + + if (kk < *n) { + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + zung2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + tau[kk + 1], &work[1], &iinfo); + } + + if (kk > 0) { + +/* Use blocked code */ + + i__1 = -nb; + for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *k - i__ + 1; + ib = min(i__2,i__3); + if (i__ + ib <= *n) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__2 = *m - i__ + 1; + zlarft_("Forward", "Columnwise", &i__2, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(i:m,i+ib:n) from the left */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__ - ib + 1; + zlarfb_("Left", "No transpose", "Forward", "Columnwise", & + i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ + 1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, & + work[ib + 1], &ldwork); + } + +/* Apply H to rows i:m of current block */ + + i__2 = *m - i__ + 1; + zung2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); + +/* Set rows 1:i-1 of current block to zero */ + + i__2 = i__ + ib - 1; + for (j = i__; j <= i__2; ++j) { + i__3 = i__ - 1; + for (l = 1; l <= i__3; ++l) { + i__4 = l + j * a_dim1; + a[i__4].r = 0., a[i__4].i = 0.; +/* L30: */ + } +/* L40: */ + } +/* L50: */ + } + } + + work[1].r = (doublereal) iws, work[1].i = 0.; + return 0; + +/* End of ZUNGQR */ + +} /* zungqr_ */ + +/* Subroutine */ int zunm2l_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + doublecomplex z__1; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__, i1, i2, i3, mi, ni, nq; + static doublecomplex aii; + static logical left; + static doublecomplex taui; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), xerbla_(char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZUNM2L overwrites the general complex m-by-n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'C', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'C', + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by ZGEQLF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'C': apply Q' (Conjugate transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) COMPLEX*16 array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + ZGEQLF in the last k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) COMPLEX*16 array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by ZGEQLF. + + C (input/output) COMPLEX*16 array, dimension (LDC,N) + On entry, the m-by-n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) COMPLEX*16 array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZUNM2L", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) or H(i)' is applied to C(1:m-k+i,1:n) */ + + mi = *m - *k + i__; + } else { + +/* H(i) or H(i)' is applied to C(1:m,1:n-k+i) */ + + ni = *n - *k + i__; + } + +/* Apply H(i) or H(i)' */ + + if (notran) { + i__3 = i__; + taui.r = tau[i__3].r, taui.i = tau[i__3].i; + } else { + d_cnjg(&z__1, &tau[i__]); + taui.r = z__1.r, taui.i = z__1.i; + } + i__3 = nq - *k + i__ + i__ * a_dim1; + aii.r = a[i__3].r, aii.i = a[i__3].i; + i__3 = nq - *k + i__ + i__ * a_dim1; + a[i__3].r = 1., a[i__3].i = 0.; + zlarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &taui, &c__[ + c_offset], ldc, &work[1]); + i__3 = nq - *k + i__ + i__ * a_dim1; + a[i__3].r = aii.r, a[i__3].i = aii.i; +/* L10: */ + } + return 0; + +/* End of ZUNM2L */ + +} /* zunm2l_ */ + +/* Subroutine */ int zunm2r_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + doublecomplex z__1; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; + static doublecomplex aii; + static logical left; + static doublecomplex taui; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), xerbla_(char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZUNM2R overwrites the general complex m-by-n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'C', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'C', + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(1) H(2) . . . H(k) + + as returned by ZGEQRF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'C': apply Q' (Conjugate transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) COMPLEX*16 array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + ZGEQRF in the first k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) COMPLEX*16 array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by ZGEQRF. + + C (input/output) COMPLEX*16 array, dimension (LDC,N) + On entry, the m-by-n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) COMPLEX*16 array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZUNM2R", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) or H(i)' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) or H(i)' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i) or H(i)' */ + + if (notran) { + i__3 = i__; + taui.r = tau[i__3].r, taui.i = tau[i__3].i; + } else { + d_cnjg(&z__1, &tau[i__]); + taui.r = z__1.r, taui.i = z__1.i; + } + i__3 = i__ + i__ * a_dim1; + aii.r = a[i__3].r, aii.i = a[i__3].i; + i__3 = i__ + i__ * a_dim1; + a[i__3].r = 1., a[i__3].i = 0.; + zlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &taui, &c__[ic + + jc * c_dim1], ldc, &work[1]); + i__3 = i__ + i__ * a_dim1; + a[i__3].r = aii.r, a[i__3].i = aii.i; +/* L10: */ + } + return 0; + +/* End of ZUNM2R */ + +} /* zunm2r_ */ + +/* Subroutine */ int zunmbr_(char *vect, char *side, char *trans, integer *m, + integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex + *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer * + lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2]; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i1, i2, nb, mi, ni, nq, nw; + static logical left; + extern logical lsame_(char *, char *); + static integer iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical notran, applyq; + static char transt[1]; + static integer lwkopt; + static logical lquery; + extern /* Subroutine */ int zunmlq_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *), zunmqr_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + If VECT = 'Q', ZUNMBR overwrites the general complex M-by-N matrix C + with + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + If VECT = 'P', ZUNMBR overwrites the general complex M-by-N matrix C + with + SIDE = 'L' SIDE = 'R' + TRANS = 'N': P * C C * P + TRANS = 'C': P**H * C C * P**H + + Here Q and P**H are the unitary matrices determined by ZGEBRD when + reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q + and P**H are defined as products of elementary reflectors H(i) and + G(i) respectively. + + Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the + order of the unitary matrix Q or P**H that is applied. + + If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: + if nq >= k, Q = H(1) H(2) . . . H(k); + if nq < k, Q = H(1) H(2) . . . H(nq-1). + + If VECT = 'P', A is assumed to have been a K-by-NQ matrix: + if k < nq, P = G(1) G(2) . . . G(k); + if k >= nq, P = G(1) G(2) . . . G(nq-1). + + Arguments + ========= + + VECT (input) CHARACTER*1 + = 'Q': apply Q or Q**H; + = 'P': apply P or P**H. + + SIDE (input) CHARACTER*1 + = 'L': apply Q, Q**H, P or P**H from the Left; + = 'R': apply Q, Q**H, P or P**H from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q or P; + = 'C': Conjugate transpose, apply Q**H or P**H. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + If VECT = 'Q', the number of columns in the original + matrix reduced by ZGEBRD. + If VECT = 'P', the number of rows in the original + matrix reduced by ZGEBRD. + K >= 0. + + A (input) COMPLEX*16 array, dimension + (LDA,min(nq,K)) if VECT = 'Q' + (LDA,nq) if VECT = 'P' + The vectors which define the elementary reflectors H(i) and + G(i), whose products determine the matrices Q and P, as + returned by ZGEBRD. + + LDA (input) INTEGER + The leading dimension of the array A. + If VECT = 'Q', LDA >= max(1,nq); + if VECT = 'P', LDA >= max(1,min(nq,K)). + + TAU (input) COMPLEX*16 array, dimension (min(nq,K)) + TAU(i) must contain the scalar factor of the elementary + reflector H(i) or G(i) which determines Q or P, as returned + by ZGEBRD in the array argument TAUQ or TAUP. + + C (input/output) COMPLEX*16 array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q + or P*C or P**H*C or C*P or C*P**H. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + applyq = lsame_(vect, "Q"); + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q or P and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! applyq && ! lsame_(vect, "P")) { + *info = -1; + } else if (! left && ! lsame_(side, "R")) { + *info = -2; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*k < 0) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = min(nq,*k); + if (applyq && *lda < max(1,nq) || ! applyq && *lda < max(i__1,i__2)) { + *info = -8; + } else if (*ldc < max(1,*m)) { + *info = -11; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -13; + } + } + + if (*info == 0) { + if (applyq) { + if (left) { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = ilaenv_(&c__1, "ZUNMQR", ch__1, &i__1, n, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = ilaenv_(&c__1, "ZUNMQR", ch__1, m, &i__1, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } else { + if (left) { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = ilaenv_(&c__1, "ZUNMLQ", ch__1, &i__1, n, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = ilaenv_(&c__1, "ZUNMLQ", ch__1, m, &i__1, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } + lwkopt = max(1,nw) * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZUNMBR", &i__1); + return 0; + } else if (lquery) { + } + +/* Quick return if possible */ + + work[1].r = 1., work[1].i = 0.; + if (*m == 0 || *n == 0) { + return 0; + } + + if (applyq) { + +/* Apply Q */ + + if (nq >= *k) { + +/* Q was determined by a call to ZGEBRD with nq >= k */ + + zunmqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], lwork, &iinfo); + } else if (nq > 1) { + +/* Q was determined by a call to ZGEBRD with nq < k */ + + if (left) { + mi = *m - 1; + ni = *n; + i1 = 2; + i2 = 1; + } else { + mi = *m; + ni = *n - 1; + i1 = 1; + i2 = 2; + } + i__1 = nq - 1; + zunmqr_(side, trans, &mi, &ni, &i__1, &a[a_dim1 + 2], lda, &tau[1] + , &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + } + } else { + +/* Apply P */ + + if (notran) { + *(unsigned char *)transt = 'C'; + } else { + *(unsigned char *)transt = 'N'; + } + if (nq > *k) { + +/* P was determined by a call to ZGEBRD with nq > k */ + + zunmlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], lwork, &iinfo); + } else if (nq > 1) { + +/* P was determined by a call to ZGEBRD with nq <= k */ + + if (left) { + mi = *m - 1; + ni = *n; + i1 = 2; + i2 = 1; + } else { + mi = *m; + ni = *n - 1; + i1 = 1; + i2 = 2; + } + i__1 = nq - 1; + zunmlq_(side, transt, &mi, &ni, &i__1, &a[(a_dim1 << 1) + 1], lda, + &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, & + iinfo); + } + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + return 0; + +/* End of ZUNMBR */ + +} /* zunmbr_ */ + +/* Subroutine */ int zunml2_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + doublecomplex z__1; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; + static doublecomplex aii; + static logical left; + static doublecomplex taui; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), xerbla_(char *, integer *), zlacgv_(integer *, doublecomplex *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZUNML2 overwrites the general complex m-by-n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'C', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'C', + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(k)' . . . H(2)' H(1)' + + as returned by ZGELQF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'C': apply Q' (Conjugate transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) COMPLEX*16 array, dimension + (LDA,M) if SIDE = 'L', + (LDA,N) if SIDE = 'R' + The i-th row must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + ZGELQF in the first k rows of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,K). + + TAU (input) COMPLEX*16 array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by ZGELQF. + + C (input/output) COMPLEX*16 array, dimension (LDC,N) + On entry, the m-by-n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) COMPLEX*16 array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZUNML2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) or H(i)' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) or H(i)' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i) or H(i)' */ + + if (notran) { + d_cnjg(&z__1, &tau[i__]); + taui.r = z__1.r, taui.i = z__1.i; + } else { + i__3 = i__; + taui.r = tau[i__3].r, taui.i = tau[i__3].i; + } + if (i__ < nq) { + i__3 = nq - i__; + zlacgv_(&i__3, &a[i__ + (i__ + 1) * a_dim1], lda); + } + i__3 = i__ + i__ * a_dim1; + aii.r = a[i__3].r, aii.i = a[i__3].i; + i__3 = i__ + i__ * a_dim1; + a[i__3].r = 1., a[i__3].i = 0.; + zlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &taui, &c__[ic + + jc * c_dim1], ldc, &work[1]); + i__3 = i__ + i__ * a_dim1; + a[i__3].r = aii.r, a[i__3].i = aii.i; + if (i__ < nq) { + i__3 = nq - i__; + zlacgv_(&i__3, &a[i__ + (i__ + 1) * a_dim1], lda); + } +/* L10: */ + } + return 0; + +/* End of ZUNML2 */ + +} /* zunml2_ */ + +/* Subroutine */ int zunmlq_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i__; + static doublecomplex t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int zunml2_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + static logical notran; + static integer ldwork; + extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + static char transt[1]; + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZUNMLQ overwrites the general complex M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(k)' . . . H(2)' H(1)' + + as returned by ZGELQF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**H from the Left; + = 'R': apply Q or Q**H from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'C': Conjugate transpose, apply Q**H. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) COMPLEX*16 array, dimension + (LDA,M) if SIDE = 'L', + (LDA,N) if SIDE = 'R' + The i-th row must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + ZGELQF in the first k rows of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,K). + + TAU (input) COMPLEX*16 array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by ZGELQF. + + C (input/output) COMPLEX*16 array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* + Determine the block size. NB may be at most NBMAX, where NBMAX + is used to define the local array T. + + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "ZUNMLQ", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZUNMLQ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "ZUNMLQ", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + zunml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + if (notran) { + *(unsigned char *)transt = 'C'; + } else { + *(unsigned char *)transt = 'N'; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__4 = nq - i__ + 1; + zlarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1], + lda, &tau[i__], t, &c__65); + if (left) { + +/* H or H' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H' */ + + zlarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__ + + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1], + ldc, &work[1], &ldwork); +/* L10: */ + } + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + return 0; + +/* End of ZUNMLQ */ + +} /* zunmlq_ */ + +/* Subroutine */ int zunmql_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i__; + static doublecomplex t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int zunm2l_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + static logical notran; + static integer ldwork; + extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZUNMQL overwrites the general complex M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by ZGEQLF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**H from the Left; + = 'R': apply Q or Q**H from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'C': Transpose, apply Q**H. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) COMPLEX*16 array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + ZGEQLF in the last k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) COMPLEX*16 array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by ZGEQLF. + + C (input/output) COMPLEX*16 array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* + Determine the block size. NB may be at most NBMAX, where NBMAX + is used to define the local array T. + + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "ZUNMQL", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZUNMQL", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "ZUNMQL", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + zunm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i+ib-1) . . . H(i+1) H(i) +*/ + + i__4 = nq - *k + i__ + ib - 1; + zlarft_("Backward", "Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1] + , lda, &tau[i__], t, &c__65); + if (left) { + +/* H or H' is applied to C(1:m-k+i+ib-1,1:n) */ + + mi = *m - *k + i__ + ib - 1; + } else { + +/* H or H' is applied to C(1:m,1:n-k+i+ib-1) */ + + ni = *n - *k + i__ + ib - 1; + } + +/* Apply H or H' */ + + zlarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, &a[ + i__ * a_dim1 + 1], lda, t, &c__65, &c__[c_offset], ldc, & + work[1], &ldwork); +/* L10: */ + } + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + return 0; + +/* End of ZUNMQL */ + +} /* zunmql_ */ + +/* Subroutine */ int zunmqr_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i__; + static doublecomplex t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int zunm2r_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + static logical notran; + static integer ldwork; + extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZUNMQR overwrites the general complex M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(1) H(2) . . . H(k) + + as returned by ZGEQRF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**H from the Left; + = 'R': apply Q or Q**H from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'C': Conjugate transpose, apply Q**H. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) COMPLEX*16 array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + ZGEQRF in the first k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) COMPLEX*16 array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by ZGEQRF. + + C (input/output) COMPLEX*16 array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* + Determine the block size. NB may be at most NBMAX, where NBMAX + is used to define the local array T. + + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "ZUNMQR", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZUNMQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "ZUNMQR", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + zunm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__4 = nq - i__ + 1; + zlarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], t, &c__65) + ; + if (left) { + +/* H or H' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H' */ + + zlarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[ + i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * + c_dim1], ldc, &work[1], &ldwork); +/* L10: */ + } + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + return 0; + +/* End of ZUNMQR */ + +} /* zunmqr_ */ + +/* Subroutine */ int zunmtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2, i__3; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i1, i2, nb, mi, ni, nq, nw; + static logical left; + extern logical lsame_(char *, char *); + static integer iinfo; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer lwkopt; + static logical lquery; + extern /* Subroutine */ int zunmql_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *), zunmqr_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZUNMTR overwrites the general complex M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + where Q is a complex unitary matrix of order nq, with nq = m if + SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + nq-1 elementary reflectors, as returned by ZHETRD: + + if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); + + if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**H from the Left; + = 'R': apply Q or Q**H from the Right. + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A contains elementary reflectors + from ZHETRD; + = 'L': Lower triangle of A contains elementary reflectors + from ZHETRD. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'C': Conjugate transpose, apply Q**H. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + A (input) COMPLEX*16 array, dimension + (LDA,M) if SIDE = 'L' + (LDA,N) if SIDE = 'R' + The vectors which define the elementary reflectors, as + returned by ZHETRD. + + LDA (input) INTEGER + The leading dimension of the array A. + LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. + + TAU (input) COMPLEX*16 array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by ZHETRD. + + C (input/output) COMPLEX*16 array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >=M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! upper && ! lsame_(uplo, "L")) { + *info = -2; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "C")) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + if (upper) { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *m - 1; + i__3 = *m - 1; + nb = ilaenv_(&c__1, "ZUNMQL", ch__1, &i__2, n, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, "ZUNMQL", ch__1, m, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } else { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *m - 1; + i__3 = *m - 1; + nb = ilaenv_(&c__1, "ZUNMQR", ch__1, &i__2, n, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, "ZUNMQR", ch__1, m, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } + lwkopt = max(1,nw) * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__2 = -(*info); + xerbla_("ZUNMTR", &i__2); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || nq == 1) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + if (left) { + mi = *m - 1; + ni = *n; + } else { + mi = *m; + ni = *n - 1; + } + + if (upper) { + +/* Q was determined by a call to ZHETRD with UPLO = 'U' */ + + i__2 = nq - 1; + zunmql_(side, trans, &mi, &ni, &i__2, &a[(a_dim1 << 1) + 1], lda, & + tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo); + } else { + +/* Q was determined by a call to ZHETRD with UPLO = 'L' */ + + if (left) { + i1 = 2; + i2 = 1; + } else { + i1 = 1; + i2 = 2; + } + i__2 = nq - 1; + zunmqr_(side, trans, &mi, &ni, &i__2, &a[a_dim1 + 2], lda, &tau[1], & + c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + return 0; + +/* End of ZUNMTR */ + +} /* zunmtr_ */ + diff --git a/numpy/linalg/lapack_lite/make_lite.py b/numpy/linalg/lapack_lite/make_lite.py index 2126f267f..b8f1ac89c 100755 --- a/numpy/linalg/lapack_lite/make_lite.py +++ b/numpy/linalg/lapack_lite/make_lite.py @@ -24,7 +24,7 @@ import clapack_scrub # -C to check array subscripts F2C_ARGS = ['-A'] -# The header to add to the top of the *_lite.c file. Note that dlamch_() calls +# The header to add to the top of the f2c_*.c file. Note that dlamch_() calls # will be replaced by the macros below by clapack_scrub.scrub_source() HEADER = '''\ /* @@ -164,12 +164,18 @@ class FortranLibrary(object): class LapackLibrary(FortranLibrary): def _newFortranRoutine(self, rname, filename): routine = FortranLibrary._newFortranRoutine(self, rname, filename) - if 'BLAS' in filename: + if 'blas' in filename.lower(): routine.type = 'blas' elif rname.startswith('z'): - routine.type = 'zlapack' + routine.type = 'z_lapack' + elif rname.startswith('c'): + routine.type = 'c_lapack' + elif rname.startswith('s'): + routine.type = 's_lapack' + elif rname.startswith('d'): + routine.type = 'd_lapack' else: - routine.type = 'dlapack' + routine.type = 'lapack' return routine def allRoutinesByType(self, typename): @@ -216,7 +222,7 @@ def getWrappedRoutineNames(wrapped_routines_file): routines.append(line) return routines, ignores -types = {'blas', 'zlapack', 'dlapack'} +types = {'blas', 'lapack', 'd_lapack', 's_lapack', 'z_lapack', 'c_lapack'} def dumpRoutineNames(library, output_dir): for typename in {'unknown'} | types: @@ -267,7 +273,7 @@ def main(): dumpRoutineNames(library, output_dir) for typename in types: - fortran_file = os.path.join(output_dir, '%s_lite.f' % typename) + fortran_file = os.path.join(output_dir, 'f2c_%s.f' % typename) c_file = fortran_file[:-2] + '.c' print('creating %s ...' % c_file) routines = library.allRoutinesByType(typename) diff --git a/numpy/linalg/lapack_lite/zlapack_lite.c b/numpy/linalg/lapack_lite/zlapack_lite.c deleted file mode 100644 index 143b7254f..000000000 --- a/numpy/linalg/lapack_lite/zlapack_lite.c +++ /dev/null @@ -1,26995 +0,0 @@ -/* -NOTE: This is generated code. Look in Misc/lapack_lite for information on - remaking this file. -*/ -#include "f2c.h" - -#ifdef HAVE_CONFIG -#include "config.h" -#else -extern doublereal dlamch_(char *); -#define EPSILON dlamch_("Epsilon") -#define SAFEMINIMUM dlamch_("Safe minimum") -#define PRECISION dlamch_("Precision") -#define BASE dlamch_("Base") -#endif - -extern doublereal dlapy2_(doublereal *x, doublereal *y); - -/* -f2c knows the exact rules for precedence, and so omits parentheses where not -strictly necessary. Since this is generated code, we don't really care if -it's readable, and we know what is written is correct. So don't warn about -them. -*/ -#if defined(__GNUC__) -#pragma GCC diagnostic ignored "-Wparentheses" -#endif - - -/* Table of constant values */ - -static integer c__1 = 1; -static doublecomplex c_b59 = {0.,0.}; -static doublecomplex c_b60 = {1.,0.}; -static integer c_n1 = -1; -static integer c__3 = 3; -static integer c__2 = 2; -static integer c__0 = 0; -static integer c__8 = 8; -static integer c__4 = 4; -static integer c__65 = 65; -static integer c__6 = 6; -static integer c__9 = 9; -static doublereal c_b324 = 0.; -static doublereal c_b1015 = 1.; -static integer c__15 = 15; -static logical c_false = FALSE_; -static doublereal c_b1294 = -1.; -static doublereal c_b2210 = .5; - -/* Subroutine */ int zdrot_(integer *n, doublecomplex *cx, integer *incx, - doublecomplex *cy, integer *incy, doublereal *c__, doublereal *s) -{ - /* System generated locals */ - integer i__1, i__2, i__3, i__4; - doublecomplex z__1, z__2, z__3; - - /* Local variables */ - static integer i__, ix, iy; - static doublecomplex ctemp; - - -/* - applies a plane rotation, where the cos and sin (c and s) are real - and the vectors cx and cy are complex. - jack dongarra, linpack, 3/11/78. - - - ===================================================================== -*/ - - /* Parameter adjustments */ - --cy; - --cx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments not equal - to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = ix; - z__2.r = *c__ * cx[i__2].r, z__2.i = *c__ * cx[i__2].i; - i__3 = iy; - z__3.r = *s * cy[i__3].r, z__3.i = *s * cy[i__3].i; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; - ctemp.r = z__1.r, ctemp.i = z__1.i; - i__2 = iy; - i__3 = iy; - z__2.r = *c__ * cy[i__3].r, z__2.i = *c__ * cy[i__3].i; - i__4 = ix; - z__3.r = *s * cx[i__4].r, z__3.i = *s * cx[i__4].i; - z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i; - cy[i__2].r = z__1.r, cy[i__2].i = z__1.i; - i__2 = ix; - cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* code for both increments equal to 1 */ - -L20: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - z__2.r = *c__ * cx[i__2].r, z__2.i = *c__ * cx[i__2].i; - i__3 = i__; - z__3.r = *s * cy[i__3].r, z__3.i = *s * cy[i__3].i; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; - ctemp.r = z__1.r, ctemp.i = z__1.i; - i__2 = i__; - i__3 = i__; - z__2.r = *c__ * cy[i__3].r, z__2.i = *c__ * cy[i__3].i; - i__4 = i__; - z__3.r = *s * cx[i__4].r, z__3.i = *s * cx[i__4].i; - z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i; - cy[i__2].r = z__1.r, cy[i__2].i = z__1.i; - i__2 = i__; - cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i; -/* L30: */ - } - return 0; -} /* zdrot_ */ - -/* Subroutine */ int zgebak_(char *job, char *side, integer *n, integer *ilo, - integer *ihi, doublereal *scale, integer *m, doublecomplex *v, - integer *ldv, integer *info) -{ - /* System generated locals */ - integer v_dim1, v_offset, i__1; - - /* Local variables */ - static integer i__, k; - static doublereal s; - static integer ii; - extern logical lsame_(char *, char *); - static logical leftv; - extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, - doublecomplex *, integer *), xerbla_(char *, integer *), - zdscal_(integer *, doublereal *, doublecomplex *, integer *); - static logical rightv; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZGEBAK forms the right or left eigenvectors of a complex general - matrix by backward transformation on the computed eigenvectors of the - balanced matrix output by ZGEBAL. - - Arguments - ========= - - JOB (input) CHARACTER*1 - Specifies the type of backward transformation required: - = 'N', do nothing, return immediately; - = 'P', do backward transformation for permutation only; - = 'S', do backward transformation for scaling only; - = 'B', do backward transformations for both permutation and - scaling. - JOB must be the same as the argument JOB supplied to ZGEBAL. - - SIDE (input) CHARACTER*1 - = 'R': V contains right eigenvectors; - = 'L': V contains left eigenvectors. - - N (input) INTEGER - The number of rows of the matrix V. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - The integers ILO and IHI determined by ZGEBAL. - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - - SCALE (input) DOUBLE PRECISION array, dimension (N) - Details of the permutation and scaling factors, as returned - by ZGEBAL. - - M (input) INTEGER - The number of columns of the matrix V. M >= 0. - - V (input/output) COMPLEX*16 array, dimension (LDV,M) - On entry, the matrix of right or left eigenvectors to be - transformed, as returned by ZHSEIN or ZTREVC. - On exit, V is overwritten by the transformed eigenvectors. - - LDV (input) INTEGER - The leading dimension of the array V. LDV >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - - ===================================================================== - - - Decode and Test the input parameters -*/ - - /* Parameter adjustments */ - --scale; - v_dim1 = *ldv; - v_offset = 1 + v_dim1; - v -= v_offset; - - /* Function Body */ - rightv = lsame_(side, "R"); - leftv = lsame_(side, "L"); - - *info = 0; - if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") - && ! lsame_(job, "B")) { - *info = -1; - } else if (! rightv && ! leftv) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*ilo < 1 || *ilo > max(1,*n)) { - *info = -4; - } else if (*ihi < min(*ilo,*n) || *ihi > *n) { - *info = -5; - } else if (*m < 0) { - *info = -7; - } else if (*ldv < max(1,*n)) { - *info = -9; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZGEBAK", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - if (*m == 0) { - return 0; - } - if (lsame_(job, "N")) { - return 0; - } - - if (*ilo == *ihi) { - goto L30; - } - -/* Backward balance */ - - if (lsame_(job, "S") || lsame_(job, "B")) { - - if (rightv) { - i__1 = *ihi; - for (i__ = *ilo; i__ <= i__1; ++i__) { - s = scale[i__]; - zdscal_(m, &s, &v[i__ + v_dim1], ldv); -/* L10: */ - } - } - - if (leftv) { - i__1 = *ihi; - for (i__ = *ilo; i__ <= i__1; ++i__) { - s = 1. / scale[i__]; - zdscal_(m, &s, &v[i__ + v_dim1], ldv); -/* L20: */ - } - } - - } - -/* - Backward permutation - - For I = ILO-1 step -1 until 1, - IHI+1 step 1 until N do -- -*/ - -L30: - if (lsame_(job, "P") || lsame_(job, "B")) { - if (rightv) { - i__1 = *n; - for (ii = 1; ii <= i__1; ++ii) { - i__ = ii; - if (i__ >= *ilo && i__ <= *ihi) { - goto L40; - } - if (i__ < *ilo) { - i__ = *ilo - ii; - } - k = (integer) scale[i__]; - if (k == i__) { - goto L40; - } - zswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); -L40: - ; - } - } - - if (leftv) { - i__1 = *n; - for (ii = 1; ii <= i__1; ++ii) { - i__ = ii; - if (i__ >= *ilo && i__ <= *ihi) { - goto L50; - } - if (i__ < *ilo) { - i__ = *ilo - ii; - } - k = (integer) scale[i__]; - if (k == i__) { - goto L50; - } - zswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); -L50: - ; - } - } - } - - return 0; - -/* End of ZGEBAK */ - -} /* zgebak_ */ - -/* Subroutine */ int zgebal_(char *job, integer *n, doublecomplex *a, integer - *lda, integer *ilo, integer *ihi, doublereal *scale, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - doublereal d__1, d__2; - - /* Builtin functions */ - double d_imag(doublecomplex *), z_abs(doublecomplex *); - - /* Local variables */ - static doublereal c__, f, g; - static integer i__, j, k, l, m; - static doublereal r__, s, ca, ra; - static integer ica, ira, iexc; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, - doublecomplex *, integer *); - static doublereal sfmin1, sfmin2, sfmax1, sfmax2; - - extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_( - integer *, doublereal *, doublecomplex *, integer *); - extern integer izamax_(integer *, doublecomplex *, integer *); - static logical noconv; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZGEBAL balances a general complex matrix A. This involves, first, - permuting A by a similarity transformation to isolate eigenvalues - in the first 1 to ILO-1 and last IHI+1 to N elements on the - diagonal; and second, applying a diagonal similarity transformation - to rows and columns ILO to IHI to make the rows and columns as - close in norm as possible. Both steps are optional. - - Balancing may reduce the 1-norm of the matrix, and improve the - accuracy of the computed eigenvalues and/or eigenvectors. - - Arguments - ========= - - JOB (input) CHARACTER*1 - Specifies the operations to be performed on A: - = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 - for i = 1,...,N; - = 'P': permute only; - = 'S': scale only; - = 'B': both permute and scale. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the input matrix A. - On exit, A is overwritten by the balanced matrix. - If JOB = 'N', A is not referenced. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - ILO (output) INTEGER - IHI (output) INTEGER - ILO and IHI are set to integers such that on exit - A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. - If JOB = 'N' or 'S', ILO = 1 and IHI = N. - - SCALE (output) DOUBLE PRECISION array, dimension (N) - Details of the permutations and scaling factors applied to - A. If P(j) is the index of the row and column interchanged - with row and column j and D(j) is the scaling factor - applied to row and column j, then - SCALE(j) = P(j) for j = 1,...,ILO-1 - = D(j) for j = ILO,...,IHI - = P(j) for j = IHI+1,...,N. - The order in which the interchanges are made is N to IHI+1, - then 1 to ILO-1. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - The permutations consist of row and column interchanges which put - the matrix in the form - - ( T1 X Y ) - P A P = ( 0 B Z ) - ( 0 0 T2 ) - - where T1 and T2 are upper triangular matrices whose eigenvalues lie - along the diagonal. The column indices ILO and IHI mark the starting - and ending columns of the submatrix B. Balancing consists of applying - a diagonal similarity transformation inv(D) * B * D to make the - 1-norms of each row of B and its corresponding column nearly equal. - The output matrix is - - ( T1 X*D Y ) - ( 0 inv(D)*B*D inv(D)*Z ). - ( 0 0 T2 ) - - Information about the permutations P and the diagonal matrix D is - returned in the vector SCALE. - - This subroutine is based on the EISPACK routine CBAL. - - Modified by Tzu-Yi Chen, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --scale; - - /* Function Body */ - *info = 0; - if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") - && ! lsame_(job, "B")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZGEBAL", &i__1); - return 0; - } - - k = 1; - l = *n; - - if (*n == 0) { - goto L210; - } - - if (lsame_(job, "N")) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - scale[i__] = 1.; -/* L10: */ - } - goto L210; - } - - if (lsame_(job, "S")) { - goto L120; - } - -/* Permutation to isolate eigenvalues if possible */ - - goto L50; - -/* Row and column exchange. */ - -L20: - scale[m] = (doublereal) j; - if (j == m) { - goto L30; - } - - zswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); - i__1 = *n - k + 1; - zswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); - -L30: - switch (iexc) { - case 1: goto L40; - case 2: goto L80; - } - -/* Search for rows isolating an eigenvalue and push them down. */ - -L40: - if (l == 1) { - goto L210; - } - --l; - -L50: - for (j = l; j >= 1; --j) { - - i__1 = l; - for (i__ = 1; i__ <= i__1; ++i__) { - if (i__ == j) { - goto L60; - } - i__2 = j + i__ * a_dim1; - if (a[i__2].r != 0. || d_imag(&a[j + i__ * a_dim1]) != 0.) { - goto L70; - } -L60: - ; - } - - m = l; - iexc = 1; - goto L20; -L70: - ; - } - - goto L90; - -/* Search for columns isolating an eigenvalue and push them left. */ - -L80: - ++k; - -L90: - i__1 = l; - for (j = k; j <= i__1; ++j) { - - i__2 = l; - for (i__ = k; i__ <= i__2; ++i__) { - if (i__ == j) { - goto L100; - } - i__3 = i__ + j * a_dim1; - if (a[i__3].r != 0. || d_imag(&a[i__ + j * a_dim1]) != 0.) { - goto L110; - } -L100: - ; - } - - m = k; - iexc = 2; - goto L20; -L110: - ; - } - -L120: - i__1 = l; - for (i__ = k; i__ <= i__1; ++i__) { - scale[i__] = 1.; -/* L130: */ - } - - if (lsame_(job, "P")) { - goto L210; - } - -/* - Balance the submatrix in rows K to L. - - Iterative loop for norm reduction -*/ - - sfmin1 = SAFEMINIMUM / PRECISION; - sfmax1 = 1. / sfmin1; - sfmin2 = sfmin1 * 8.; - sfmax2 = 1. / sfmin2; -L140: - noconv = FALSE_; - - i__1 = l; - for (i__ = k; i__ <= i__1; ++i__) { - c__ = 0.; - r__ = 0.; - - i__2 = l; - for (j = k; j <= i__2; ++j) { - if (j == i__) { - goto L150; - } - i__3 = j + i__ * a_dim1; - c__ += (d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[j + i__ * - a_dim1]), abs(d__2)); - i__3 = i__ + j * a_dim1; - r__ += (d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[i__ + j * - a_dim1]), abs(d__2)); -L150: - ; - } - ica = izamax_(&l, &a[i__ * a_dim1 + 1], &c__1); - ca = z_abs(&a[ica + i__ * a_dim1]); - i__2 = *n - k + 1; - ira = izamax_(&i__2, &a[i__ + k * a_dim1], lda); - ra = z_abs(&a[i__ + (ira + k - 1) * a_dim1]); - -/* Guard against zero C or R due to underflow. */ - - if (c__ == 0. || r__ == 0.) { - goto L200; - } - g = r__ / 8.; - f = 1.; - s = c__ + r__; -L160: -/* Computing MAX */ - d__1 = max(f,c__); -/* Computing MIN */ - d__2 = min(r__,g); - if (c__ >= g || max(d__1,ca) >= sfmax2 || min(d__2,ra) <= sfmin2) { - goto L170; - } - f *= 8.; - c__ *= 8.; - ca *= 8.; - r__ /= 8.; - g /= 8.; - ra /= 8.; - goto L160; - -L170: - g = c__ / 8.; -L180: -/* Computing MIN */ - d__1 = min(f,c__), d__1 = min(d__1,g); - if (g < r__ || max(r__,ra) >= sfmax2 || min(d__1,ca) <= sfmin2) { - goto L190; - } - f /= 8.; - c__ /= 8.; - g /= 8.; - ca /= 8.; - r__ *= 8.; - ra *= 8.; - goto L180; - -/* Now balance. */ - -L190: - if (c__ + r__ >= s * .95) { - goto L200; - } - if (f < 1. && scale[i__] < 1.) { - if (f * scale[i__] <= sfmin1) { - goto L200; - } - } - if (f > 1. && scale[i__] > 1.) { - if (scale[i__] >= sfmax1 / f) { - goto L200; - } - } - g = 1. / f; - scale[i__] *= f; - noconv = TRUE_; - - i__2 = *n - k + 1; - zdscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); - zdscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); - -L200: - ; - } - - if (noconv) { - goto L140; - } - -L210: - *ilo = k; - *ihi = l; - - return 0; - -/* End of ZGEBAL */ - -} /* zgebal_ */ - -/* Subroutine */ int zgebd2_(integer *m, integer *n, doublecomplex *a, - integer *lda, doublereal *d__, doublereal *e, doublecomplex *tauq, - doublecomplex *taup, doublecomplex *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - doublecomplex z__1; - - /* Builtin functions */ - void d_cnjg(doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer i__; - static doublecomplex alpha; - extern /* Subroutine */ int zlarf_(char *, integer *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *, doublecomplex *), xerbla_(char *, integer *), zlarfg_(integer *, doublecomplex *, doublecomplex *, - integer *, doublecomplex *), zlacgv_(integer *, doublecomplex *, - integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZGEBD2 reduces a complex general m by n matrix A to upper or lower - real bidiagonal form B by a unitary transformation: Q' * A * P = B. - - If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. - - Arguments - ========= - - M (input) INTEGER - The number of rows in the matrix A. M >= 0. - - N (input) INTEGER - The number of columns in the matrix A. N >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the m by n general matrix to be reduced. - On exit, - if m >= n, the diagonal and the first superdiagonal are - overwritten with the upper bidiagonal matrix B; the - elements below the diagonal, with the array TAUQ, represent - the unitary matrix Q as a product of elementary - reflectors, and the elements above the first superdiagonal, - with the array TAUP, represent the unitary matrix P as - a product of elementary reflectors; - if m < n, the diagonal and the first subdiagonal are - overwritten with the lower bidiagonal matrix B; the - elements below the first subdiagonal, with the array TAUQ, - represent the unitary matrix Q as a product of - elementary reflectors, and the elements above the diagonal, - with the array TAUP, represent the unitary matrix P as - a product of elementary reflectors. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - D (output) DOUBLE PRECISION array, dimension (min(M,N)) - The diagonal elements of the bidiagonal matrix B: - D(i) = A(i,i). - - E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) - The off-diagonal elements of the bidiagonal matrix B: - if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; - if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. - - TAUQ (output) COMPLEX*16 array dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the unitary matrix Q. See Further Details. - - TAUP (output) COMPLEX*16 array, dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the unitary matrix P. See Further Details. - - WORK (workspace) COMPLEX*16 array, dimension (max(M,N)) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - The matrices Q and P are represented as products of elementary - reflectors: - - If m >= n, - - Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are complex scalars, and v and u are complex - vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in - A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in - A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i). - - If m < n, - - Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are complex scalars, v and u are complex vectors; - v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); - u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); - tauq is stored in TAUQ(i) and taup in TAUP(i). - - The contents of A on exit are illustrated by the following examples: - - m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): - - ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) - ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) - ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) - ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) - ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) - ( v1 v2 v3 v4 v5 ) - - where d and e denote diagonal and off-diagonal elements of B, vi - denotes an element of the vector defining H(i), and ui an element of - the vector defining G(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tauq; - --taup; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info < 0) { - i__1 = -(*info); - xerbla_("ZGEBD2", &i__1); - return 0; - } - - if (*m >= *n) { - -/* Reduce to upper bidiagonal form */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ - - i__2 = i__ + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *m - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - zlarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, & - tauq[i__]); - i__2 = i__; - d__[i__2] = alpha.r; - i__2 = i__ + i__ * a_dim1; - a[i__2].r = 1., a[i__2].i = 0.; - -/* Apply H(i)' to A(i:m,i+1:n) from the left */ - - i__2 = *m - i__ + 1; - i__3 = *n - i__; - d_cnjg(&z__1, &tauq[i__]); - zlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &z__1, - &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); - i__2 = i__ + i__ * a_dim1; - i__3 = i__; - a[i__2].r = d__[i__3], a[i__2].i = 0.; - - if (i__ < *n) { - -/* - Generate elementary reflector G(i) to annihilate - A(i,i+2:n) -*/ - - i__2 = *n - i__; - zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); - i__2 = i__ + (i__ + 1) * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *n - i__; -/* Computing MIN */ - i__3 = i__ + 2; - zlarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & - taup[i__]); - i__2 = i__; - e[i__2] = alpha.r; - i__2 = i__ + (i__ + 1) * a_dim1; - a[i__2].r = 1., a[i__2].i = 0.; - -/* Apply G(i) to A(i+1:m,i+1:n) from the right */ - - i__2 = *m - i__; - i__3 = *n - i__; - zlarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], - lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], - lda, &work[1]); - i__2 = *n - i__; - zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); - i__2 = i__ + (i__ + 1) * a_dim1; - i__3 = i__; - a[i__2].r = e[i__3], a[i__2].i = 0.; - } else { - i__2 = i__; - taup[i__2].r = 0., taup[i__2].i = 0.; - } -/* L10: */ - } - } else { - -/* Reduce to lower bidiagonal form */ - - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Generate elementary reflector G(i) to annihilate A(i,i+1:n) */ - - i__2 = *n - i__ + 1; - zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); - i__2 = i__ + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *n - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - zlarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & - taup[i__]); - i__2 = i__; - d__[i__2] = alpha.r; - i__2 = i__ + i__ * a_dim1; - a[i__2].r = 1., a[i__2].i = 0.; - -/* Apply G(i) to A(i+1:m,i:n) from the right */ - - i__2 = *m - i__; - i__3 = *n - i__ + 1; -/* Computing MIN */ - i__4 = i__ + 1; - zlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[ - i__], &a[min(i__4,*m) + i__ * a_dim1], lda, &work[1]); - i__2 = *n - i__ + 1; - zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); - i__2 = i__ + i__ * a_dim1; - i__3 = i__; - a[i__2].r = d__[i__3], a[i__2].i = 0.; - - if (i__ < *m) { - -/* - Generate elementary reflector H(i) to annihilate - A(i+2:m,i) -*/ - - i__2 = i__ + 1 + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *m - i__; -/* Computing MIN */ - i__3 = i__ + 2; - zlarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, - &tauq[i__]); - i__2 = i__; - e[i__2] = alpha.r; - i__2 = i__ + 1 + i__ * a_dim1; - a[i__2].r = 1., a[i__2].i = 0.; - -/* Apply H(i)' to A(i+1:m,i+1:n) from the left */ - - i__2 = *m - i__; - i__3 = *n - i__; - d_cnjg(&z__1, &tauq[i__]); - zlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], & - c__1, &z__1, &a[i__ + 1 + (i__ + 1) * a_dim1], lda, & - work[1]); - i__2 = i__ + 1 + i__ * a_dim1; - i__3 = i__; - a[i__2].r = e[i__3], a[i__2].i = 0.; - } else { - i__2 = i__; - tauq[i__2].r = 0., tauq[i__2].i = 0.; - } -/* L20: */ - } - } - return 0; - -/* End of ZGEBD2 */ - -} /* zgebd2_ */ - -/* Subroutine */ int zgebrd_(integer *m, integer *n, doublecomplex *a, - integer *lda, doublereal *d__, doublereal *e, doublecomplex *tauq, - doublecomplex *taup, doublecomplex *work, integer *lwork, integer * - info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - doublereal d__1; - doublecomplex z__1; - - /* Local variables */ - static integer i__, j, nb, nx; - static doublereal ws; - static integer nbmin, iinfo, minmn; - extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, - integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *), zgebd2_(integer *, integer *, - doublecomplex *, integer *, doublereal *, doublereal *, - doublecomplex *, doublecomplex *, doublecomplex *, integer *), - xerbla_(char *, integer *), zlabrd_(integer *, integer *, - integer *, doublecomplex *, integer *, doublereal *, doublereal *, - doublecomplex *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwrkx, ldwrky, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZGEBRD reduces a general complex M-by-N matrix A to upper or lower - bidiagonal form B by a unitary transformation: Q**H * A * P = B. - - If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. - - Arguments - ========= - - M (input) INTEGER - The number of rows in the matrix A. M >= 0. - - N (input) INTEGER - The number of columns in the matrix A. N >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the M-by-N general matrix to be reduced. - On exit, - if m >= n, the diagonal and the first superdiagonal are - overwritten with the upper bidiagonal matrix B; the - elements below the diagonal, with the array TAUQ, represent - the unitary matrix Q as a product of elementary - reflectors, and the elements above the first superdiagonal, - with the array TAUP, represent the unitary matrix P as - a product of elementary reflectors; - if m < n, the diagonal and the first subdiagonal are - overwritten with the lower bidiagonal matrix B; the - elements below the first subdiagonal, with the array TAUQ, - represent the unitary matrix Q as a product of - elementary reflectors, and the elements above the diagonal, - with the array TAUP, represent the unitary matrix P as - a product of elementary reflectors. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - D (output) DOUBLE PRECISION array, dimension (min(M,N)) - The diagonal elements of the bidiagonal matrix B: - D(i) = A(i,i). - - E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) - The off-diagonal elements of the bidiagonal matrix B: - if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; - if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. - - TAUQ (output) COMPLEX*16 array dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the unitary matrix Q. See Further Details. - - TAUP (output) COMPLEX*16 array, dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the unitary matrix P. See Further Details. - - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The length of the array WORK. LWORK >= max(1,M,N). - For optimum performance LWORK >= (M+N)*NB, where NB - is the optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - The matrices Q and P are represented as products of elementary - reflectors: - - If m >= n, - - Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are complex scalars, and v and u are complex - vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in - A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in - A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i). - - If m < n, - - Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are complex scalars, and v and u are complex - vectors; v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in - A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in - A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). - - The contents of A on exit are illustrated by the following examples: - - m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): - - ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) - ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) - ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) - ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) - ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) - ( v1 v2 v3 v4 v5 ) - - where d and e denote diagonal and off-diagonal elements of B, vi - denotes an element of the vector defining H(i), and ui an element of - the vector defining G(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tauq; - --taup; - --work; - - /* Function Body */ - *info = 0; -/* Computing MAX */ - i__1 = 1, i__2 = ilaenv_(&c__1, "ZGEBRD", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - nb = max(i__1,i__2); - lwkopt = (*m + *n) * nb; - d__1 = (doublereal) lwkopt; - work[1].r = d__1, work[1].i = 0.; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } else /* if(complicated condition) */ { -/* Computing MAX */ - i__1 = max(1,*m); - if (*lwork < max(i__1,*n) && ! lquery) { - *info = -10; - } - } - if (*info < 0) { - i__1 = -(*info); - xerbla_("ZGEBRD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - minmn = min(*m,*n); - if (minmn == 0) { - work[1].r = 1., work[1].i = 0.; - return 0; - } - - ws = (doublereal) max(*m,*n); - ldwrkx = *m; - ldwrky = *n; - - if (nb > 1 && nb < minmn) { - -/* - Set the crossover point NX. - - Computing MAX -*/ - i__1 = nb, i__2 = ilaenv_(&c__3, "ZGEBRD", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - -/* Determine when to switch from blocked to unblocked code. */ - - if (nx < minmn) { - ws = (doublereal) ((*m + *n) * nb); - if ((doublereal) (*lwork) < ws) { - -/* - Not enough work space for the optimal NB, consider using - a smaller block size. -*/ - - nbmin = ilaenv_(&c__2, "ZGEBRD", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - if (*lwork >= (*m + *n) * nbmin) { - nb = *lwork / (*m + *n); - } else { - nb = 1; - nx = minmn; - } - } - } - } else { - nx = minmn; - } - - i__1 = minmn - nx; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - -/* - Reduce rows and columns i:i+ib-1 to bidiagonal form and return - the matrices X and Y which are needed to update the unreduced - part of the matrix -*/ - - i__3 = *m - i__ + 1; - i__4 = *n - i__ + 1; - zlabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[ - i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx - * nb + 1], &ldwrky); - -/* - Update the trailing submatrix A(i+ib:m,i+ib:n), using - an update of the form A := A - V*Y' - X*U' -*/ - - i__3 = *m - i__ - nb + 1; - i__4 = *n - i__ - nb + 1; - z__1.r = -1., z__1.i = -0.; - zgemm_("No transpose", "Conjugate transpose", &i__3, &i__4, &nb, & - z__1, &a[i__ + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + - nb + 1], &ldwrky, &c_b60, &a[i__ + nb + (i__ + nb) * a_dim1], - lda); - i__3 = *m - i__ - nb + 1; - i__4 = *n - i__ - nb + 1; - z__1.r = -1., z__1.i = -0.; - zgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &z__1, & - work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, & - c_b60, &a[i__ + nb + (i__ + nb) * a_dim1], lda); - -/* Copy diagonal and off-diagonal elements of B back into A */ - - if (*m >= *n) { - i__3 = i__ + nb - 1; - for (j = i__; j <= i__3; ++j) { - i__4 = j + j * a_dim1; - i__5 = j; - a[i__4].r = d__[i__5], a[i__4].i = 0.; - i__4 = j + (j + 1) * a_dim1; - i__5 = j; - a[i__4].r = e[i__5], a[i__4].i = 0.; -/* L10: */ - } - } else { - i__3 = i__ + nb - 1; - for (j = i__; j <= i__3; ++j) { - i__4 = j + j * a_dim1; - i__5 = j; - a[i__4].r = d__[i__5], a[i__4].i = 0.; - i__4 = j + 1 + j * a_dim1; - i__5 = j; - a[i__4].r = e[i__5], a[i__4].i = 0.; -/* L20: */ - } - } -/* L30: */ - } - -/* Use unblocked code to reduce the remainder of the matrix */ - - i__2 = *m - i__ + 1; - i__1 = *n - i__ + 1; - zgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], & - tauq[i__], &taup[i__], &work[1], &iinfo); - work[1].r = ws, work[1].i = 0.; - return 0; - -/* End of ZGEBRD */ - -} /* zgebrd_ */ - -/* Subroutine */ int zgeev_(char *jobvl, char *jobvr, integer *n, - doublecomplex *a, integer *lda, doublecomplex *w, doublecomplex *vl, - integer *ldvl, doublecomplex *vr, integer *ldvr, doublecomplex *work, - integer *lwork, doublereal *rwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, - i__2, i__3, i__4; - doublereal d__1, d__2; - doublecomplex z__1, z__2; - - /* Builtin functions */ - double sqrt(doublereal), d_imag(doublecomplex *); - void d_cnjg(doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer i__, k, ihi; - static doublereal scl; - static integer ilo; - static doublereal dum[1], eps; - static doublecomplex tmp; - static integer ibal; - static char side[1]; - static integer maxb; - static doublereal anrm; - static integer ierr, itau, iwrk, nout; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int zscal_(integer *, doublecomplex *, - doublecomplex *, integer *), dlabad_(doublereal *, doublereal *); - extern doublereal dznrm2_(integer *, doublecomplex *, integer *); - static logical scalea; - - static doublereal cscale; - extern /* Subroutine */ int zgebak_(char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublecomplex *, integer *, - integer *), zgebal_(char *, integer *, - doublecomplex *, integer *, integer *, integer *, doublereal *, - integer *); - extern integer idamax_(integer *, doublereal *, integer *); - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static logical select[1]; - extern /* Subroutine */ int zdscal_(integer *, doublereal *, - doublecomplex *, integer *); - static doublereal bignum; - extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, - integer *, doublereal *); - extern /* Subroutine */ int zgehrd_(integer *, integer *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *, integer *), zlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublecomplex *, - integer *, integer *), zlacpy_(char *, integer *, - integer *, doublecomplex *, integer *, doublecomplex *, integer *); - static integer minwrk, maxwrk; - static logical wantvl; - static doublereal smlnum; - static integer hswork, irwork; - extern /* Subroutine */ int zhseqr_(char *, char *, integer *, integer *, - integer *, doublecomplex *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *, integer *), ztrevc_(char *, char *, logical *, integer *, - doublecomplex *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *, integer *, integer *, doublecomplex *, - doublereal *, integer *); - static logical lquery, wantvr; - extern /* Subroutine */ int zunghr_(integer *, integer *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *, integer *); - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZGEEV computes for an N-by-N complex nonsymmetric matrix A, the - eigenvalues and, optionally, the left and/or right eigenvectors. - - The right eigenvector v(j) of A satisfies - A * v(j) = lambda(j) * v(j) - where lambda(j) is its eigenvalue. - The left eigenvector u(j) of A satisfies - u(j)**H * A = lambda(j) * u(j)**H - where u(j)**H denotes the conjugate transpose of u(j). - - The computed eigenvectors are normalized to have Euclidean norm - equal to 1 and largest component real. - - Arguments - ========= - - JOBVL (input) CHARACTER*1 - = 'N': left eigenvectors of A are not computed; - = 'V': left eigenvectors of are computed. - - JOBVR (input) CHARACTER*1 - = 'N': right eigenvectors of A are not computed; - = 'V': right eigenvectors of A are computed. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the N-by-N matrix A. - On exit, A has been overwritten. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - W (output) COMPLEX*16 array, dimension (N) - W contains the computed eigenvalues. - - VL (output) COMPLEX*16 array, dimension (LDVL,N) - If JOBVL = 'V', the left eigenvectors u(j) are stored one - after another in the columns of VL, in the same order - as their eigenvalues. - If JOBVL = 'N', VL is not referenced. - u(j) = VL(:,j), the j-th column of VL. - - LDVL (input) INTEGER - The leading dimension of the array VL. LDVL >= 1; if - JOBVL = 'V', LDVL >= N. - - VR (output) COMPLEX*16 array, dimension (LDVR,N) - If JOBVR = 'V', the right eigenvectors v(j) are stored one - after another in the columns of VR, in the same order - as their eigenvalues. - If JOBVR = 'N', VR is not referenced. - v(j) = VR(:,j), the j-th column of VR. - - LDVR (input) INTEGER - The leading dimension of the array VR. LDVR >= 1; if - JOBVR = 'V', LDVR >= N. - - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,2*N). - For good performance, LWORK must generally be larger. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - RWORK (workspace) DOUBLE PRECISION array, dimension (2*N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = i, the QR algorithm failed to compute all the - eigenvalues, and no eigenvectors have been computed; - elements and i+1:N of W contain eigenvalues which have - converged. - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --w; - vl_dim1 = *ldvl; - vl_offset = 1 + vl_dim1; - vl -= vl_offset; - vr_dim1 = *ldvr; - vr_offset = 1 + vr_dim1; - vr -= vr_offset; - --work; - --rwork; - - /* Function Body */ - *info = 0; - lquery = *lwork == -1; - wantvl = lsame_(jobvl, "V"); - wantvr = lsame_(jobvr, "V"); - if (! wantvl && ! lsame_(jobvl, "N")) { - *info = -1; - } else if (! wantvr && ! lsame_(jobvr, "N")) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*ldvl < 1 || wantvl && *ldvl < *n) { - *info = -8; - } else if (*ldvr < 1 || wantvr && *ldvr < *n) { - *info = -10; - } - -/* - Compute workspace - (Note: Comments in the code beginning "Workspace:" describe the - minimal amount of workspace needed at that point in the code, - as well as the preferred amount for good performance. - CWorkspace refers to complex workspace, and RWorkspace to real - workspace. NB refers to the optimal block size for the - immediately following subroutine, as returned by ILAENV. - HSWORK refers to the workspace preferred by ZHSEQR, as - calculated below. HSWORK is computed assuming ILO=1 and IHI=N, - the worst case.) -*/ - - minwrk = 1; - if (*info == 0 && (*lwork >= 1 || lquery)) { - maxwrk = *n + *n * ilaenv_(&c__1, "ZGEHRD", " ", n, &c__1, n, &c__0, ( - ftnlen)6, (ftnlen)1); - if (! wantvl && ! wantvr) { -/* Computing MAX */ - i__1 = 1, i__2 = *n << 1; - minwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = ilaenv_(&c__8, "ZHSEQR", "EN", n, &c__1, n, &c_n1, (ftnlen) - 6, (ftnlen)2); - maxb = max(i__1,2); -/* - Computing MIN - Computing MAX -*/ - i__3 = 2, i__4 = ilaenv_(&c__4, "ZHSEQR", "EN", n, &c__1, n, & - c_n1, (ftnlen)6, (ftnlen)2); - i__1 = min(maxb,*n), i__2 = max(i__3,i__4); - k = min(i__1,i__2); -/* Computing MAX */ - i__1 = k * (k + 2), i__2 = *n << 1; - hswork = max(i__1,i__2); - maxwrk = max(maxwrk,hswork); - } else { -/* Computing MAX */ - i__1 = 1, i__2 = *n << 1; - minwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n + (*n - 1) * ilaenv_(&c__1, "ZUNGHR", - " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = ilaenv_(&c__8, "ZHSEQR", "SV", n, &c__1, n, &c_n1, (ftnlen) - 6, (ftnlen)2); - maxb = max(i__1,2); -/* - Computing MIN - Computing MAX -*/ - i__3 = 2, i__4 = ilaenv_(&c__4, "ZHSEQR", "SV", n, &c__1, n, & - c_n1, (ftnlen)6, (ftnlen)2); - i__1 = min(maxb,*n), i__2 = max(i__3,i__4); - k = min(i__1,i__2); -/* Computing MAX */ - i__1 = k * (k + 2), i__2 = *n << 1; - hswork = max(i__1,i__2); -/* Computing MAX */ - i__1 = max(maxwrk,hswork), i__2 = *n << 1; - maxwrk = max(i__1,i__2); - } - work[1].r = (doublereal) maxwrk, work[1].i = 0.; - } - if (*lwork < minwrk && ! lquery) { - *info = -12; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZGEEV ", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Get machine constants */ - - eps = PRECISION; - smlnum = SAFEMINIMUM; - bignum = 1. / smlnum; - dlabad_(&smlnum, &bignum); - smlnum = sqrt(smlnum) / eps; - bignum = 1. / smlnum; - -/* Scale A if max element outside range [SMLNUM,BIGNUM] */ - - anrm = zlange_("M", n, n, &a[a_offset], lda, dum); - scalea = FALSE_; - if (anrm > 0. && anrm < smlnum) { - scalea = TRUE_; - cscale = smlnum; - } else if (anrm > bignum) { - scalea = TRUE_; - cscale = bignum; - } - if (scalea) { - zlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & - ierr); - } - -/* - Balance the matrix - (CWorkspace: none) - (RWorkspace: need N) -*/ - - ibal = 1; - zgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &rwork[ibal], &ierr); - -/* - Reduce to upper Hessenberg form - (CWorkspace: need 2*N, prefer N+N*NB) - (RWorkspace: none) -*/ - - itau = 1; - iwrk = itau + *n; - i__1 = *lwork - iwrk + 1; - zgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, - &ierr); - - if (wantvl) { - -/* - Want left eigenvectors - Copy Householder vectors to VL -*/ - - *(unsigned char *)side = 'L'; - zlacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) - ; - -/* - Generate unitary matrix in VL - (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) - (RWorkspace: none) -*/ - - i__1 = *lwork - iwrk + 1; - zunghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], - &i__1, &ierr); - -/* - Perform QR iteration, accumulating Schur vectors in VL - (CWorkspace: need 1, prefer HSWORK (see comments) ) - (RWorkspace: none) -*/ - - iwrk = itau; - i__1 = *lwork - iwrk + 1; - zhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vl[ - vl_offset], ldvl, &work[iwrk], &i__1, info); - - if (wantvr) { - -/* - Want left and right eigenvectors - Copy Schur vectors to VR -*/ - - *(unsigned char *)side = 'B'; - zlacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); - } - - } else if (wantvr) { - -/* - Want right eigenvectors - Copy Householder vectors to VR -*/ - - *(unsigned char *)side = 'R'; - zlacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) - ; - -/* - Generate unitary matrix in VR - (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) - (RWorkspace: none) -*/ - - i__1 = *lwork - iwrk + 1; - zunghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], - &i__1, &ierr); - -/* - Perform QR iteration, accumulating Schur vectors in VR - (CWorkspace: need 1, prefer HSWORK (see comments) ) - (RWorkspace: none) -*/ - - iwrk = itau; - i__1 = *lwork - iwrk + 1; - zhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vr[ - vr_offset], ldvr, &work[iwrk], &i__1, info); - - } else { - -/* - Compute eigenvalues only - (CWorkspace: need 1, prefer HSWORK (see comments) ) - (RWorkspace: none) -*/ - - iwrk = itau; - i__1 = *lwork - iwrk + 1; - zhseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vr[ - vr_offset], ldvr, &work[iwrk], &i__1, info); - } - -/* If INFO > 0 from ZHSEQR, then quit */ - - if (*info > 0) { - goto L50; - } - - if (wantvl || wantvr) { - -/* - Compute left and/or right eigenvectors - (CWorkspace: need 2*N) - (RWorkspace: need 2*N) -*/ - - irwork = ibal + *n; - ztrevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, - &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &rwork[irwork], - &ierr); - } - - if (wantvl) { - -/* - Undo balancing of left eigenvectors - (CWorkspace: none) - (RWorkspace: need N) -*/ - - zgebak_("B", "L", n, &ilo, &ihi, &rwork[ibal], n, &vl[vl_offset], - ldvl, &ierr); - -/* Normalize left eigenvectors and make largest component real */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - scl = 1. / dznrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); - zdscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); - i__2 = *n; - for (k = 1; k <= i__2; ++k) { - i__3 = k + i__ * vl_dim1; -/* Computing 2nd power */ - d__1 = vl[i__3].r; -/* Computing 2nd power */ - d__2 = d_imag(&vl[k + i__ * vl_dim1]); - rwork[irwork + k - 1] = d__1 * d__1 + d__2 * d__2; -/* L10: */ - } - k = idamax_(n, &rwork[irwork], &c__1); - d_cnjg(&z__2, &vl[k + i__ * vl_dim1]); - d__1 = sqrt(rwork[irwork + k - 1]); - z__1.r = z__2.r / d__1, z__1.i = z__2.i / d__1; - tmp.r = z__1.r, tmp.i = z__1.i; - zscal_(n, &tmp, &vl[i__ * vl_dim1 + 1], &c__1); - i__2 = k + i__ * vl_dim1; - i__3 = k + i__ * vl_dim1; - d__1 = vl[i__3].r; - z__1.r = d__1, z__1.i = 0.; - vl[i__2].r = z__1.r, vl[i__2].i = z__1.i; -/* L20: */ - } - } - - if (wantvr) { - -/* - Undo balancing of right eigenvectors - (CWorkspace: none) - (RWorkspace: need N) -*/ - - zgebak_("B", "R", n, &ilo, &ihi, &rwork[ibal], n, &vr[vr_offset], - ldvr, &ierr); - -/* Normalize right eigenvectors and make largest component real */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - scl = 1. / dznrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); - zdscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); - i__2 = *n; - for (k = 1; k <= i__2; ++k) { - i__3 = k + i__ * vr_dim1; -/* Computing 2nd power */ - d__1 = vr[i__3].r; -/* Computing 2nd power */ - d__2 = d_imag(&vr[k + i__ * vr_dim1]); - rwork[irwork + k - 1] = d__1 * d__1 + d__2 * d__2; -/* L30: */ - } - k = idamax_(n, &rwork[irwork], &c__1); - d_cnjg(&z__2, &vr[k + i__ * vr_dim1]); - d__1 = sqrt(rwork[irwork + k - 1]); - z__1.r = z__2.r / d__1, z__1.i = z__2.i / d__1; - tmp.r = z__1.r, tmp.i = z__1.i; - zscal_(n, &tmp, &vr[i__ * vr_dim1 + 1], &c__1); - i__2 = k + i__ * vr_dim1; - i__3 = k + i__ * vr_dim1; - d__1 = vr[i__3].r; - z__1.r = d__1, z__1.i = 0.; - vr[i__2].r = z__1.r, vr[i__2].i = z__1.i; -/* L40: */ - } - } - -/* Undo scaling if necessary */ - -L50: - if (scalea) { - i__1 = *n - *info; -/* Computing MAX */ - i__3 = *n - *info; - i__2 = max(i__3,1); - zlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &w[*info + 1] - , &i__2, &ierr); - if (*info > 0) { - i__1 = ilo - 1; - zlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &w[1], n, - &ierr); - } - } - - work[1].r = (doublereal) maxwrk, work[1].i = 0.; - return 0; - -/* End of ZGEEV */ - -} /* zgeev_ */ - -/* Subroutine */ int zgehd2_(integer *n, integer *ilo, integer *ihi, - doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * - work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - doublecomplex z__1; - - /* Builtin functions */ - void d_cnjg(doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer i__; - static doublecomplex alpha; - extern /* Subroutine */ int zlarf_(char *, integer *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *, doublecomplex *), xerbla_(char *, integer *), zlarfg_(integer *, doublecomplex *, doublecomplex *, - integer *, doublecomplex *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZGEHD2 reduces a complex general matrix A to upper Hessenberg form H - by a unitary similarity transformation: Q' * A * Q = H . - - Arguments - ========= - - N (input) INTEGER - The order of the matrix A. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that A is already upper triangular in rows - and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally - set by a previous call to ZGEBAL; otherwise they should be - set to 1 and N respectively. See Further Details. - 1 <= ILO <= IHI <= max(1,N). - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the n by n general matrix to be reduced. - On exit, the upper triangle and the first subdiagonal of A - are overwritten with the upper Hessenberg matrix H, and the - elements below the first subdiagonal, with the array TAU, - represent the unitary matrix Q as a product of elementary - reflectors. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - TAU (output) COMPLEX*16 array, dimension (N-1) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace) COMPLEX*16 array, dimension (N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - The matrix Q is represented as a product of (ihi-ilo) elementary - reflectors - - Q = H(ilo) H(ilo+1) . . . H(ihi-1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on - exit in A(i+2:ihi,i), and tau in TAU(i). - - The contents of A are illustrated by the following example, with - n = 7, ilo = 2 and ihi = 6: - - on entry, on exit, - - ( a a a a a a a ) ( a a h h h h a ) - ( a a a a a a ) ( a h h h h a ) - ( a a a a a a ) ( h h h h h h ) - ( a a a a a a ) ( v2 h h h h h ) - ( a a a a a a ) ( v2 v3 h h h h ) - ( a a a a a a ) ( v2 v3 v4 h h h ) - ( a ) ( a ) - - where a denotes an element of the original matrix A, h denotes a - modified element of the upper Hessenberg matrix H, and vi denotes an - element of the vector defining H(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*n < 0) { - *info = -1; - } else if (*ilo < 1 || *ilo > max(1,*n)) { - *info = -2; - } else if (*ihi < min(*ilo,*n) || *ihi > *n) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZGEHD2", &i__1); - return 0; - } - - i__1 = *ihi - 1; - for (i__ = *ilo; i__ <= i__1; ++i__) { - -/* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */ - - i__2 = i__ + 1 + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *ihi - i__; -/* Computing MIN */ - i__3 = i__ + 2; - zlarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, &tau[ - i__]); - i__2 = i__ + 1 + i__ * a_dim1; - a[i__2].r = 1., a[i__2].i = 0.; - -/* Apply H(i) to A(1:ihi,i+1:ihi) from the right */ - - i__2 = *ihi - i__; - zlarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ - i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]); - -/* Apply H(i)' to A(i+1:ihi,i+1:n) from the left */ - - i__2 = *ihi - i__; - i__3 = *n - i__; - d_cnjg(&z__1, &tau[i__]); - zlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &z__1, - &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]); - - i__2 = i__ + 1 + i__ * a_dim1; - a[i__2].r = alpha.r, a[i__2].i = alpha.i; -/* L10: */ - } - - return 0; - -/* End of ZGEHD2 */ - -} /* zgehd2_ */ - -/* Subroutine */ int zgehrd_(integer *n, integer *ilo, integer *ihi, - doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * - work, integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - doublecomplex z__1; - - /* Local variables */ - static integer i__; - static doublecomplex t[4160] /* was [65][64] */; - static integer ib; - static doublecomplex ei; - static integer nb, nh, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, - integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *), zgehd2_(integer *, integer *, integer - *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, - integer *, integer *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *), - zlahrd_(integer *, integer *, integer *, doublecomplex *, integer - *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZGEHRD reduces a complex general matrix A to upper Hessenberg form H - by a unitary similarity transformation: Q' * A * Q = H . - - Arguments - ========= - - N (input) INTEGER - The order of the matrix A. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that A is already upper triangular in rows - and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally - set by a previous call to ZGEBAL; otherwise they should be - set to 1 and N respectively. See Further Details. - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the N-by-N general matrix to be reduced. - On exit, the upper triangle and the first subdiagonal of A - are overwritten with the upper Hessenberg matrix H, and the - elements below the first subdiagonal, with the array TAU, - represent the unitary matrix Q as a product of elementary - reflectors. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - TAU (output) COMPLEX*16 array, dimension (N-1) - The scalar factors of the elementary reflectors (see Further - Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to - zero. - - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The length of the array WORK. LWORK >= max(1,N). - For optimum performance LWORK >= N*NB, where NB is the - optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - The matrix Q is represented as a product of (ihi-ilo) elementary - reflectors - - Q = H(ilo) H(ilo+1) . . . H(ihi-1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on - exit in A(i+2:ihi,i), and tau in TAU(i). - - The contents of A are illustrated by the following example, with - n = 7, ilo = 2 and ihi = 6: - - on entry, on exit, - - ( a a a a a a a ) ( a a h h h h a ) - ( a a a a a a ) ( a h h h h a ) - ( a a a a a a ) ( h h h h h h ) - ( a a a a a a ) ( v2 h h h h h ) - ( a a a a a a ) ( v2 v3 h h h h ) - ( a a a a a a ) ( v2 v3 v4 h h h ) - ( a ) ( a ) - - where a denotes an element of the original matrix A, h denotes a - modified element of the upper Hessenberg matrix H, and vi denotes an - element of the vector defining H(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; -/* Computing MIN */ - i__1 = 64, i__2 = ilaenv_(&c__1, "ZGEHRD", " ", n, ilo, ihi, &c_n1, ( - ftnlen)6, (ftnlen)1); - nb = min(i__1,i__2); - lwkopt = *n * nb; - work[1].r = (doublereal) lwkopt, work[1].i = 0.; - lquery = *lwork == -1; - if (*n < 0) { - *info = -1; - } else if (*ilo < 1 || *ilo > max(1,*n)) { - *info = -2; - } else if (*ihi < min(*ilo,*n) || *ihi > *n) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*lwork < max(1,*n) && ! lquery) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZGEHRD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */ - - i__1 = *ilo - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - tau[i__2].r = 0., tau[i__2].i = 0.; -/* L10: */ - } - i__1 = *n - 1; - for (i__ = max(1,*ihi); i__ <= i__1; ++i__) { - i__2 = i__; - tau[i__2].r = 0., tau[i__2].i = 0.; -/* L20: */ - } - -/* Quick return if possible */ - - nh = *ihi - *ilo + 1; - if (nh <= 1) { - work[1].r = 1., work[1].i = 0.; - return 0; - } - - nbmin = 2; - iws = 1; - if (nb > 1 && nb < nh) { - -/* - Determine when to cross over from blocked to unblocked code - (last block is always handled by unblocked code). - - Computing MAX -*/ - i__1 = nb, i__2 = ilaenv_(&c__3, "ZGEHRD", " ", n, ilo, ihi, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < nh) { - -/* Determine if workspace is large enough for blocked code. */ - - iws = *n * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: determine the - minimum value of NB, and reduce NB or force use of - unblocked code. - - Computing MAX -*/ - i__1 = 2, i__2 = ilaenv_(&c__2, "ZGEHRD", " ", n, ilo, ihi, & - c_n1, (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - if (*lwork >= *n * nbmin) { - nb = *lwork / *n; - } else { - nb = 1; - } - } - } - } - ldwork = *n; - - if (nb < nbmin || nb >= nh) { - -/* Use unblocked code below */ - - i__ = *ilo; - - } else { - -/* Use blocked code */ - - i__1 = *ihi - 1 - nx; - i__2 = nb; - for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = nb, i__4 = *ihi - i__; - ib = min(i__3,i__4); - -/* - Reduce columns i:i+ib-1 to Hessenberg form, returning the - matrices V and T of the block reflector H = I - V*T*V' - which performs the reduction, and also the matrix Y = A*V*T -*/ - - zlahrd_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & - c__65, &work[1], &ldwork); - -/* - Apply the block reflector H to A(1:ihi,i+ib:ihi) from the - right, computing A := A - Y * V'. V(i+ib,ib-1) must be set - to 1. -*/ - - i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; - ei.r = a[i__3].r, ei.i = a[i__3].i; - i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; - a[i__3].r = 1., a[i__3].i = 0.; - i__3 = *ihi - i__ - ib + 1; - z__1.r = -1., z__1.i = -0.; - zgemm_("No transpose", "Conjugate transpose", ihi, &i__3, &ib, & - z__1, &work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, - &c_b60, &a[(i__ + ib) * a_dim1 + 1], lda); - i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; - a[i__3].r = ei.r, a[i__3].i = ei.i; - -/* - Apply the block reflector H to A(i+1:ihi,i+ib:n) from the - left -*/ - - i__3 = *ihi - i__; - i__4 = *n - i__ - ib + 1; - zlarfb_("Left", "Conjugate transpose", "Forward", "Columnwise", & - i__3, &i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, & - c__65, &a[i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], & - ldwork); -/* L30: */ - } - } - -/* Use unblocked code to reduce the rest of the matrix */ - - zgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); - work[1].r = (doublereal) iws, work[1].i = 0.; - - return 0; - -/* End of ZGEHRD */ - -} /* zgehrd_ */ - -/* Subroutine */ int zgelq2_(integer *m, integer *n, doublecomplex *a, - integer *lda, doublecomplex *tau, doublecomplex *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, k; - static doublecomplex alpha; - extern /* Subroutine */ int zlarf_(char *, integer *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *, doublecomplex *), xerbla_(char *, integer *), zlarfg_(integer *, doublecomplex *, doublecomplex *, - integer *, doublecomplex *), zlacgv_(integer *, doublecomplex *, - integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZGELQ2 computes an LQ factorization of a complex m by n matrix A: - A = L * Q. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the m by n matrix A. - On exit, the elements on and below the diagonal of the array - contain the m by min(m,n) lower trapezoidal matrix L (L is - lower triangular if m <= n); the elements above the diagonal, - with the array TAU, represent the unitary matrix Q as a - product of elementary reflectors (see Further Details). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (output) COMPLEX*16 array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace) COMPLEX*16 array, dimension (M) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The matrix Q is represented as a product of elementary reflectors - - Q = H(k)' . . . H(2)' H(1)', where k = min(m,n). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in - A(i,i+1:n), and tau in TAU(i). - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZGELQ2", &i__1); - return 0; - } - - k = min(*m,*n); - - i__1 = k; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Generate elementary reflector H(i) to annihilate A(i,i+1:n) */ - - i__2 = *n - i__ + 1; - zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); - i__2 = i__ + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *n - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - zlarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, &tau[i__] - ); - if (i__ < *m) { - -/* Apply H(i) to A(i+1:m,i:n) from the right */ - - i__2 = i__ + i__ * a_dim1; - a[i__2].r = 1., a[i__2].i = 0.; - i__2 = *m - i__; - i__3 = *n - i__ + 1; - zlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[ - i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); - } - i__2 = i__ + i__ * a_dim1; - a[i__2].r = alpha.r, a[i__2].i = alpha.i; - i__2 = *n - i__ + 1; - zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); -/* L10: */ - } - return 0; - -/* End of ZGELQ2 */ - -} /* zgelq2_ */ - -/* Subroutine */ int zgelqf_(integer *m, integer *n, doublecomplex *a, - integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int zgelq2_(integer *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *), xerbla_( - char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, - integer *, integer *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *); - static integer ldwork; - extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *); - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZGELQF computes an LQ factorization of a complex M-by-N matrix A: - A = L * Q. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the M-by-N matrix A. - On exit, the elements on and below the diagonal of the array - contain the m-by-min(m,n) lower trapezoidal matrix L (L is - lower triangular if m <= n); the elements above the diagonal, - with the array TAU, represent the unitary matrix Q as a - product of elementary reflectors (see Further Details). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (output) COMPLEX*16 array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,M). - For optimum performance LWORK >= M*NB, where NB is the - optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The matrix Q is represented as a product of elementary reflectors - - Q = H(k)' . . . H(2)' H(1)', where k = min(m,n). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in - A(i,i+1:n), and tau in TAU(i). - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nb = ilaenv_(&c__1, "ZGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) - 1); - lwkopt = *m * nb; - work[1].r = (doublereal) lwkopt, work[1].i = 0.; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } else if (*lwork < max(1,*m) && ! lquery) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZGELQF", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - k = min(*m,*n); - if (k == 0) { - work[1].r = 1., work[1].i = 0.; - return 0; - } - - nbmin = 2; - nx = 0; - iws = *m; - if (nb > 1 && nb < k) { - -/* - Determine when to cross over from blocked to unblocked code. - - Computing MAX -*/ - i__1 = 0, i__2 = ilaenv_(&c__3, "ZGELQF", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < k) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *m; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: reduce NB and - determine the minimum value of NB. -*/ - - nb = *lwork / ldwork; -/* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "ZGELQF", " ", m, n, &c_n1, & - c_n1, (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - } - } - } - - if (nb >= nbmin && nb < k && nx < k) { - -/* Use blocked code initially */ - - i__1 = k - nx; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = k - i__ + 1; - ib = min(i__3,nb); - -/* - Compute the LQ factorization of the current block - A(i:i+ib-1,i:n) -*/ - - i__3 = *n - i__ + 1; - zgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ - 1], &iinfo); - if (i__ + ib <= *m) { - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__3 = *n - i__ + 1; - zlarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], &work[1], &ldwork); - -/* Apply H to A(i+ib:m,i:n) from the right */ - - i__3 = *m - i__ - ib + 1; - i__4 = *n - i__ + 1; - zlarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, - &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & - ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + - 1], &ldwork); - } -/* L10: */ - } - } else { - i__ = 1; - } - -/* Use unblocked code to factor the last or only block. */ - - if (i__ <= k) { - i__2 = *m - i__ + 1; - i__1 = *n - i__ + 1; - zgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] - , &iinfo); - } - - work[1].r = (doublereal) iws, work[1].i = 0.; - return 0; - -/* End of ZGELQF */ - -} /* zgelqf_ */ - -/* Subroutine */ int zgelsd_(integer *m, integer *n, integer *nrhs, - doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, - doublereal *s, doublereal *rcond, integer *rank, doublecomplex *work, - integer *lwork, doublereal *rwork, integer *iwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; - doublereal d__1; - doublecomplex z__1; - - /* Local variables */ - static integer ie, il, mm; - static doublereal eps, anrm, bnrm; - static integer itau, iascl, ibscl; - static doublereal sfmin; - static integer minmn, maxmn, itaup, itauq, mnthr, nwork; - extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); - - extern /* Subroutine */ int dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *), dlaset_(char *, integer *, integer - *, doublereal *, doublereal *, doublereal *, integer *), - xerbla_(char *, integer *), zgebrd_(integer *, integer *, - doublecomplex *, integer *, doublereal *, doublereal *, - doublecomplex *, doublecomplex *, doublecomplex *, integer *, - integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, - integer *, doublereal *); - static doublereal bignum; - extern /* Subroutine */ int zgelqf_(integer *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *, integer * - ), zlalsd_(char *, integer *, integer *, integer *, doublereal *, - doublereal *, doublecomplex *, integer *, doublereal *, integer *, - doublecomplex *, doublereal *, integer *, integer *), - zlascl_(char *, integer *, integer *, doublereal *, doublereal *, - integer *, integer *, doublecomplex *, integer *, integer *), zgeqrf_(integer *, integer *, doublecomplex *, integer *, - doublecomplex *, doublecomplex *, integer *, integer *); - static integer ldwork; - extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, - doublecomplex *, integer *, doublecomplex *, integer *), - zlaset_(char *, integer *, integer *, doublecomplex *, - doublecomplex *, doublecomplex *, integer *); - static integer minwrk, maxwrk; - static doublereal smlnum; - extern /* Subroutine */ int zunmbr_(char *, char *, char *, integer *, - integer *, integer *, doublecomplex *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *, integer * - ); - static logical lquery; - static integer nrwork, smlsiz; - extern /* Subroutine */ int zunmlq_(char *, char *, integer *, integer *, - integer *, doublecomplex *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *, integer *), zunmqr_(char *, char *, integer *, integer *, - integer *, doublecomplex *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *, integer *); - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - ZGELSD computes the minimum-norm solution to a real linear least - squares problem: - minimize 2-norm(| b - A*x |) - using the singular value decomposition (SVD) of A. A is an M-by-N - matrix which may be rank-deficient. - - Several right hand side vectors b and solution vectors x can be - handled in a single call; they are stored as the columns of the - M-by-NRHS right hand side matrix B and the N-by-NRHS solution - matrix X. - - The problem is solved in three steps: - (1) Reduce the coefficient matrix A to bidiagonal form with - Householder tranformations, reducing the original problem - into a "bidiagonal least squares problem" (BLS) - (2) Solve the BLS using a divide and conquer approach. - (3) Apply back all the Householder tranformations to solve - the original least squares problem. - - The effective rank of A is determined by treating as zero those - singular values which are less than RCOND times the largest singular - value. - - The divide and conquer algorithm makes very mild assumptions about - floating point arithmetic. It will work on machines with a guard - digit in add/subtract, or on those binary machines without guard - digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or - Cray-2. It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - NRHS (input) INTEGER - The number of right hand sides, i.e., the number of columns - of the matrices B and X. NRHS >= 0. - - A (input) COMPLEX*16 array, dimension (LDA,N) - On entry, the M-by-N matrix A. - On exit, A has been destroyed. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) - On entry, the M-by-NRHS right hand side matrix B. - On exit, B is overwritten by the N-by-NRHS solution matrix X. - If m >= n and RANK = n, the residual sum-of-squares for - the solution in the i-th column is given by the sum of - squares of elements n+1:m in that column. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,M,N). - - S (output) DOUBLE PRECISION array, dimension (min(M,N)) - The singular values of A in decreasing order. - The condition number of A in the 2-norm = S(1)/S(min(m,n)). - - RCOND (input) DOUBLE PRECISION - RCOND is used to determine the effective rank of A. - Singular values S(i) <= RCOND*S(1) are treated as zero. - If RCOND < 0, machine precision is used instead. - - RANK (output) INTEGER - The effective rank of A, i.e., the number of singular values - which are greater than RCOND*S(1). - - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK must be at least 1. - The exact minimum amount of workspace needed depends on M, - N and NRHS. As long as LWORK is at least - 2 * N + N * NRHS - if M is greater than or equal to N or - 2 * M + M * NRHS - if M is less than N, the code will execute correctly. - For good performance, LWORK should generally be larger. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - RWORK (workspace) DOUBLE PRECISION array, dimension at least - 10*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + - (SMLSIZ+1)**2 - if M is greater than or equal to N or - 10*M + 2*M*SMLSIZ + 8*M*NLVL + 3*SMLSIZ*NRHS + - (SMLSIZ+1)**2 - if M is less than N, the code will execute correctly. - SMLSIZ is returned by ILAENV and is equal to the maximum - size of the subproblems at the bottom of the computation - tree (usually about 25), and - NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) - - IWORK (workspace) INTEGER array, dimension (LIWORK) - LIWORK >= 3 * MINMN * NLVL + 11 * MINMN, - where MINMN = MIN( M,N ). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: the algorithm for computing the SVD failed to converge; - if INFO = i, i off-diagonal elements of an intermediate - bidiagonal form did not converge to zero. - - Further Details - =============== - - Based on contributions by - Ming Gu and Ren-Cang Li, Computer Science Division, University of - California at Berkeley, USA - Osni Marques, LBNL/NERSC, USA - - ===================================================================== - - - Test the input arguments. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - --s; - --work; - --rwork; - --iwork; - - /* Function Body */ - *info = 0; - minmn = min(*m,*n); - maxmn = max(*m,*n); - mnthr = ilaenv_(&c__6, "ZGELSD", " ", m, n, nrhs, &c_n1, (ftnlen)6, ( - ftnlen)1); - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*nrhs < 0) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } else if (*ldb < max(1,maxmn)) { - *info = -7; - } - - smlsiz = ilaenv_(&c__9, "ZGELSD", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); - -/* - Compute workspace. - (Note: Comments in the code beginning "Workspace:" describe the - minimal amount of workspace needed at that point in the code, - as well as the preferred amount for good performance. - NB refers to the optimal block size for the immediately - following subroutine, as returned by ILAENV.) -*/ - - minwrk = 1; - if (*info == 0) { - maxwrk = 0; - mm = *m; - if (*m >= *n && *m >= mnthr) { - -/* Path 1a - overdetermined, with many more rows than columns. */ - - mm = *n; -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n * ilaenv_(&c__1, "ZGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *nrhs * ilaenv_(&c__1, "ZUNMQR", "LC", m, - nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2); - maxwrk = max(i__1,i__2); - } - if (*m >= *n) { - -/* - Path 1 - overdetermined or exactly determined. - - Computing MAX -*/ - i__1 = maxwrk, i__2 = (*n << 1) + (mm + *n) * ilaenv_(&c__1, - "ZGEBRD", " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1) - ; - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *nrhs * ilaenv_(&c__1, "ZUNMBR", - "QLC", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, "ZUN" - "MBR", "PLN", n, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *n * *nrhs; - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = (*n << 1) + mm, i__2 = (*n << 1) + *n * *nrhs; - minwrk = max(i__1,i__2); - } - if (*n > *m) { - if (*n >= mnthr) { - -/* - Path 2a - underdetermined, with many more columns - than rows. -*/ - - maxwrk = *m + *m * ilaenv_(&c__1, "ZGELQF", " ", m, n, &c_n1, - &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * - ilaenv_(&c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * ilaenv_(& - c__1, "ZUNMBR", "QLC", m, nrhs, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * - ilaenv_(&c__1, "ZUNMLQ", "LC", n, nrhs, m, &c_n1, ( - ftnlen)6, (ftnlen)2); - maxwrk = max(i__1,i__2); - if (*nrhs > 1) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs; - maxwrk = max(i__1,i__2); - } else { -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + (*m << 1); - maxwrk = max(i__1,i__2); - } -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *m * *nrhs; - maxwrk = max(i__1,i__2); - } else { - -/* Path 2 - underdetermined. */ - - maxwrk = (*m << 1) + (*n + *m) * ilaenv_(&c__1, "ZGEBRD", - " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *nrhs * ilaenv_(&c__1, - "ZUNMBR", "QLC", m, nrhs, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, "ZUNMBR" - , "PLN", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *m * *nrhs; - maxwrk = max(i__1,i__2); - } -/* Computing MAX */ - i__1 = (*m << 1) + *n, i__2 = (*m << 1) + *m * *nrhs; - minwrk = max(i__1,i__2); - } - minwrk = min(minwrk,maxwrk); - d__1 = (doublereal) maxwrk; - z__1.r = d__1, z__1.i = 0.; - work[1].r = z__1.r, work[1].i = z__1.i; - if (*lwork < minwrk && ! lquery) { - *info = -12; - } - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZGELSD", &i__1); - return 0; - } else if (lquery) { - goto L10; - } - -/* Quick return if possible. */ - - if (*m == 0 || *n == 0) { - *rank = 0; - return 0; - } - -/* Get machine parameters. */ - - eps = PRECISION; - sfmin = SAFEMINIMUM; - smlnum = sfmin / eps; - bignum = 1. / smlnum; - dlabad_(&smlnum, &bignum); - -/* Scale A if max entry outside range [SMLNUM,BIGNUM]. */ - - anrm = zlange_("M", m, n, &a[a_offset], lda, &rwork[1]); - iascl = 0; - if (anrm > 0. && anrm < smlnum) { - -/* Scale matrix norm up to SMLNUM */ - - zlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, - info); - iascl = 1; - } else if (anrm > bignum) { - -/* Scale matrix norm down to BIGNUM. */ - - zlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, - info); - iascl = 2; - } else if (anrm == 0.) { - -/* Matrix all zero. Return zero solution. */ - - i__1 = max(*m,*n); - zlaset_("F", &i__1, nrhs, &c_b59, &c_b59, &b[b_offset], ldb); - dlaset_("F", &minmn, &c__1, &c_b324, &c_b324, &s[1], &c__1) - ; - *rank = 0; - goto L10; - } - -/* Scale B if max entry outside range [SMLNUM,BIGNUM]. */ - - bnrm = zlange_("M", m, nrhs, &b[b_offset], ldb, &rwork[1]); - ibscl = 0; - if (bnrm > 0. && bnrm < smlnum) { - -/* Scale matrix norm up to SMLNUM. */ - - zlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, - info); - ibscl = 1; - } else if (bnrm > bignum) { - -/* Scale matrix norm down to BIGNUM. */ - - zlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, - info); - ibscl = 2; - } - -/* If M < N make sure B(M+1:N,:) = 0 */ - - if (*m < *n) { - i__1 = *n - *m; - zlaset_("F", &i__1, nrhs, &c_b59, &c_b59, &b[*m + 1 + b_dim1], ldb); - } - -/* Overdetermined case. */ - - if (*m >= *n) { - -/* Path 1 - overdetermined or exactly determined. */ - - mm = *m; - if (*m >= mnthr) { - -/* Path 1a - overdetermined, with many more rows than columns */ - - mm = *n; - itau = 1; - nwork = itau + *n; - -/* - Compute A=Q*R. - (RWorkspace: need N) - (CWorkspace: need N, prefer N*NB) -*/ - - i__1 = *lwork - nwork + 1; - zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, - info); - -/* - Multiply B by transpose(Q). - (RWorkspace: need N) - (CWorkspace: need NRHS, prefer NRHS*NB) -*/ - - i__1 = *lwork - nwork + 1; - zunmqr_("L", "C", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[ - b_offset], ldb, &work[nwork], &i__1, info); - -/* Zero out below R. */ - - if (*n > 1) { - i__1 = *n - 1; - i__2 = *n - 1; - zlaset_("L", &i__1, &i__2, &c_b59, &c_b59, &a[a_dim1 + 2], - lda); - } - } - - itauq = 1; - itaup = itauq + *n; - nwork = itaup + *n; - ie = 1; - nrwork = ie + *n; - -/* - Bidiagonalize R in A. - (RWorkspace: need N) - (CWorkspace: need 2*N+MM, prefer 2*N+(MM+N)*NB) -*/ - - i__1 = *lwork - nwork + 1; - zgebrd_(&mm, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], & - work[itaup], &work[nwork], &i__1, info); - -/* - Multiply B by transpose of left bidiagonalizing vectors of R. - (CWorkspace: need 2*N+NRHS, prefer 2*N+NRHS*NB) -*/ - - i__1 = *lwork - nwork + 1; - zunmbr_("Q", "L", "C", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], - &b[b_offset], ldb, &work[nwork], &i__1, info); - -/* Solve the bidiagonal least squares problem. */ - - zlalsd_("U", &smlsiz, n, nrhs, &s[1], &rwork[ie], &b[b_offset], ldb, - rcond, rank, &work[nwork], &rwork[nrwork], &iwork[1], info); - if (*info != 0) { - goto L10; - } - -/* Multiply B by right bidiagonalizing vectors of R. */ - - i__1 = *lwork - nwork + 1; - zunmbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], & - b[b_offset], ldb, &work[nwork], &i__1, info); - - } else /* if(complicated condition) */ { -/* Computing MAX */ - i__1 = *m, i__2 = (*m << 1) - 4, i__1 = max(i__1,i__2), i__1 = max( - i__1,*nrhs), i__2 = *n - *m * 3; - if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__1,i__2)) { - -/* - Path 2a - underdetermined, with many more columns than rows - and sufficient workspace for an efficient algorithm. -*/ - - ldwork = *m; -/* - Computing MAX - Computing MAX -*/ - i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = - max(i__3,*nrhs), i__4 = *n - *m * 3; - i__1 = (*m << 2) + *m * *lda + max(i__3,i__4), i__2 = *m * *lda + - *m + *m * *nrhs; - if (*lwork >= max(i__1,i__2)) { - ldwork = *lda; - } - itau = 1; - nwork = *m + 1; - -/* - Compute A=L*Q. - (CWorkspace: need 2*M, prefer M+M*NB) -*/ - - i__1 = *lwork - nwork + 1; - zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, - info); - il = nwork; - -/* Copy L to WORK(IL), zeroing out above its diagonal. */ - - zlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork); - i__1 = *m - 1; - i__2 = *m - 1; - zlaset_("U", &i__1, &i__2, &c_b59, &c_b59, &work[il + ldwork], & - ldwork); - itauq = il + ldwork * *m; - itaup = itauq + *m; - nwork = itaup + *m; - ie = 1; - nrwork = ie + *m; - -/* - Bidiagonalize L in WORK(IL). - (RWorkspace: need M) - (CWorkspace: need M*M+4*M, prefer M*M+4*M+2*M*NB) -*/ - - i__1 = *lwork - nwork + 1; - zgebrd_(m, m, &work[il], &ldwork, &s[1], &rwork[ie], &work[itauq], - &work[itaup], &work[nwork], &i__1, info); - -/* - Multiply B by transpose of left bidiagonalizing vectors of L. - (CWorkspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) -*/ - - i__1 = *lwork - nwork + 1; - zunmbr_("Q", "L", "C", m, nrhs, m, &work[il], &ldwork, &work[ - itauq], &b[b_offset], ldb, &work[nwork], &i__1, info); - -/* Solve the bidiagonal least squares problem. */ - - zlalsd_("U", &smlsiz, m, nrhs, &s[1], &rwork[ie], &b[b_offset], - ldb, rcond, rank, &work[nwork], &rwork[nrwork], &iwork[1], - info); - if (*info != 0) { - goto L10; - } - -/* Multiply B by right bidiagonalizing vectors of L. */ - - i__1 = *lwork - nwork + 1; - zunmbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[ - itaup], &b[b_offset], ldb, &work[nwork], &i__1, info); - -/* Zero out below first M rows of B. */ - - i__1 = *n - *m; - zlaset_("F", &i__1, nrhs, &c_b59, &c_b59, &b[*m + 1 + b_dim1], - ldb); - nwork = itau + *m; - -/* - Multiply transpose(Q) by B. - (CWorkspace: need NRHS, prefer NRHS*NB) -*/ - - i__1 = *lwork - nwork + 1; - zunmlq_("L", "C", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[ - b_offset], ldb, &work[nwork], &i__1, info); - - } else { - -/* Path 2 - remaining underdetermined cases. */ - - itauq = 1; - itaup = itauq + *m; - nwork = itaup + *m; - ie = 1; - nrwork = ie + *m; - -/* - Bidiagonalize A. - (RWorkspace: need M) - (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) -*/ - - i__1 = *lwork - nwork + 1; - zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], - &work[itaup], &work[nwork], &i__1, info); - -/* - Multiply B by transpose of left bidiagonalizing vectors. - (CWorkspace: need 2*M+NRHS, prefer 2*M+NRHS*NB) -*/ - - i__1 = *lwork - nwork + 1; - zunmbr_("Q", "L", "C", m, nrhs, n, &a[a_offset], lda, &work[itauq] - , &b[b_offset], ldb, &work[nwork], &i__1, info); - -/* Solve the bidiagonal least squares problem. */ - - zlalsd_("L", &smlsiz, m, nrhs, &s[1], &rwork[ie], &b[b_offset], - ldb, rcond, rank, &work[nwork], &rwork[nrwork], &iwork[1], - info); - if (*info != 0) { - goto L10; - } - -/* Multiply B by right bidiagonalizing vectors of A. */ - - i__1 = *lwork - nwork + 1; - zunmbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup] - , &b[b_offset], ldb, &work[nwork], &i__1, info); - - } - } - -/* Undo scaling. */ - - if (iascl == 1) { - zlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, - info); - dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & - minmn, info); - } else if (iascl == 2) { - zlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, - info); - dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & - minmn, info); - } - if (ibscl == 1) { - zlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, - info); - } else if (ibscl == 2) { - zlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, - info); - } - -L10: - d__1 = (doublereal) maxwrk; - z__1.r = d__1, z__1.i = 0.; - work[1].r = z__1.r, work[1].i = z__1.i; - return 0; - -/* End of ZGELSD */ - -} /* zgelsd_ */ - -/* Subroutine */ int zgeqr2_(integer *m, integer *n, doublecomplex *a, - integer *lda, doublecomplex *tau, doublecomplex *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - doublecomplex z__1; - - /* Builtin functions */ - void d_cnjg(doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer i__, k; - static doublecomplex alpha; - extern /* Subroutine */ int zlarf_(char *, integer *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *, doublecomplex *), xerbla_(char *, integer *), zlarfg_(integer *, doublecomplex *, doublecomplex *, - integer *, doublecomplex *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZGEQR2 computes a QR factorization of a complex m by n matrix A: - A = Q * R. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the m by n matrix A. - On exit, the elements on and above the diagonal of the array - contain the min(m,n) by n upper trapezoidal matrix R (R is - upper triangular if m >= n); the elements below the diagonal, - with the array TAU, represent the unitary matrix Q as a - product of elementary reflectors (see Further Details). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (output) COMPLEX*16 array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace) COMPLEX*16 array, dimension (N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The matrix Q is represented as a product of elementary reflectors - - Q = H(1) H(2) . . . H(k), where k = min(m,n). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), - and tau in TAU(i). - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZGEQR2", &i__1); - return 0; - } - - k = min(*m,*n); - - i__1 = k; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ - - i__2 = *m - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - zlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1] - , &c__1, &tau[i__]); - if (i__ < *n) { - -/* Apply H(i)' to A(i:m,i+1:n) from the left */ - - i__2 = i__ + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = i__ + i__ * a_dim1; - a[i__2].r = 1., a[i__2].i = 0.; - i__2 = *m - i__ + 1; - i__3 = *n - i__; - d_cnjg(&z__1, &tau[i__]); - zlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &z__1, - &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); - i__2 = i__ + i__ * a_dim1; - a[i__2].r = alpha.r, a[i__2].i = alpha.i; - } -/* L10: */ - } - return 0; - -/* End of ZGEQR2 */ - -} /* zgeqr2_ */ - -/* Subroutine */ int zgeqrf_(integer *m, integer *n, doublecomplex *a, - integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int zgeqr2_(integer *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *), xerbla_( - char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, - integer *, integer *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *); - static integer ldwork; - extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *); - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZGEQRF computes a QR factorization of a complex M-by-N matrix A: - A = Q * R. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the M-by-N matrix A. - On exit, the elements on and above the diagonal of the array - contain the min(M,N)-by-N upper trapezoidal matrix R (R is - upper triangular if m >= n); the elements below the diagonal, - with the array TAU, represent the unitary matrix Q as a - product of min(m,n) elementary reflectors (see Further - Details). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (output) COMPLEX*16 array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,N). - For optimum performance LWORK >= N*NB, where NB is - the optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The matrix Q is represented as a product of elementary reflectors - - Q = H(1) H(2) . . . H(k), where k = min(m,n). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), - and tau in TAU(i). - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nb = ilaenv_(&c__1, "ZGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) - 1); - lwkopt = *n * nb; - work[1].r = (doublereal) lwkopt, work[1].i = 0.; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } else if (*lwork < max(1,*n) && ! lquery) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZGEQRF", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - k = min(*m,*n); - if (k == 0) { - work[1].r = 1., work[1].i = 0.; - return 0; - } - - nbmin = 2; - nx = 0; - iws = *n; - if (nb > 1 && nb < k) { - -/* - Determine when to cross over from blocked to unblocked code. - - Computing MAX -*/ - i__1 = 0, i__2 = ilaenv_(&c__3, "ZGEQRF", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < k) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *n; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: reduce NB and - determine the minimum value of NB. -*/ - - nb = *lwork / ldwork; -/* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "ZGEQRF", " ", m, n, &c_n1, & - c_n1, (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - } - } - } - - if (nb >= nbmin && nb < k && nx < k) { - -/* Use blocked code initially */ - - i__1 = k - nx; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = k - i__ + 1; - ib = min(i__3,nb); - -/* - Compute the QR factorization of the current block - A(i:m,i:i+ib-1) -*/ - - i__3 = *m - i__ + 1; - zgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ - 1], &iinfo); - if (i__ + ib <= *n) { - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__3 = *m - i__ + 1; - zlarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], &work[1], &ldwork); - -/* Apply H' to A(i:m,i+ib:n) from the left */ - - i__3 = *m - i__ + 1; - i__4 = *n - i__ - ib + 1; - zlarfb_("Left", "Conjugate transpose", "Forward", "Columnwise" - , &i__3, &i__4, &ib, &a[i__ + i__ * a_dim1], lda, & - work[1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, - &work[ib + 1], &ldwork); - } -/* L10: */ - } - } else { - i__ = 1; - } - -/* Use unblocked code to factor the last or only block. */ - - if (i__ <= k) { - i__2 = *m - i__ + 1; - i__1 = *n - i__ + 1; - zgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] - , &iinfo); - } - - work[1].r = (doublereal) iws, work[1].i = 0.; - return 0; - -/* End of ZGEQRF */ - -} /* zgeqrf_ */ - -/* Subroutine */ int zgesdd_(char *jobz, integer *m, integer *n, - doublecomplex *a, integer *lda, doublereal *s, doublecomplex *u, - integer *ldu, doublecomplex *vt, integer *ldvt, doublecomplex *work, - integer *lwork, doublereal *rwork, integer *iwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, - i__2, i__3; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__, ie, il, ir, iu, blk; - static doublereal dum[1], eps; - static integer iru, ivt, iscl; - static doublereal anrm; - static integer idum[1], ierr, itau, irvt; - extern logical lsame_(char *, char *); - static integer chunk, minmn; - extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, - integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *); - static integer wrkbl, itaup, itauq; - static logical wntqa; - static integer nwork; - static logical wntqn, wntqo, wntqs; - extern /* Subroutine */ int zlacp2_(char *, integer *, integer *, - doublereal *, integer *, doublecomplex *, integer *); - static integer mnthr1, mnthr2; - extern /* Subroutine */ int dbdsdc_(char *, char *, integer *, doublereal - *, doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, integer *, integer *); - - extern /* Subroutine */ int dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *), xerbla_(char *, integer *), - zgebrd_(integer *, integer *, doublecomplex *, integer *, - doublereal *, doublereal *, doublecomplex *, doublecomplex *, - doublecomplex *, integer *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static doublereal bignum; - extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, - integer *, doublereal *); - extern /* Subroutine */ int zgelqf_(integer *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *, integer * - ), zlacrm_(integer *, integer *, doublecomplex *, integer *, - doublereal *, integer *, doublecomplex *, integer *, doublereal *) - , zlarcm_(integer *, integer *, doublereal *, integer *, - doublecomplex *, integer *, doublecomplex *, integer *, - doublereal *), zlascl_(char *, integer *, integer *, doublereal *, - doublereal *, integer *, integer *, doublecomplex *, integer *, - integer *), zgeqrf_(integer *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *, integer * - ); - static integer ldwrkl; - extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, - doublecomplex *, integer *, doublecomplex *, integer *), - zlaset_(char *, integer *, integer *, doublecomplex *, - doublecomplex *, doublecomplex *, integer *); - static integer ldwrkr, minwrk, ldwrku, maxwrk; - extern /* Subroutine */ int zungbr_(char *, integer *, integer *, integer - *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *, integer *); - static integer ldwkvt; - static doublereal smlnum; - static logical wntqas; - extern /* Subroutine */ int zunmbr_(char *, char *, char *, integer *, - integer *, integer *, doublecomplex *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *, integer * - ), zunglq_(integer *, integer *, integer * - , doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *, integer *); - static logical lquery; - static integer nrwork; - extern /* Subroutine */ int zungqr_(integer *, integer *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *, integer *); - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - ZGESDD computes the singular value decomposition (SVD) of a complex - M-by-N matrix A, optionally computing the left and/or right singular - vectors, by using divide-and-conquer method. The SVD is written - - A = U * SIGMA * conjugate-transpose(V) - - where SIGMA is an M-by-N matrix which is zero except for its - min(m,n) diagonal elements, U is an M-by-M unitary matrix, and - V is an N-by-N unitary matrix. The diagonal elements of SIGMA - are the singular values of A; they are real and non-negative, and - are returned in descending order. The first min(m,n) columns of - U and V are the left and right singular vectors of A. - - Note that the routine returns VT = V**H, not V. - - The divide and conquer algorithm makes very mild assumptions about - floating point arithmetic. It will work on machines with a guard - digit in add/subtract, or on those binary machines without guard - digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or - Cray-2. It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. - - Arguments - ========= - - JOBZ (input) CHARACTER*1 - Specifies options for computing all or part of the matrix U: - = 'A': all M columns of U and all N rows of V**H are - returned in the arrays U and VT; - = 'S': the first min(M,N) columns of U and the first - min(M,N) rows of V**H are returned in the arrays U - and VT; - = 'O': If M >= N, the first N columns of U are overwritten - on the array A and all rows of V**H are returned in - the array VT; - otherwise, all columns of U are returned in the - array U and the first M rows of V**H are overwritten - in the array VT; - = 'N': no columns of U or rows of V**H are computed. - - M (input) INTEGER - The number of rows of the input matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the input matrix A. N >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the M-by-N matrix A. - On exit, - if JOBZ = 'O', A is overwritten with the first N columns - of U (the left singular vectors, stored - columnwise) if M >= N; - A is overwritten with the first M rows - of V**H (the right singular vectors, stored - rowwise) otherwise. - if JOBZ .ne. 'O', the contents of A are destroyed. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - S (output) DOUBLE PRECISION array, dimension (min(M,N)) - The singular values of A, sorted so that S(i) >= S(i+1). - - U (output) COMPLEX*16 array, dimension (LDU,UCOL) - UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; - UCOL = min(M,N) if JOBZ = 'S'. - If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M - unitary matrix U; - if JOBZ = 'S', U contains the first min(M,N) columns of U - (the left singular vectors, stored columnwise); - if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. - - LDU (input) INTEGER - The leading dimension of the array U. LDU >= 1; if - JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. - - VT (output) COMPLEX*16 array, dimension (LDVT,N) - If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the - N-by-N unitary matrix V**H; - if JOBZ = 'S', VT contains the first min(M,N) rows of - V**H (the right singular vectors, stored rowwise); - if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. - - LDVT (input) INTEGER - The leading dimension of the array VT. LDVT >= 1; if - JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; - if JOBZ = 'S', LDVT >= min(M,N). - - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= 1. - if JOBZ = 'N', LWORK >= 2*min(M,N)+max(M,N). - if JOBZ = 'O', - LWORK >= 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N). - if JOBZ = 'S' or 'A', - LWORK >= min(M,N)*min(M,N)+2*min(M,N)+max(M,N). - For good performance, LWORK should generally be larger. - If LWORK < 0 but other input arguments are legal, WORK(1) - returns the optimal LWORK. - - RWORK (workspace) DOUBLE PRECISION array, dimension (LRWORK) - If JOBZ = 'N', LRWORK >= 7*min(M,N). - Otherwise, LRWORK >= 5*min(M,N)*min(M,N) + 5*min(M,N) - - IWORK (workspace) INTEGER array, dimension (8*min(M,N)) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The updating process of DBDSDC did not converge. - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --s; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - --work; - --rwork; - --iwork; - - /* Function Body */ - *info = 0; - minmn = min(*m,*n); - mnthr1 = (integer) (minmn * 17. / 9.); - mnthr2 = (integer) (minmn * 5. / 3.); - wntqa = lsame_(jobz, "A"); - wntqs = lsame_(jobz, "S"); - wntqas = wntqa || wntqs; - wntqo = lsame_(jobz, "O"); - wntqn = lsame_(jobz, "N"); - minwrk = 1; - maxwrk = 1; - lquery = *lwork == -1; - - if (! (wntqa || wntqs || wntqo || wntqn)) { - *info = -1; - } else if (*m < 0) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < * - m) { - *info = -8; - } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || - wntqo && *m >= *n && *ldvt < *n) { - *info = -10; - } - -/* - Compute workspace - (Note: Comments in the code beginning "Workspace:" describe the - minimal amount of workspace needed at that point in the code, - as well as the preferred amount for good performance. - CWorkspace refers to complex workspace, and RWorkspace to - real workspace. NB refers to the optimal block size for the - immediately following subroutine, as returned by ILAENV.) -*/ - - if (*info == 0 && *m > 0 && *n > 0) { - if (*m >= *n) { - -/* - There is no complex work space needed for bidiagonal SVD - The real work space needed for bidiagonal SVD is BDSPAC, - BDSPAC = 3*N*N + 4*N -*/ - - if (*m >= mnthr1) { - if (wntqn) { - -/* Path 1 (M much larger than N, JOBZ='N') */ - - wrkbl = *n + *n * ilaenv_(&c__1, "ZGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& - c__1, "ZGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); - wrkbl = max(i__1,i__2); - maxwrk = wrkbl; - minwrk = *n * 3; - } else if (wntqo) { - -/* Path 2 (M much larger than N, JOBZ='O') */ - - wrkbl = *n + *n * ilaenv_(&c__1, "ZGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "ZUNGQR", - " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& - c__1, "ZGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "ZUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "ZUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); - maxwrk = *m * *n + *n * *n + wrkbl; - minwrk = (*n << 1) * *n + *n * 3; - } else if (wntqs) { - -/* Path 3 (M much larger than N, JOBZ='S') */ - - wrkbl = *n + *n * ilaenv_(&c__1, "ZGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "ZUNGQR", - " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& - c__1, "ZGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "ZUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "ZUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); - maxwrk = *n * *n + wrkbl; - minwrk = *n * *n + *n * 3; - } else if (wntqa) { - -/* Path 4 (M much larger than N, JOBZ='A') */ - - wrkbl = *n + *n * ilaenv_(&c__1, "ZGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n + *m * ilaenv_(&c__1, "ZUNGQR", - " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& - c__1, "ZGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "ZUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "ZUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); - maxwrk = *n * *n + wrkbl; - minwrk = *n * *n + (*n << 1) + *m; - } - } else if (*m >= mnthr2) { - -/* Path 5 (M much larger than N, but not as much as MNTHR1) */ - - maxwrk = (*n << 1) + (*m + *n) * ilaenv_(&c__1, "ZGEBRD", - " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - minwrk = (*n << 1) + *m; - if (wntqo) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "ZUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "ZUNGBR", "Q", m, n, n, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); - maxwrk += *m * *n; - minwrk += *n * *n; - } else if (wntqs) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "ZUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "ZUNGBR", "Q", m, n, n, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); - } else if (wntqa) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "ZUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *m * ilaenv_(&c__1, - "ZUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); - } - } else { - -/* Path 6 (M at least N, but not much larger) */ - - maxwrk = (*n << 1) + (*m + *n) * ilaenv_(&c__1, "ZGEBRD", - " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - minwrk = (*n << 1) + *m; - if (wntqo) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "ZUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "ZUNMBR", "QLN", m, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); - maxwrk += *m * *n; - minwrk += *n * *n; - } else if (wntqs) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "ZUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "ZUNMBR", "QLN", m, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); - } else if (wntqa) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, - "ZUNGBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *m * ilaenv_(&c__1, - "ZUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); - } - } - } else { - -/* - There is no complex work space needed for bidiagonal SVD - The real work space needed for bidiagonal SVD is BDSPAC, - BDSPAC = 3*M*M + 4*M -*/ - - if (*n >= mnthr1) { - if (wntqn) { - -/* Path 1t (N much larger than M, JOBZ='N') */ - - maxwrk = *m + *m * ilaenv_(&c__1, "ZGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& - c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); - maxwrk = max(i__1,i__2); - minwrk = *m * 3; - } else if (wntqo) { - -/* Path 2t (N much larger than M, JOBZ='O') */ - - wrkbl = *m + *m * ilaenv_(&c__1, "ZGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "ZUNGLQ", - " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& - c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "ZUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "ZUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); - maxwrk = *m * *n + *m * *m + wrkbl; - minwrk = (*m << 1) * *m + *m * 3; - } else if (wntqs) { - -/* Path 3t (N much larger than M, JOBZ='S') */ - - wrkbl = *m + *m * ilaenv_(&c__1, "ZGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "ZUNGLQ", - " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& - c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "ZUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "ZUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); - maxwrk = *m * *m + wrkbl; - minwrk = *m * *m + *m * 3; - } else if (wntqa) { - -/* Path 4t (N much larger than M, JOBZ='A') */ - - wrkbl = *m + *m * ilaenv_(&c__1, "ZGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m + *n * ilaenv_(&c__1, "ZUNGLQ", - " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& - c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "ZUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "ZUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); - maxwrk = *m * *m + wrkbl; - minwrk = *m * *m + (*m << 1) + *n; - } - } else if (*n >= mnthr2) { - -/* Path 5t (N much larger than M, but not as much as MNTHR1) */ - - maxwrk = (*m << 1) + (*m + *n) * ilaenv_(&c__1, "ZGEBRD", - " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - minwrk = (*m << 1) + *n; - if (wntqo) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "ZUNGBR", "P", m, n, m, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "ZUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); - maxwrk += *m * *n; - minwrk += *m * *m; - } else if (wntqs) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "ZUNGBR", "P", m, n, m, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "ZUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); - } else if (wntqa) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *n * ilaenv_(&c__1, - "ZUNGBR", "P", n, n, m, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "ZUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); - } - } else { - -/* Path 6t (N greater than M, but not much larger) */ - - maxwrk = (*m << 1) + (*m + *n) * ilaenv_(&c__1, "ZGEBRD", - " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - minwrk = (*m << 1) + *n; - if (wntqo) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "ZUNMBR", "PRC", m, n, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "ZUNMBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); - maxwrk += *m * *n; - minwrk += *m * *m; - } else if (wntqs) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "ZUNGBR", "PRC", m, n, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "ZUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); - } else if (wntqa) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *n * ilaenv_(&c__1, - "ZUNGBR", "PRC", n, n, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, - "ZUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); - } - } - } - maxwrk = max(maxwrk,minwrk); - work[1].r = (doublereal) maxwrk, work[1].i = 0.; - } - - if (*lwork < minwrk && ! lquery) { - *info = -13; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZGESDD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0) { - if (*lwork >= 1) { - work[1].r = 1., work[1].i = 0.; - } - return 0; - } - -/* Get machine constants */ - - eps = PRECISION; - smlnum = sqrt(SAFEMINIMUM) / eps; - bignum = 1. / smlnum; - -/* Scale A if max element outside range [SMLNUM,BIGNUM] */ - - anrm = zlange_("M", m, n, &a[a_offset], lda, dum); - iscl = 0; - if (anrm > 0. && anrm < smlnum) { - iscl = 1; - zlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & - ierr); - } else if (anrm > bignum) { - iscl = 1; - zlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & - ierr); - } - - if (*m >= *n) { - -/* - A has at least as many rows as columns. If A has sufficiently - more rows than columns, first reduce using the QR - decomposition (if sufficient workspace available) -*/ - - if (*m >= mnthr1) { - - if (wntqn) { - -/* - Path 1 (M much larger than N, JOBZ='N') - No singular vectors to be computed -*/ - - itau = 1; - nwork = itau + *n; - -/* - Compute A=Q*R - (CWorkspace: need 2*N, prefer N+N*NB) - (RWorkspace: need 0) -*/ - - i__1 = *lwork - nwork + 1; - zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); - -/* Zero out below R */ - - i__1 = *n - 1; - i__2 = *n - 1; - zlaset_("L", &i__1, &i__2, &c_b59, &c_b59, &a[a_dim1 + 2], - lda); - ie = 1; - itauq = 1; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize R in A - (CWorkspace: need 3*N, prefer 2*N+2*N*NB) - (RWorkspace: need N) -*/ - - i__1 = *lwork - nwork + 1; - zgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); - nrwork = ie + *n; - -/* - Perform bidiagonal SVD, compute singular values only - (CWorkspace: 0) - (RWorkspace: need BDSPAC) -*/ - - dbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & - c__1, dum, idum, &rwork[nrwork], &iwork[1], info); - - } else if (wntqo) { - -/* - Path 2 (M much larger than N, JOBZ='O') - N left singular vectors to be overwritten on A and - N right singular vectors to be computed in VT -*/ - - iu = 1; - -/* WORK(IU) is N by N */ - - ldwrku = *n; - ir = iu + ldwrku * *n; - if (*lwork >= *m * *n + *n * *n + *n * 3) { - -/* WORK(IR) is M by N */ - - ldwrkr = *m; - } else { - ldwrkr = (*lwork - *n * *n - *n * 3) / *n; - } - itau = ir + ldwrkr * *n; - nwork = itau + *n; - -/* - Compute A=Q*R - (CWorkspace: need N*N+2*N, prefer M*N+N+N*NB) - (RWorkspace: 0) -*/ - - i__1 = *lwork - nwork + 1; - zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); - -/* Copy R to WORK( IR ), zeroing out below it */ - - zlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); - i__1 = *n - 1; - i__2 = *n - 1; - zlaset_("L", &i__1, &i__2, &c_b59, &c_b59, &work[ir + 1], & - ldwrkr); - -/* - Generate Q in A - (CWorkspace: need 2*N, prefer N+N*NB) - (RWorkspace: 0) -*/ - - i__1 = *lwork - nwork + 1; - zungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], - &i__1, &ierr); - ie = 1; - itauq = itau; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize R in WORK(IR) - (CWorkspace: need N*N+3*N, prefer M*N+2*N+2*N*NB) - (RWorkspace: need N) -*/ - - i__1 = *lwork - nwork + 1; - zgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of R in WORK(IRU) and computing right singular vectors - of R in WORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - iru = ie + *n; - irvt = iru + *n * *n; - nrwork = irvt + *n * *n; - dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & - rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix WORK(IU) - Overwrite WORK(IU) by the left singular vectors of R - (CWorkspace: need 2*N*N+3*N, prefer M*N+N*N+2*N+N*NB) - (RWorkspace: 0) -*/ - - zlacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); - i__1 = *lwork - nwork + 1; - zunmbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ - itauq], &work[iu], &ldwrku, &work[nwork], &i__1, & - ierr); - -/* - Copy real matrix RWORK(IRVT) to complex matrix VT - Overwrite VT by the right singular vectors of R - (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) - (RWorkspace: 0) -*/ - - zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); - i__1 = *lwork - nwork + 1; - zunmbr_("P", "R", "C", n, n, n, &work[ir], &ldwrkr, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - -/* - Multiply Q in A by left singular vectors of R in - WORK(IU), storing result in WORK(IR) and copying to A - (CWorkspace: need 2*N*N, prefer N*N+M*N) - (RWorkspace: 0) -*/ - - i__1 = *m; - i__2 = ldwrkr; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += - i__2) { -/* Computing MIN */ - i__3 = *m - i__ + 1; - chunk = min(i__3,ldwrkr); - zgemm_("N", "N", &chunk, n, n, &c_b60, &a[i__ + a_dim1], - lda, &work[iu], &ldwrku, &c_b59, &work[ir], & - ldwrkr); - zlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + - a_dim1], lda); -/* L10: */ - } - - } else if (wntqs) { - -/* - Path 3 (M much larger than N, JOBZ='S') - N left singular vectors to be computed in U and - N right singular vectors to be computed in VT -*/ - - ir = 1; - -/* WORK(IR) is N by N */ - - ldwrkr = *n; - itau = ir + ldwrkr * *n; - nwork = itau + *n; - -/* - Compute A=Q*R - (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) - (RWorkspace: 0) -*/ - - i__2 = *lwork - nwork + 1; - zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); - -/* Copy R to WORK(IR), zeroing out below it */ - - zlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); - i__2 = *n - 1; - i__1 = *n - 1; - zlaset_("L", &i__2, &i__1, &c_b59, &c_b59, &work[ir + 1], & - ldwrkr); - -/* - Generate Q in A - (CWorkspace: need 2*N, prefer N+N*NB) - (RWorkspace: 0) -*/ - - i__2 = *lwork - nwork + 1; - zungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], - &i__2, &ierr); - ie = 1; - itauq = itau; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize R in WORK(IR) - (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) - (RWorkspace: need N) -*/ - - i__2 = *lwork - nwork + 1; - zgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - iru = ie + *n; - irvt = iru + *n * *n; - nrwork = irvt + *n * *n; - dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & - rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix U - Overwrite U by left singular vectors of R - (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) - (RWorkspace: 0) -*/ - - zlacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); - i__2 = *lwork - nwork + 1; - zunmbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - -/* - Copy real matrix RWORK(IRVT) to complex matrix VT - Overwrite VT by right singular vectors of R - (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) - (RWorkspace: 0) -*/ - - zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); - i__2 = *lwork - nwork + 1; - zunmbr_("P", "R", "C", n, n, n, &work[ir], &ldwrkr, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); - -/* - Multiply Q in A by left singular vectors of R in - WORK(IR), storing result in U - (CWorkspace: need N*N) - (RWorkspace: 0) -*/ - - zlacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr); - zgemm_("N", "N", m, n, n, &c_b60, &a[a_offset], lda, &work[ir] - , &ldwrkr, &c_b59, &u[u_offset], ldu); - - } else if (wntqa) { - -/* - Path 4 (M much larger than N, JOBZ='A') - M left singular vectors to be computed in U and - N right singular vectors to be computed in VT -*/ - - iu = 1; - -/* WORK(IU) is N by N */ - - ldwrku = *n; - itau = iu + ldwrku * *n; - nwork = itau + *n; - -/* - Compute A=Q*R, copying result to U - (CWorkspace: need 2*N, prefer N+N*NB) - (RWorkspace: 0) -*/ - - i__2 = *lwork - nwork + 1; - zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); - zlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); - -/* - Generate Q in U - (CWorkspace: need N+M, prefer N+M*NB) - (RWorkspace: 0) -*/ - - i__2 = *lwork - nwork + 1; - zungqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork], - &i__2, &ierr); - -/* Produce R in A, zeroing out below it */ - - i__2 = *n - 1; - i__1 = *n - 1; - zlaset_("L", &i__2, &i__1, &c_b59, &c_b59, &a[a_dim1 + 2], - lda); - ie = 1; - itauq = itau; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize R in A - (CWorkspace: need 3*N, prefer 2*N+2*N*NB) - (RWorkspace: need N) -*/ - - i__2 = *lwork - nwork + 1; - zgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); - iru = ie + *n; - irvt = iru + *n * *n; - nrwork = irvt + *n * *n; - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & - rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix WORK(IU) - Overwrite WORK(IU) by left singular vectors of R - (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) - (RWorkspace: 0) -*/ - - zlacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); - i__2 = *lwork - nwork + 1; - zunmbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[ - itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & - ierr); - -/* - Copy real matrix RWORK(IRVT) to complex matrix VT - Overwrite VT by right singular vectors of R - (CWorkspace: need 3*N, prefer 2*N+N*NB) - (RWorkspace: 0) -*/ - - zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); - i__2 = *lwork - nwork + 1; - zunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); - -/* - Multiply Q in U by left singular vectors of R in - WORK(IU), storing result in A - (CWorkspace: need N*N) - (RWorkspace: 0) -*/ - - zgemm_("N", "N", m, n, n, &c_b60, &u[u_offset], ldu, &work[iu] - , &ldwrku, &c_b59, &a[a_offset], lda); - -/* Copy left singular vectors of A from A to U */ - - zlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); - - } - - } else if (*m >= mnthr2) { - -/* - MNTHR2 <= M < MNTHR1 - - Path 5 (M much larger than N, but not as much as MNTHR1) - Reduce to bidiagonal form without QR decomposition, use - ZUNGBR and matrix multiplication to compute singular vectors -*/ - - ie = 1; - nrwork = ie + *n; - itauq = 1; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize A - (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) - (RWorkspace: need N) -*/ - - i__2 = *lwork - nwork + 1; - zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], - &work[itaup], &work[nwork], &i__2, &ierr); - if (wntqn) { - -/* - Compute singular values only - (Cworkspace: 0) - (Rworkspace: need BDSPAC) -*/ - - dbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & - c__1, dum, idum, &rwork[nrwork], &iwork[1], info); - } else if (wntqo) { - iu = nwork; - iru = nrwork; - irvt = iru + *n * *n; - nrwork = irvt + *n * *n; - -/* - Copy A to VT, generate P**H - (Cworkspace: need 2*N, prefer N+N*NB) - (Rworkspace: 0) -*/ - - zlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - i__2 = *lwork - nwork + 1; - zungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & - work[nwork], &i__2, &ierr); - -/* - Generate Q in A - (CWorkspace: need 2*N, prefer N+N*NB) - (RWorkspace: 0) -*/ - - i__2 = *lwork - nwork + 1; - zungbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[ - nwork], &i__2, &ierr); - - if (*lwork >= *m * *n + *n * 3) { - -/* WORK( IU ) is M by N */ - - ldwrku = *m; - } else { - -/* WORK(IU) is LDWRKU by N */ - - ldwrku = (*lwork - *n * 3) / *n; - } - nwork = iu + ldwrku * *n; - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & - rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Multiply real matrix RWORK(IRVT) by P**H in VT, - storing the result in WORK(IU), copying to VT - (Cworkspace: need 0) - (Rworkspace: need 3*N*N) -*/ - - zlarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &work[iu] - , &ldwrku, &rwork[nrwork]); - zlacpy_("F", n, n, &work[iu], &ldwrku, &vt[vt_offset], ldvt); - -/* - Multiply Q in A by real matrix RWORK(IRU), storing the - result in WORK(IU), copying to A - (CWorkspace: need N*N, prefer M*N) - (Rworkspace: need 3*N*N, prefer N*N+2*M*N) -*/ - - nrwork = irvt; - i__2 = *m; - i__1 = ldwrku; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += - i__1) { -/* Computing MIN */ - i__3 = *m - i__ + 1; - chunk = min(i__3,ldwrku); - zlacrm_(&chunk, n, &a[i__ + a_dim1], lda, &rwork[iru], n, - &work[iu], &ldwrku, &rwork[nrwork]); - zlacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + - a_dim1], lda); -/* L20: */ - } - - } else if (wntqs) { - -/* - Copy A to VT, generate P**H - (Cworkspace: need 2*N, prefer N+N*NB) - (Rworkspace: 0) -*/ - - zlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - i__1 = *lwork - nwork + 1; - zungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & - work[nwork], &i__1, &ierr); - -/* - Copy A to U, generate Q - (Cworkspace: need 2*N, prefer N+N*NB) - (Rworkspace: 0) -*/ - - zlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); - i__1 = *lwork - nwork + 1; - zungbr_("Q", m, n, n, &u[u_offset], ldu, &work[itauq], &work[ - nwork], &i__1, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - iru = nrwork; - irvt = iru + *n * *n; - nrwork = irvt + *n * *n; - dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & - rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Multiply real matrix RWORK(IRVT) by P**H in VT, - storing the result in A, copying to VT - (Cworkspace: need 0) - (Rworkspace: need 3*N*N) -*/ - - zlarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &a[ - a_offset], lda, &rwork[nrwork]); - zlacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - -/* - Multiply Q in U by real matrix RWORK(IRU), storing the - result in A, copying to U - (CWorkspace: need 0) - (Rworkspace: need N*N+2*M*N) -*/ - - nrwork = irvt; - zlacrm_(m, n, &u[u_offset], ldu, &rwork[iru], n, &a[a_offset], - lda, &rwork[nrwork]); - zlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); - } else { - -/* - Copy A to VT, generate P**H - (Cworkspace: need 2*N, prefer N+N*NB) - (Rworkspace: 0) -*/ - - zlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - i__1 = *lwork - nwork + 1; - zungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & - work[nwork], &i__1, &ierr); - -/* - Copy A to U, generate Q - (Cworkspace: need 2*N, prefer N+N*NB) - (Rworkspace: 0) -*/ - - zlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); - i__1 = *lwork - nwork + 1; - zungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ - nwork], &i__1, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - iru = nrwork; - irvt = iru + *n * *n; - nrwork = irvt + *n * *n; - dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & - rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Multiply real matrix RWORK(IRVT) by P**H in VT, - storing the result in A, copying to VT - (Cworkspace: need 0) - (Rworkspace: need 3*N*N) -*/ - - zlarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &a[ - a_offset], lda, &rwork[nrwork]); - zlacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - -/* - Multiply Q in U by real matrix RWORK(IRU), storing the - result in A, copying to U - (CWorkspace: 0) - (Rworkspace: need 3*N*N) -*/ - - nrwork = irvt; - zlacrm_(m, n, &u[u_offset], ldu, &rwork[iru], n, &a[a_offset], - lda, &rwork[nrwork]); - zlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); - } - - } else { - -/* - M .LT. MNTHR2 - - Path 6 (M at least N, but not much larger) - Reduce to bidiagonal form without QR decomposition - Use ZUNMBR to compute singular vectors -*/ - - ie = 1; - nrwork = ie + *n; - itauq = 1; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize A - (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) - (RWorkspace: need N) -*/ - - i__1 = *lwork - nwork + 1; - zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], - &work[itaup], &work[nwork], &i__1, &ierr); - if (wntqn) { - -/* - Compute singular values only - (Cworkspace: 0) - (Rworkspace: need BDSPAC) -*/ - - dbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & - c__1, dum, idum, &rwork[nrwork], &iwork[1], info); - } else if (wntqo) { - iu = nwork; - iru = nrwork; - irvt = iru + *n * *n; - nrwork = irvt + *n * *n; - if (*lwork >= *m * *n + *n * 3) { - -/* WORK( IU ) is M by N */ - - ldwrku = *m; - } else { - -/* WORK( IU ) is LDWRKU by N */ - - ldwrku = (*lwork - *n * 3) / *n; - } - nwork = iu + ldwrku * *n; - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & - rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRVT) to complex matrix VT - Overwrite VT by right singular vectors of A - (Cworkspace: need 2*N, prefer N+N*NB) - (Rworkspace: need 0) -*/ - - zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); - i__1 = *lwork - nwork + 1; - zunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - - if (*lwork >= *m * *n + *n * 3) { - -/* - Copy real matrix RWORK(IRU) to complex matrix WORK(IU) - Overwrite WORK(IU) by left singular vectors of A, copying - to A - (Cworkspace: need M*N+2*N, prefer M*N+N+N*NB) - (Rworkspace: need 0) -*/ - - zlaset_("F", m, n, &c_b59, &c_b59, &work[iu], &ldwrku); - zlacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); - i__1 = *lwork - nwork + 1; - zunmbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ - itauq], &work[iu], &ldwrku, &work[nwork], &i__1, & - ierr); - zlacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda); - } else { - -/* - Generate Q in A - (Cworkspace: need 2*N, prefer N+N*NB) - (Rworkspace: need 0) -*/ - - i__1 = *lwork - nwork + 1; - zungbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & - work[nwork], &i__1, &ierr); - -/* - Multiply Q in A by real matrix RWORK(IRU), storing the - result in WORK(IU), copying to A - (CWorkspace: need N*N, prefer M*N) - (Rworkspace: need 3*N*N, prefer N*N+2*M*N) -*/ - - nrwork = irvt; - i__1 = *m; - i__2 = ldwrku; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += - i__2) { -/* Computing MIN */ - i__3 = *m - i__ + 1; - chunk = min(i__3,ldwrku); - zlacrm_(&chunk, n, &a[i__ + a_dim1], lda, &rwork[iru], - n, &work[iu], &ldwrku, &rwork[nrwork]); - zlacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + - a_dim1], lda); -/* L30: */ - } - } - - } else if (wntqs) { - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - iru = nrwork; - irvt = iru + *n * *n; - nrwork = irvt + *n * *n; - dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & - rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix U - Overwrite U by left singular vectors of A - (CWorkspace: need 3*N, prefer 2*N+N*NB) - (RWorkspace: 0) -*/ - - zlaset_("F", m, n, &c_b59, &c_b59, &u[u_offset], ldu); - zlacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); - i__2 = *lwork - nwork + 1; - zunmbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - -/* - Copy real matrix RWORK(IRVT) to complex matrix VT - Overwrite VT by right singular vectors of A - (CWorkspace: need 3*N, prefer 2*N+N*NB) - (RWorkspace: 0) -*/ - - zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); - i__2 = *lwork - nwork + 1; - zunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); - } else { - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - iru = nrwork; - irvt = iru + *n * *n; - nrwork = irvt + *n * *n; - dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & - rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* Set the right corner of U to identity matrix */ - - zlaset_("F", m, m, &c_b59, &c_b59, &u[u_offset], ldu); - i__2 = *m - *n; - i__1 = *m - *n; - zlaset_("F", &i__2, &i__1, &c_b59, &c_b60, &u[*n + 1 + (*n + - 1) * u_dim1], ldu); - -/* - Copy real matrix RWORK(IRU) to complex matrix U - Overwrite U by left singular vectors of A - (CWorkspace: need 2*N+M, prefer 2*N+M*NB) - (RWorkspace: 0) -*/ - - zlacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); - i__2 = *lwork - nwork + 1; - zunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - -/* - Copy real matrix RWORK(IRVT) to complex matrix VT - Overwrite VT by right singular vectors of A - (CWorkspace: need 3*N, prefer 2*N+N*NB) - (RWorkspace: 0) -*/ - - zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); - i__2 = *lwork - nwork + 1; - zunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); - } - - } - - } else { - -/* - A has more columns than rows. If A has sufficiently more - columns than rows, first reduce using the LQ decomposition - (if sufficient workspace available) -*/ - - if (*n >= mnthr1) { - - if (wntqn) { - -/* - Path 1t (N much larger than M, JOBZ='N') - No singular vectors to be computed -*/ - - itau = 1; - nwork = itau + *m; - -/* - Compute A=L*Q - (CWorkspace: need 2*M, prefer M+M*NB) - (RWorkspace: 0) -*/ - - i__2 = *lwork - nwork + 1; - zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); - -/* Zero out above L */ - - i__2 = *m - 1; - i__1 = *m - 1; - zlaset_("U", &i__2, &i__1, &c_b59, &c_b59, &a[(a_dim1 << 1) + - 1], lda); - ie = 1; - itauq = 1; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize L in A - (CWorkspace: need 3*M, prefer 2*M+2*M*NB) - (RWorkspace: need M) -*/ - - i__2 = *lwork - nwork + 1; - zgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); - nrwork = ie + *m; - -/* - Perform bidiagonal SVD, compute singular values only - (CWorkspace: 0) - (RWorkspace: need BDSPAC) -*/ - - dbdsdc_("U", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & - c__1, dum, idum, &rwork[nrwork], &iwork[1], info); - - } else if (wntqo) { - -/* - Path 2t (N much larger than M, JOBZ='O') - M right singular vectors to be overwritten on A and - M left singular vectors to be computed in U -*/ - - ivt = 1; - ldwkvt = *m; - -/* WORK(IVT) is M by M */ - - il = ivt + ldwkvt * *m; - if (*lwork >= *m * *n + *m * *m + *m * 3) { - -/* WORK(IL) M by N */ - - ldwrkl = *m; - chunk = *n; - } else { - -/* WORK(IL) is M by CHUNK */ - - ldwrkl = *m; - chunk = (*lwork - *m * *m - *m * 3) / *m; - } - itau = il + ldwrkl * chunk; - nwork = itau + *m; - -/* - Compute A=L*Q - (CWorkspace: need 2*M, prefer M+M*NB) - (RWorkspace: 0) -*/ - - i__2 = *lwork - nwork + 1; - zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); - -/* Copy L to WORK(IL), zeroing about above it */ - - zlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); - i__2 = *m - 1; - i__1 = *m - 1; - zlaset_("U", &i__2, &i__1, &c_b59, &c_b59, &work[il + ldwrkl], - &ldwrkl); - -/* - Generate Q in A - (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) - (RWorkspace: 0) -*/ - - i__2 = *lwork - nwork + 1; - zunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], - &i__2, &ierr); - ie = 1; - itauq = itau; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize L in WORK(IL) - (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) - (RWorkspace: need M) -*/ - - i__2 = *lwork - nwork + 1; - zgebrd_(m, m, &work[il], &ldwrkl, &s[1], &rwork[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - iru = ie + *m; - irvt = iru + *m * *m; - nrwork = irvt + *m * *m; - dbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & - rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix WORK(IU) - Overwrite WORK(IU) by the left singular vectors of L - (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) - (RWorkspace: 0) -*/ - - zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); - i__2 = *lwork - nwork + 1; - zunmbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - -/* - Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) - Overwrite WORK(IVT) by the right singular vectors of L - (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) - (RWorkspace: 0) -*/ - - zlacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt); - i__2 = *lwork - nwork + 1; - zunmbr_("P", "R", "C", m, m, m, &work[il], &ldwrkl, &work[ - itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, & - ierr); - -/* - Multiply right singular vectors of L in WORK(IL) by Q - in A, storing result in WORK(IL) and copying to A - (CWorkspace: need 2*M*M, prefer M*M+M*N)) - (RWorkspace: 0) -*/ - - i__2 = *n; - i__1 = chunk; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += - i__1) { -/* Computing MIN */ - i__3 = *n - i__ + 1; - blk = min(i__3,chunk); - zgemm_("N", "N", m, &blk, m, &c_b60, &work[ivt], m, &a[ - i__ * a_dim1 + 1], lda, &c_b59, &work[il], & - ldwrkl); - zlacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 - + 1], lda); -/* L40: */ - } - - } else if (wntqs) { - -/* - Path 3t (N much larger than M, JOBZ='S') - M right singular vectors to be computed in VT and - M left singular vectors to be computed in U -*/ - - il = 1; - -/* WORK(IL) is M by M */ - - ldwrkl = *m; - itau = il + ldwrkl * *m; - nwork = itau + *m; - -/* - Compute A=L*Q - (CWorkspace: need 2*M, prefer M+M*NB) - (RWorkspace: 0) -*/ - - i__1 = *lwork - nwork + 1; - zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); - -/* Copy L to WORK(IL), zeroing out above it */ - - zlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); - i__1 = *m - 1; - i__2 = *m - 1; - zlaset_("U", &i__1, &i__2, &c_b59, &c_b59, &work[il + ldwrkl], - &ldwrkl); - -/* - Generate Q in A - (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) - (RWorkspace: 0) -*/ - - i__1 = *lwork - nwork + 1; - zunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], - &i__1, &ierr); - ie = 1; - itauq = itau; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize L in WORK(IL) - (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) - (RWorkspace: need M) -*/ - - i__1 = *lwork - nwork + 1; - zgebrd_(m, m, &work[il], &ldwrkl, &s[1], &rwork[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - iru = ie + *m; - irvt = iru + *m * *m; - nrwork = irvt + *m * *m; - dbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & - rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix U - Overwrite U by left singular vectors of L - (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) - (RWorkspace: 0) -*/ - - zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); - i__1 = *lwork - nwork + 1; - zunmbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - -/* - Copy real matrix RWORK(IRVT) to complex matrix VT - Overwrite VT by left singular vectors of L - (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) - (RWorkspace: 0) -*/ - - zlacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); - i__1 = *lwork - nwork + 1; - zunmbr_("P", "R", "C", m, m, m, &work[il], &ldwrkl, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - -/* - Copy VT to WORK(IL), multiply right singular vectors of L - in WORK(IL) by Q in A, storing result in VT - (CWorkspace: need M*M) - (RWorkspace: 0) -*/ - - zlacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl); - zgemm_("N", "N", m, n, m, &c_b60, &work[il], &ldwrkl, &a[ - a_offset], lda, &c_b59, &vt[vt_offset], ldvt); - - } else if (wntqa) { - -/* - Path 9t (N much larger than M, JOBZ='A') - N right singular vectors to be computed in VT and - M left singular vectors to be computed in U -*/ - - ivt = 1; - -/* WORK(IVT) is M by M */ - - ldwkvt = *m; - itau = ivt + ldwkvt * *m; - nwork = itau + *m; - -/* - Compute A=L*Q, copying result to VT - (CWorkspace: need 2*M, prefer M+M*NB) - (RWorkspace: 0) -*/ - - i__1 = *lwork - nwork + 1; - zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); - zlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - -/* - Generate Q in VT - (CWorkspace: need M+N, prefer M+N*NB) - (RWorkspace: 0) -*/ - - i__1 = *lwork - nwork + 1; - zunglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[ - nwork], &i__1, &ierr); - -/* Produce L in A, zeroing out above it */ - - i__1 = *m - 1; - i__2 = *m - 1; - zlaset_("U", &i__1, &i__2, &c_b59, &c_b59, &a[(a_dim1 << 1) + - 1], lda); - ie = 1; - itauq = itau; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize L in A - (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) - (RWorkspace: need M) -*/ - - i__1 = *lwork - nwork + 1; - zgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - iru = ie + *m; - irvt = iru + *m * *m; - nrwork = irvt + *m * *m; - dbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & - rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix U - Overwrite U by left singular vectors of L - (CWorkspace: need 3*M, prefer 2*M+M*NB) - (RWorkspace: 0) -*/ - - zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); - i__1 = *lwork - nwork + 1; - zunmbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - -/* - Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) - Overwrite WORK(IVT) by right singular vectors of L - (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) - (RWorkspace: 0) -*/ - - zlacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt); - i__1 = *lwork - nwork + 1; - zunmbr_("P", "R", "C", m, m, m, &a[a_offset], lda, &work[ - itaup], &work[ivt], &ldwkvt, &work[nwork], &i__1, & - ierr); - -/* - Multiply right singular vectors of L in WORK(IVT) by - Q in VT, storing result in A - (CWorkspace: need M*M) - (RWorkspace: 0) -*/ - - zgemm_("N", "N", m, n, m, &c_b60, &work[ivt], &ldwkvt, &vt[ - vt_offset], ldvt, &c_b59, &a[a_offset], lda); - -/* Copy right singular vectors of A from A to VT */ - - zlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - - } - - } else if (*n >= mnthr2) { - -/* - MNTHR2 <= N < MNTHR1 - - Path 5t (N much larger than M, but not as much as MNTHR1) - Reduce to bidiagonal form without QR decomposition, use - ZUNGBR and matrix multiplication to compute singular vectors -*/ - - - ie = 1; - nrwork = ie + *m; - itauq = 1; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize A - (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) - (RWorkspace: M) -*/ - - i__1 = *lwork - nwork + 1; - zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], - &work[itaup], &work[nwork], &i__1, &ierr); - - if (wntqn) { - -/* - Compute singular values only - (Cworkspace: 0) - (Rworkspace: need BDSPAC) -*/ - - dbdsdc_("L", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & - c__1, dum, idum, &rwork[nrwork], &iwork[1], info); - } else if (wntqo) { - irvt = nrwork; - iru = irvt + *m * *m; - nrwork = iru + *m * *m; - ivt = nwork; - -/* - Copy A to U, generate Q - (Cworkspace: need 2*M, prefer M+M*NB) - (Rworkspace: 0) -*/ - - zlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); - i__1 = *lwork - nwork + 1; - zungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ - nwork], &i__1, &ierr); - -/* - Generate P**H in A - (Cworkspace: need 2*M, prefer M+M*NB) - (Rworkspace: 0) -*/ - - i__1 = *lwork - nwork + 1; - zungbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[ - nwork], &i__1, &ierr); - - ldwkvt = *m; - if (*lwork >= *m * *n + *m * 3) { - -/* WORK( IVT ) is M by N */ - - nwork = ivt + ldwkvt * *n; - chunk = *n; - } else { - -/* WORK( IVT ) is M by CHUNK */ - - chunk = (*lwork - *m * 3) / *m; - nwork = ivt + ldwkvt * chunk; - } - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & - rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Multiply Q in U by real matrix RWORK(IRVT) - storing the result in WORK(IVT), copying to U - (Cworkspace: need 0) - (Rworkspace: need 2*M*M) -*/ - - zlacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &work[ivt], & - ldwkvt, &rwork[nrwork]); - zlacpy_("F", m, m, &work[ivt], &ldwkvt, &u[u_offset], ldu); - -/* - Multiply RWORK(IRVT) by P**H in A, storing the - result in WORK(IVT), copying to A - (CWorkspace: need M*M, prefer M*N) - (Rworkspace: need 2*M*M, prefer 2*M*N) -*/ - - nrwork = iru; - i__1 = *n; - i__2 = chunk; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += - i__2) { -/* Computing MIN */ - i__3 = *n - i__ + 1; - blk = min(i__3,chunk); - zlarcm_(m, &blk, &rwork[irvt], m, &a[i__ * a_dim1 + 1], - lda, &work[ivt], &ldwkvt, &rwork[nrwork]); - zlacpy_("F", m, &blk, &work[ivt], &ldwkvt, &a[i__ * - a_dim1 + 1], lda); -/* L50: */ - } - } else if (wntqs) { - -/* - Copy A to U, generate Q - (Cworkspace: need 2*M, prefer M+M*NB) - (Rworkspace: 0) -*/ - - zlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); - i__2 = *lwork - nwork + 1; - zungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ - nwork], &i__2, &ierr); - -/* - Copy A to VT, generate P**H - (Cworkspace: need 2*M, prefer M+M*NB) - (Rworkspace: 0) -*/ - - zlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - i__2 = *lwork - nwork + 1; - zungbr_("P", m, n, m, &vt[vt_offset], ldvt, &work[itaup], & - work[nwork], &i__2, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - irvt = nrwork; - iru = irvt + *m * *m; - nrwork = iru + *m * *m; - dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & - rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Multiply Q in U by real matrix RWORK(IRU), storing the - result in A, copying to U - (CWorkspace: need 0) - (Rworkspace: need 3*M*M) -*/ - - zlacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &a[a_offset], - lda, &rwork[nrwork]); - zlacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu); - -/* - Multiply real matrix RWORK(IRVT) by P**H in VT, - storing the result in A, copying to VT - (Cworkspace: need 0) - (Rworkspace: need M*M+2*M*N) -*/ - - nrwork = iru; - zlarcm_(m, n, &rwork[irvt], m, &vt[vt_offset], ldvt, &a[ - a_offset], lda, &rwork[nrwork]); - zlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - } else { - -/* - Copy A to U, generate Q - (Cworkspace: need 2*M, prefer M+M*NB) - (Rworkspace: 0) -*/ - - zlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); - i__2 = *lwork - nwork + 1; - zungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ - nwork], &i__2, &ierr); - -/* - Copy A to VT, generate P**H - (Cworkspace: need 2*M, prefer M+M*NB) - (Rworkspace: 0) -*/ - - zlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - i__2 = *lwork - nwork + 1; - zungbr_("P", n, n, m, &vt[vt_offset], ldvt, &work[itaup], & - work[nwork], &i__2, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - irvt = nrwork; - iru = irvt + *m * *m; - nrwork = iru + *m * *m; - dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & - rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Multiply Q in U by real matrix RWORK(IRU), storing the - result in A, copying to U - (CWorkspace: need 0) - (Rworkspace: need 3*M*M) -*/ - - zlacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &a[a_offset], - lda, &rwork[nrwork]); - zlacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu); - -/* - Multiply real matrix RWORK(IRVT) by P**H in VT, - storing the result in A, copying to VT - (Cworkspace: need 0) - (Rworkspace: need M*M+2*M*N) -*/ - - zlarcm_(m, n, &rwork[irvt], m, &vt[vt_offset], ldvt, &a[ - a_offset], lda, &rwork[nrwork]); - zlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - } - - } else { - -/* - N .LT. MNTHR2 - - Path 6t (N greater than M, but not much larger) - Reduce to bidiagonal form without LQ decomposition - Use ZUNMBR to compute singular vectors -*/ - - ie = 1; - nrwork = ie + *m; - itauq = 1; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize A - (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) - (RWorkspace: M) -*/ - - i__2 = *lwork - nwork + 1; - zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], - &work[itaup], &work[nwork], &i__2, &ierr); - if (wntqn) { - -/* - Compute singular values only - (Cworkspace: 0) - (Rworkspace: need BDSPAC) -*/ - - dbdsdc_("L", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & - c__1, dum, idum, &rwork[nrwork], &iwork[1], info); - } else if (wntqo) { - ldwkvt = *m; - ivt = nwork; - if (*lwork >= *m * *n + *m * 3) { - -/* WORK( IVT ) is M by N */ - - zlaset_("F", m, n, &c_b59, &c_b59, &work[ivt], &ldwkvt); - nwork = ivt + ldwkvt * *n; - } else { - -/* WORK( IVT ) is M by CHUNK */ - - chunk = (*lwork - *m * 3) / *m; - nwork = ivt + ldwkvt * chunk; - } - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - irvt = nrwork; - iru = irvt + *m * *m; - nrwork = iru + *m * *m; - dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & - rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix U - Overwrite U by left singular vectors of A - (Cworkspace: need 2*M, prefer M+M*NB) - (Rworkspace: need 0) -*/ - - zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); - i__2 = *lwork - nwork + 1; - zunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - - if (*lwork >= *m * *n + *m * 3) { - -/* - Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) - Overwrite WORK(IVT) by right singular vectors of A, - copying to A - (Cworkspace: need M*N+2*M, prefer M*N+M+M*NB) - (Rworkspace: need 0) -*/ - - zlacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt); - i__2 = *lwork - nwork + 1; - zunmbr_("P", "R", "C", m, n, m, &a[a_offset], lda, &work[ - itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, - &ierr); - zlacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda); - } else { - -/* - Generate P**H in A - (Cworkspace: need 2*M, prefer M+M*NB) - (Rworkspace: need 0) -*/ - - i__2 = *lwork - nwork + 1; - zungbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & - work[nwork], &i__2, &ierr); - -/* - Multiply Q in A by real matrix RWORK(IRU), storing the - result in WORK(IU), copying to A - (CWorkspace: need M*M, prefer M*N) - (Rworkspace: need 3*M*M, prefer M*M+2*M*N) -*/ - - nrwork = iru; - i__2 = *n; - i__1 = chunk; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += - i__1) { -/* Computing MIN */ - i__3 = *n - i__ + 1; - blk = min(i__3,chunk); - zlarcm_(m, &blk, &rwork[irvt], m, &a[i__ * a_dim1 + 1] - , lda, &work[ivt], &ldwkvt, &rwork[nrwork]); - zlacpy_("F", m, &blk, &work[ivt], &ldwkvt, &a[i__ * - a_dim1 + 1], lda); -/* L60: */ - } - } - } else if (wntqs) { - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - irvt = nrwork; - iru = irvt + *m * *m; - nrwork = iru + *m * *m; - dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & - rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix U - Overwrite U by left singular vectors of A - (CWorkspace: need 3*M, prefer 2*M+M*NB) - (RWorkspace: M*M) -*/ - - zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); - i__1 = *lwork - nwork + 1; - zunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - -/* - Copy real matrix RWORK(IRVT) to complex matrix VT - Overwrite VT by right singular vectors of A - (CWorkspace: need 3*M, prefer 2*M+M*NB) - (RWorkspace: M*M) -*/ - - zlaset_("F", m, n, &c_b59, &c_b59, &vt[vt_offset], ldvt); - zlacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); - i__1 = *lwork - nwork + 1; - zunmbr_("P", "R", "C", m, n, m, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - } else { - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - irvt = nrwork; - iru = irvt + *m * *m; - nrwork = iru + *m * *m; - - dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & - rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix U - Overwrite U by left singular vectors of A - (CWorkspace: need 3*M, prefer 2*M+M*NB) - (RWorkspace: M*M) -*/ - - zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); - i__1 = *lwork - nwork + 1; - zunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - -/* Set the right corner of VT to identity matrix */ - - i__1 = *n - *m; - i__2 = *n - *m; - zlaset_("F", &i__1, &i__2, &c_b59, &c_b60, &vt[*m + 1 + (*m + - 1) * vt_dim1], ldvt); - -/* - Copy real matrix RWORK(IRVT) to complex matrix VT - Overwrite VT by right singular vectors of A - (CWorkspace: need 2*M+N, prefer 2*M+N*NB) - (RWorkspace: M*M) -*/ - - zlaset_("F", n, n, &c_b59, &c_b59, &vt[vt_offset], ldvt); - zlacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); - i__1 = *lwork - nwork + 1; - zunmbr_("P", "R", "C", n, n, m, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - } - - } - - } - -/* Undo scaling if necessary */ - - if (iscl == 1) { - if (anrm > bignum) { - dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & - minmn, &ierr); - } - if (anrm < smlnum) { - dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & - minmn, &ierr); - } - } - -/* Return optimal workspace in WORK(1) */ - - work[1].r = (doublereal) maxwrk, work[1].i = 0.; - - return 0; - -/* End of ZGESDD */ - -} /* zgesdd_ */ - -/* Subroutine */ int zgesv_(integer *n, integer *nrhs, doublecomplex *a, - integer *lda, integer *ipiv, doublecomplex *b, integer *ldb, integer * - info) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1; - - /* Local variables */ - extern /* Subroutine */ int xerbla_(char *, integer *), zgetrf_( - integer *, integer *, doublecomplex *, integer *, integer *, - integer *), zgetrs_(char *, integer *, integer *, doublecomplex *, - integer *, integer *, doublecomplex *, integer *, integer *); - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - ZGESV computes the solution to a complex system of linear equations - A * X = B, - where A is an N-by-N matrix and X and B are N-by-NRHS matrices. - - The LU decomposition with partial pivoting and row interchanges is - used to factor A as - A = P * L * U, - where P is a permutation matrix, L is unit lower triangular, and U is - upper triangular. The factored form of A is then used to solve the - system of equations A * X = B. - - Arguments - ========= - - N (input) INTEGER - The number of linear equations, i.e., the order of the - matrix A. N >= 0. - - NRHS (input) INTEGER - The number of right hand sides, i.e., the number of columns - of the matrix B. NRHS >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the N-by-N coefficient matrix A. - On exit, the factors L and U from the factorization - A = P*L*U; the unit diagonal elements of L are not stored. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - IPIV (output) INTEGER array, dimension (N) - The pivot indices that define the permutation matrix P; - row i of the matrix was interchanged with row IPIV(i). - - B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) - On entry, the N-by-NRHS matrix of right hand side matrix B. - On exit, if INFO = 0, the N-by-NRHS solution matrix X. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, U(i,i) is exactly zero. The factorization - has been completed, but the factor U is exactly - singular, so the solution could not be computed. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - *info = 0; - if (*n < 0) { - *info = -1; - } else if (*nrhs < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } else if (*ldb < max(1,*n)) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZGESV ", &i__1); - return 0; - } - -/* Compute the LU factorization of A. */ - - zgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); - if (*info == 0) { - -/* Solve the system A*X = B, overwriting B with X. */ - - zgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[ - b_offset], ldb, info); - } - return 0; - -/* End of ZGESV */ - -} /* zgesv_ */ - -/* Subroutine */ int zgetf2_(integer *m, integer *n, doublecomplex *a, - integer *lda, integer *ipiv, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - doublecomplex z__1; - - /* Builtin functions */ - void z_div(doublecomplex *, doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer j, jp; - extern /* Subroutine */ int zscal_(integer *, doublecomplex *, - doublecomplex *, integer *), zgeru_(integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, integer *), zswap_(integer *, - doublecomplex *, integer *, doublecomplex *, integer *), xerbla_( - char *, integer *); - extern integer izamax_(integer *, doublecomplex *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZGETF2 computes an LU factorization of a general m-by-n matrix A - using partial pivoting with row interchanges. - - The factorization has the form - A = P * L * U - where P is a permutation matrix, L is lower triangular with unit - diagonal elements (lower trapezoidal if m > n), and U is upper - triangular (upper trapezoidal if m < n). - - This is the right-looking Level 2 BLAS version of the algorithm. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the m by n matrix to be factored. - On exit, the factors L and U from the factorization - A = P*L*U; the unit diagonal elements of L are not stored. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - IPIV (output) INTEGER array, dimension (min(M,N)) - The pivot indices; for 1 <= i <= min(M,N), row i of the - matrix was interchanged with row IPIV(i). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - > 0: if INFO = k, U(k,k) is exactly zero. The factorization - has been completed, but the factor U is exactly - singular, and division by zero will occur if it is used - to solve a system of equations. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZGETF2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0) { - return 0; - } - - i__1 = min(*m,*n); - for (j = 1; j <= i__1; ++j) { - -/* Find pivot and test for singularity. */ - - i__2 = *m - j + 1; - jp = j - 1 + izamax_(&i__2, &a[j + j * a_dim1], &c__1); - ipiv[j] = jp; - i__2 = jp + j * a_dim1; - if (a[i__2].r != 0. || a[i__2].i != 0.) { - -/* Apply the interchange to columns 1:N. */ - - if (jp != j) { - zswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda); - } - -/* Compute elements J+1:M of J-th column. */ - - if (j < *m) { - i__2 = *m - j; - z_div(&z__1, &c_b60, &a[j + j * a_dim1]); - zscal_(&i__2, &z__1, &a[j + 1 + j * a_dim1], &c__1); - } - - } else if (*info == 0) { - - *info = j; - } - - if (j < min(*m,*n)) { - -/* Update trailing submatrix. */ - - i__2 = *m - j; - i__3 = *n - j; - z__1.r = -1., z__1.i = -0.; - zgeru_(&i__2, &i__3, &z__1, &a[j + 1 + j * a_dim1], &c__1, &a[j + - (j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], lda) - ; - } -/* L10: */ - } - return 0; - -/* End of ZGETF2 */ - -} /* zgetf2_ */ - -/* Subroutine */ int zgetrf_(integer *m, integer *n, doublecomplex *a, - integer *lda, integer *ipiv, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - doublecomplex z__1; - - /* Local variables */ - static integer i__, j, jb, nb, iinfo; - extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, - integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *), ztrsm_(char *, char *, char *, char *, - integer *, integer *, doublecomplex *, doublecomplex *, integer * - , doublecomplex *, integer *), - zgetf2_(integer *, integer *, doublecomplex *, integer *, integer - *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int zlaswp_(integer *, doublecomplex *, integer *, - integer *, integer *, integer *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZGETRF computes an LU factorization of a general M-by-N matrix A - using partial pivoting with row interchanges. - - The factorization has the form - A = P * L * U - where P is a permutation matrix, L is lower triangular with unit - diagonal elements (lower trapezoidal if m > n), and U is upper - triangular (upper trapezoidal if m < n). - - This is the right-looking Level 3 BLAS version of the algorithm. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the M-by-N matrix to be factored. - On exit, the factors L and U from the factorization - A = P*L*U; the unit diagonal elements of L are not stored. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - IPIV (output) INTEGER array, dimension (min(M,N)) - The pivot indices; for 1 <= i <= min(M,N), row i of the - matrix was interchanged with row IPIV(i). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, U(i,i) is exactly zero. The factorization - has been completed, but the factor U is exactly - singular, and division by zero will occur if it is used - to solve a system of equations. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZGETRF", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0) { - return 0; - } - -/* Determine the block size for this environment. */ - - nb = ilaenv_(&c__1, "ZGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) - 1); - if (nb <= 1 || nb >= min(*m,*n)) { - -/* Use unblocked code. */ - - zgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); - } else { - -/* Use blocked code. */ - - i__1 = min(*m,*n); - i__2 = nb; - for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { -/* Computing MIN */ - i__3 = min(*m,*n) - j + 1; - jb = min(i__3,nb); - -/* - Factor diagonal and subdiagonal blocks and test for exact - singularity. -*/ - - i__3 = *m - j + 1; - zgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); - -/* Adjust INFO and the pivot indices. */ - - if (*info == 0 && iinfo > 0) { - *info = iinfo + j - 1; - } -/* Computing MIN */ - i__4 = *m, i__5 = j + jb - 1; - i__3 = min(i__4,i__5); - for (i__ = j; i__ <= i__3; ++i__) { - ipiv[i__] = j - 1 + ipiv[i__]; -/* L10: */ - } - -/* Apply interchanges to columns 1:J-1. */ - - i__3 = j - 1; - i__4 = j + jb - 1; - zlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); - - if (j + jb <= *n) { - -/* Apply interchanges to columns J+JB:N. */ - - i__3 = *n - j - jb + 1; - i__4 = j + jb - 1; - zlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & - ipiv[1], &c__1); - -/* Compute block row of U. */ - - i__3 = *n - j - jb + 1; - ztrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & - c_b60, &a[j + j * a_dim1], lda, &a[j + (j + jb) * - a_dim1], lda); - if (j + jb <= *m) { - -/* Update trailing submatrix. */ - - i__3 = *m - j - jb + 1; - i__4 = *n - j - jb + 1; - z__1.r = -1., z__1.i = -0.; - zgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, - &z__1, &a[j + jb + j * a_dim1], lda, &a[j + (j + - jb) * a_dim1], lda, &c_b60, &a[j + jb + (j + jb) * - a_dim1], lda); - } - } -/* L20: */ - } - } - return 0; - -/* End of ZGETRF */ - -} /* zgetrf_ */ - -/* Subroutine */ int zgetrs_(char *trans, integer *n, integer *nrhs, - doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b, - integer *ldb, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1; - - /* Local variables */ - extern logical lsame_(char *, char *); - extern /* Subroutine */ int ztrsm_(char *, char *, char *, char *, - integer *, integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *), - xerbla_(char *, integer *); - static logical notran; - extern /* Subroutine */ int zlaswp_(integer *, doublecomplex *, integer *, - integer *, integer *, integer *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZGETRS solves a system of linear equations - A * X = B, A**T * X = B, or A**H * X = B - with a general N-by-N matrix A using the LU factorization computed - by ZGETRF. - - Arguments - ========= - - TRANS (input) CHARACTER*1 - Specifies the form of the system of equations: - = 'N': A * X = B (No transpose) - = 'T': A**T * X = B (Transpose) - = 'C': A**H * X = B (Conjugate transpose) - - N (input) INTEGER - The order of the matrix A. N >= 0. - - NRHS (input) INTEGER - The number of right hand sides, i.e., the number of columns - of the matrix B. NRHS >= 0. - - A (input) COMPLEX*16 array, dimension (LDA,N) - The factors L and U from the factorization A = P*L*U - as computed by ZGETRF. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - IPIV (input) INTEGER array, dimension (N) - The pivot indices from ZGETRF; for 1<=i<=N, row i of the - matrix was interchanged with row IPIV(i). - - B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) - On entry, the right hand side matrix B. - On exit, the solution matrix X. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - *info = 0; - notran = lsame_(trans, "N"); - if (! notran && ! lsame_(trans, "T") && ! lsame_( - trans, "C")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*nrhs < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*ldb < max(1,*n)) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZGETRS", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0 || *nrhs == 0) { - return 0; - } - - if (notran) { - -/* - Solve A * X = B. - - Apply row interchanges to the right hand sides. -*/ - - zlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); - -/* Solve L*X = B, overwriting B with X. */ - - ztrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b60, &a[ - a_offset], lda, &b[b_offset], ldb); - -/* Solve U*X = B, overwriting B with X. */ - - ztrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b60, & - a[a_offset], lda, &b[b_offset], ldb); - } else { - -/* - Solve A**T * X = B or A**H * X = B. - - Solve U'*X = B, overwriting B with X. -*/ - - ztrsm_("Left", "Upper", trans, "Non-unit", n, nrhs, &c_b60, &a[ - a_offset], lda, &b[b_offset], ldb); - -/* Solve L'*X = B, overwriting B with X. */ - - ztrsm_("Left", "Lower", trans, "Unit", n, nrhs, &c_b60, &a[a_offset], - lda, &b[b_offset], ldb); - -/* Apply row interchanges to the solution vectors. */ - - zlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); - } - - return 0; - -/* End of ZGETRS */ - -} /* zgetrs_ */ - -/* Subroutine */ int zheevd_(char *jobz, char *uplo, integer *n, - doublecomplex *a, integer *lda, doublereal *w, doublecomplex *work, - integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork, - integer *liwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - doublereal d__1, d__2; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal eps; - static integer inde; - static doublereal anrm; - static integer imax; - static doublereal rmin, rmax; - static integer lopt; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - static doublereal sigma; - extern logical lsame_(char *, char *); - static integer iinfo, lwmin, liopt; - static logical lower; - static integer llrwk, lropt; - static logical wantz; - static integer indwk2, llwrk2; - - static integer iscale; - static doublereal safmin; - extern /* Subroutine */ int xerbla_(char *, integer *); - static doublereal bignum; - extern doublereal zlanhe_(char *, char *, integer *, doublecomplex *, - integer *, doublereal *); - static integer indtau; - extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, - integer *), zlascl_(char *, integer *, integer *, doublereal *, - doublereal *, integer *, integer *, doublecomplex *, integer *, - integer *), zstedc_(char *, integer *, doublereal *, - doublereal *, doublecomplex *, integer *, doublecomplex *, - integer *, doublereal *, integer *, integer *, integer *, integer - *); - static integer indrwk, indwrk, liwmin; - extern /* Subroutine */ int zhetrd_(char *, integer *, doublecomplex *, - integer *, doublereal *, doublereal *, doublecomplex *, - doublecomplex *, integer *, integer *), zlacpy_(char *, - integer *, integer *, doublecomplex *, integer *, doublecomplex *, - integer *); - static integer lrwmin, llwork; - static doublereal smlnum; - static logical lquery; - extern /* Subroutine */ int zunmtr_(char *, char *, char *, integer *, - integer *, doublecomplex *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *, integer *); - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZHEEVD computes all eigenvalues and, optionally, eigenvectors of a - complex Hermitian matrix A. If eigenvectors are desired, it uses a - divide and conquer algorithm. - - The divide and conquer algorithm makes very mild assumptions about - floating point arithmetic. It will work on machines with a guard - digit in add/subtract, or on those binary machines without guard - digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or - Cray-2. It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. - - Arguments - ========= - - JOBZ (input) CHARACTER*1 - = 'N': Compute eigenvalues only; - = 'V': Compute eigenvalues and eigenvectors. - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA, N) - On entry, the Hermitian matrix A. If UPLO = 'U', the - leading N-by-N upper triangular part of A contains the - upper triangular part of the matrix A. If UPLO = 'L', - the leading N-by-N lower triangular part of A contains - the lower triangular part of the matrix A. - On exit, if JOBZ = 'V', then if INFO = 0, A contains the - orthonormal eigenvectors of the matrix A. - If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') - or the upper triangle (if UPLO='U') of A, including the - diagonal, is destroyed. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - W (output) DOUBLE PRECISION array, dimension (N) - If INFO = 0, the eigenvalues in ascending order. - - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The length of the array WORK. - If N <= 1, LWORK must be at least 1. - If JOBZ = 'N' and N > 1, LWORK must be at least N + 1. - If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - RWORK (workspace/output) DOUBLE PRECISION array, - dimension (LRWORK) - On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. - - LRWORK (input) INTEGER - The dimension of the array RWORK. - If N <= 1, LRWORK must be at least 1. - If JOBZ = 'N' and N > 1, LRWORK must be at least N. - If JOBZ = 'V' and N > 1, LRWORK must be at least - 1 + 5*N + 2*N**2. - - If LRWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the RWORK array, - returns this value as the first entry of the RWORK array, and - no error message related to LRWORK is issued by XERBLA. - - IWORK (workspace/output) INTEGER array, dimension (LIWORK) - On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. - - LIWORK (input) INTEGER - The dimension of the array IWORK. - If N <= 1, LIWORK must be at least 1. - If JOBZ = 'N' and N > 1, LIWORK must be at least 1. - If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. - - If LIWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the IWORK array, - returns this value as the first entry of the IWORK array, and - no error message related to LIWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the algorithm failed to converge; i - off-diagonal elements of an intermediate tridiagonal - form did not converge to zero. - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --w; - --work; - --rwork; - --iwork; - - /* Function Body */ - wantz = lsame_(jobz, "V"); - lower = lsame_(uplo, "L"); - lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; - - *info = 0; - if (*n <= 1) { - lwmin = 1; - lrwmin = 1; - liwmin = 1; - lopt = lwmin; - lropt = lrwmin; - liopt = liwmin; - } else { - if (wantz) { - lwmin = (*n << 1) + *n * *n; -/* Computing 2nd power */ - i__1 = *n; - lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1); - liwmin = *n * 5 + 3; - } else { - lwmin = *n + 1; - lrwmin = *n; - liwmin = 1; - } - lopt = lwmin; - lropt = lrwmin; - liopt = liwmin; - } - if (! (wantz || lsame_(jobz, "N"))) { - *info = -1; - } else if (! (lower || lsame_(uplo, "U"))) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*lwork < lwmin && ! lquery) { - *info = -8; - } else if (*lrwork < lrwmin && ! lquery) { - *info = -10; - } else if (*liwork < liwmin && ! lquery) { - *info = -12; - } - - if (*info == 0) { - work[1].r = (doublereal) lopt, work[1].i = 0.; - rwork[1] = (doublereal) lropt; - iwork[1] = liopt; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZHEEVD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - if (*n == 1) { - i__1 = a_dim1 + 1; - w[1] = a[i__1].r; - if (wantz) { - i__1 = a_dim1 + 1; - a[i__1].r = 1., a[i__1].i = 0.; - } - return 0; - } - -/* Get machine constants. */ - - safmin = SAFEMINIMUM; - eps = PRECISION; - smlnum = safmin / eps; - bignum = 1. / smlnum; - rmin = sqrt(smlnum); - rmax = sqrt(bignum); - -/* Scale matrix to allowable range, if necessary. */ - - anrm = zlanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]); - iscale = 0; - if (anrm > 0. && anrm < rmin) { - iscale = 1; - sigma = rmin / anrm; - } else if (anrm > rmax) { - iscale = 1; - sigma = rmax / anrm; - } - if (iscale == 1) { - zlascl_(uplo, &c__0, &c__0, &c_b1015, &sigma, n, n, &a[a_offset], lda, - info); - } - -/* Call ZHETRD to reduce Hermitian matrix to tridiagonal form. */ - - inde = 1; - indtau = 1; - indwrk = indtau + *n; - indrwk = inde + *n; - indwk2 = indwrk + *n * *n; - llwork = *lwork - indwrk + 1; - llwrk2 = *lwork - indwk2 + 1; - llrwk = *lrwork - indrwk + 1; - zhetrd_(uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], &work[indtau], & - work[indwrk], &llwork, &iinfo); -/* Computing MAX */ - i__1 = indwrk; - d__1 = (doublereal) lopt, d__2 = (doublereal) (*n) + work[i__1].r; - lopt = (integer) max(d__1,d__2); - -/* - For eigenvalues only, call DSTERF. For eigenvectors, first call - ZSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the - tridiagonal matrix, then call ZUNMTR to multiply it to the - Householder transformations represented as Householder vectors in - A. -*/ - - if (! wantz) { - dsterf_(n, &w[1], &rwork[inde], info); - } else { - zstedc_("I", n, &w[1], &rwork[inde], &work[indwrk], n, &work[indwk2], - &llwrk2, &rwork[indrwk], &llrwk, &iwork[1], liwork, info); - zunmtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ - indwrk], n, &work[indwk2], &llwrk2, &iinfo); - zlacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); -/* - Computing MAX - Computing 2nd power -*/ - i__3 = *n; - i__4 = indwk2; - i__1 = lopt, i__2 = *n + i__3 * i__3 + (integer) work[i__4].r; - lopt = max(i__1,i__2); - } - -/* If matrix was scaled, then rescale eigenvalues appropriately. */ - - if (iscale == 1) { - if (*info == 0) { - imax = *n; - } else { - imax = *info - 1; - } - d__1 = 1. / sigma; - dscal_(&imax, &d__1, &w[1], &c__1); - } - - work[1].r = (doublereal) lopt, work[1].i = 0.; - rwork[1] = (doublereal) lropt; - iwork[1] = liopt; - - return 0; - -/* End of ZHEEVD */ - -} /* zheevd_ */ - -/* Subroutine */ int zhetd2_(char *uplo, integer *n, doublecomplex *a, - integer *lda, doublereal *d__, doublereal *e, doublecomplex *tau, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - doublereal d__1; - doublecomplex z__1, z__2, z__3, z__4; - - /* Local variables */ - static integer i__; - static doublecomplex taui; - extern /* Subroutine */ int zher2_(char *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *); - static doublecomplex alpha; - extern logical lsame_(char *, char *); - extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, integer *); - extern /* Subroutine */ int zhemv_(char *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *, - doublecomplex *, doublecomplex *, integer *); - static logical upper; - extern /* Subroutine */ int zaxpy_(integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *), xerbla_( - char *, integer *), zlarfg_(integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - ZHETD2 reduces a complex Hermitian matrix A to real symmetric - tridiagonal form T by a unitary similarity transformation: - Q' * A * Q = T. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the upper or lower triangular part of the - Hermitian matrix A is stored: - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the Hermitian matrix A. If UPLO = 'U', the leading - n-by-n upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading n-by-n lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - On exit, if UPLO = 'U', the diagonal and first superdiagonal - of A are overwritten by the corresponding elements of the - tridiagonal matrix T, and the elements above the first - superdiagonal, with the array TAU, represent the unitary - matrix Q as a product of elementary reflectors; if UPLO - = 'L', the diagonal and first subdiagonal of A are over- - written by the corresponding elements of the tridiagonal - matrix T, and the elements below the first subdiagonal, with - the array TAU, represent the unitary matrix Q as a product - of elementary reflectors. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - D (output) DOUBLE PRECISION array, dimension (N) - The diagonal elements of the tridiagonal matrix T: - D(i) = A(i,i). - - E (output) DOUBLE PRECISION array, dimension (N-1) - The off-diagonal elements of the tridiagonal matrix T: - E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. - - TAU (output) COMPLEX*16 array, dimension (N-1) - The scalar factors of the elementary reflectors (see Further - Details). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - If UPLO = 'U', the matrix Q is represented as a product of elementary - reflectors - - Q = H(n-1) . . . H(2) H(1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in - A(1:i-1,i+1), and tau in TAU(i). - - If UPLO = 'L', the matrix Q is represented as a product of elementary - reflectors - - Q = H(1) H(2) . . . H(n-1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), - and tau in TAU(i). - - The contents of A on exit are illustrated by the following examples - with n = 5: - - if UPLO = 'U': if UPLO = 'L': - - ( d e v2 v3 v4 ) ( d ) - ( d e v3 v4 ) ( e d ) - ( d e v4 ) ( v1 e d ) - ( d e ) ( v1 v2 e d ) - ( d ) ( v1 v2 v3 e d ) - - where d and e denote diagonal and off-diagonal elements of T, and vi - denotes an element of the vector defining H(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tau; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZHETD2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n <= 0) { - return 0; - } - - if (upper) { - -/* Reduce the upper triangle of A */ - - i__1 = *n + *n * a_dim1; - i__2 = *n + *n * a_dim1; - d__1 = a[i__2].r; - a[i__1].r = d__1, a[i__1].i = 0.; - for (i__ = *n - 1; i__ >= 1; --i__) { - -/* - Generate elementary reflector H(i) = I - tau * v * v' - to annihilate A(1:i-1,i+1) -*/ - - i__1 = i__ + (i__ + 1) * a_dim1; - alpha.r = a[i__1].r, alpha.i = a[i__1].i; - zlarfg_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &taui); - i__1 = i__; - e[i__1] = alpha.r; - - if (taui.r != 0. || taui.i != 0.) { - -/* Apply H(i) from both sides to A(1:i,1:i) */ - - i__1 = i__ + (i__ + 1) * a_dim1; - a[i__1].r = 1., a[i__1].i = 0.; - -/* Compute x := tau * A * v storing x in TAU(1:i) */ - - zhemv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * - a_dim1 + 1], &c__1, &c_b59, &tau[1], &c__1) - ; - -/* Compute w := x - 1/2 * tau * (x'*v) * v */ - - z__3.r = -.5, z__3.i = -0.; - z__2.r = z__3.r * taui.r - z__3.i * taui.i, z__2.i = z__3.r * - taui.i + z__3.i * taui.r; - zdotc_(&z__4, &i__, &tau[1], &c__1, &a[(i__ + 1) * a_dim1 + 1] - , &c__1); - z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r * - z__4.i + z__2.i * z__4.r; - alpha.r = z__1.r, alpha.i = z__1.i; - zaxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[ - 1], &c__1); - -/* - Apply the transformation as a rank-2 update: - A := A - v * w' - w * v' -*/ - - z__1.r = -1., z__1.i = -0.; - zher2_(uplo, &i__, &z__1, &a[(i__ + 1) * a_dim1 + 1], &c__1, & - tau[1], &c__1, &a[a_offset], lda); - - } else { - i__1 = i__ + i__ * a_dim1; - i__2 = i__ + i__ * a_dim1; - d__1 = a[i__2].r; - a[i__1].r = d__1, a[i__1].i = 0.; - } - i__1 = i__ + (i__ + 1) * a_dim1; - i__2 = i__; - a[i__1].r = e[i__2], a[i__1].i = 0.; - i__1 = i__ + 1; - i__2 = i__ + 1 + (i__ + 1) * a_dim1; - d__[i__1] = a[i__2].r; - i__1 = i__; - tau[i__1].r = taui.r, tau[i__1].i = taui.i; -/* L10: */ - } - i__1 = a_dim1 + 1; - d__[1] = a[i__1].r; - } else { - -/* Reduce the lower triangle of A */ - - i__1 = a_dim1 + 1; - i__2 = a_dim1 + 1; - d__1 = a[i__2].r; - a[i__1].r = d__1, a[i__1].i = 0.; - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* - Generate elementary reflector H(i) = I - tau * v * v' - to annihilate A(i+2:n,i) -*/ - - i__2 = i__ + 1 + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *n - i__; -/* Computing MIN */ - i__3 = i__ + 2; - zlarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, & - taui); - i__2 = i__; - e[i__2] = alpha.r; - - if (taui.r != 0. || taui.i != 0.) { - -/* Apply H(i) from both sides to A(i+1:n,i+1:n) */ - - i__2 = i__ + 1 + i__ * a_dim1; - a[i__2].r = 1., a[i__2].i = 0.; - -/* Compute x := tau * A * v storing y in TAU(i:n-1) */ - - i__2 = *n - i__; - zhemv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], - lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b59, &tau[ - i__], &c__1); - -/* Compute w := x - 1/2 * tau * (x'*v) * v */ - - z__3.r = -.5, z__3.i = -0.; - z__2.r = z__3.r * taui.r - z__3.i * taui.i, z__2.i = z__3.r * - taui.i + z__3.i * taui.r; - i__2 = *n - i__; - zdotc_(&z__4, &i__2, &tau[i__], &c__1, &a[i__ + 1 + i__ * - a_dim1], &c__1); - z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r * - z__4.i + z__2.i * z__4.r; - alpha.r = z__1.r, alpha.i = z__1.i; - i__2 = *n - i__; - zaxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ - i__], &c__1); - -/* - Apply the transformation as a rank-2 update: - A := A - v * w' - w * v' -*/ - - i__2 = *n - i__; - z__1.r = -1., z__1.i = -0.; - zher2_(uplo, &i__2, &z__1, &a[i__ + 1 + i__ * a_dim1], &c__1, - &tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * a_dim1], - lda); - - } else { - i__2 = i__ + 1 + (i__ + 1) * a_dim1; - i__3 = i__ + 1 + (i__ + 1) * a_dim1; - d__1 = a[i__3].r; - a[i__2].r = d__1, a[i__2].i = 0.; - } - i__2 = i__ + 1 + i__ * a_dim1; - i__3 = i__; - a[i__2].r = e[i__3], a[i__2].i = 0.; - i__2 = i__; - i__3 = i__ + i__ * a_dim1; - d__[i__2] = a[i__3].r; - i__2 = i__; - tau[i__2].r = taui.r, tau[i__2].i = taui.i; -/* L20: */ - } - i__1 = *n; - i__2 = *n + *n * a_dim1; - d__[i__1] = a[i__2].r; - } - - return 0; - -/* End of ZHETD2 */ - -} /* zhetd2_ */ - -/* Subroutine */ int zhetrd_(char *uplo, integer *n, doublecomplex *a, - integer *lda, doublereal *d__, doublereal *e, doublecomplex *tau, - doublecomplex *work, integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - doublecomplex z__1; - - /* Local variables */ - static integer i__, j, nb, kk, nx, iws; - extern logical lsame_(char *, char *); - static integer nbmin, iinfo; - static logical upper; - extern /* Subroutine */ int zhetd2_(char *, integer *, doublecomplex *, - integer *, doublereal *, doublereal *, doublecomplex *, integer *), zher2k_(char *, char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublereal *, doublecomplex *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int zlatrd_(char *, integer *, integer *, - doublecomplex *, integer *, doublereal *, doublecomplex *, - doublecomplex *, integer *); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZHETRD reduces a complex Hermitian matrix A to real symmetric - tridiagonal form T by a unitary similarity transformation: - Q**H * A * Q = T. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the Hermitian matrix A. If UPLO = 'U', the leading - N-by-N upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading N-by-N lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - On exit, if UPLO = 'U', the diagonal and first superdiagonal - of A are overwritten by the corresponding elements of the - tridiagonal matrix T, and the elements above the first - superdiagonal, with the array TAU, represent the unitary - matrix Q as a product of elementary reflectors; if UPLO - = 'L', the diagonal and first subdiagonal of A are over- - written by the corresponding elements of the tridiagonal - matrix T, and the elements below the first subdiagonal, with - the array TAU, represent the unitary matrix Q as a product - of elementary reflectors. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - D (output) DOUBLE PRECISION array, dimension (N) - The diagonal elements of the tridiagonal matrix T: - D(i) = A(i,i). - - E (output) DOUBLE PRECISION array, dimension (N-1) - The off-diagonal elements of the tridiagonal matrix T: - E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. - - TAU (output) COMPLEX*16 array, dimension (N-1) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= 1. - For optimum performance LWORK >= N*NB, where NB is the - optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - If UPLO = 'U', the matrix Q is represented as a product of elementary - reflectors - - Q = H(n-1) . . . H(2) H(1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in - A(1:i-1,i+1), and tau in TAU(i). - - If UPLO = 'L', the matrix Q is represented as a product of elementary - reflectors - - Q = H(1) H(2) . . . H(n-1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), - and tau in TAU(i). - - The contents of A on exit are illustrated by the following examples - with n = 5: - - if UPLO = 'U': if UPLO = 'L': - - ( d e v2 v3 v4 ) ( d ) - ( d e v3 v4 ) ( e d ) - ( d e v4 ) ( v1 e d ) - ( d e ) ( v1 v2 e d ) - ( d ) ( v1 v2 v3 e d ) - - where d and e denote diagonal and off-diagonal elements of T, and vi - denotes an element of the vector defining H(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tau; - --work; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - lquery = *lwork == -1; - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } else if (*lwork < 1 && ! lquery) { - *info = -9; - } - - if (*info == 0) { - -/* Determine the block size. */ - - nb = ilaenv_(&c__1, "ZHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, - (ftnlen)1); - lwkopt = *n * nb; - work[1].r = (doublereal) lwkopt, work[1].i = 0.; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZHETRD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - work[1].r = 1., work[1].i = 0.; - return 0; - } - - nx = *n; - iws = 1; - if (nb > 1 && nb < *n) { - -/* - Determine when to cross over from blocked to unblocked code - (last block is always handled by unblocked code). - - Computing MAX -*/ - i__1 = nb, i__2 = ilaenv_(&c__3, "ZHETRD", uplo, n, &c_n1, &c_n1, & - c_n1, (ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < *n) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *n; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: determine the - minimum value of NB, and reduce NB or force use of - unblocked code by setting NX = N. - - Computing MAX -*/ - i__1 = *lwork / ldwork; - nb = max(i__1,1); - nbmin = ilaenv_(&c__2, "ZHETRD", uplo, n, &c_n1, &c_n1, &c_n1, - (ftnlen)6, (ftnlen)1); - if (nb < nbmin) { - nx = *n; - } - } - } else { - nx = *n; - } - } else { - nb = 1; - } - - if (upper) { - -/* - Reduce the upper triangle of A. - Columns 1:kk are handled by the unblocked method. -*/ - - kk = *n - (*n - nx + nb - 1) / nb * nb; - i__1 = kk + 1; - i__2 = -nb; - for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += - i__2) { - -/* - Reduce columns i:i+nb-1 to tridiagonal form and form the - matrix W which is needed to update the unreduced part of - the matrix -*/ - - i__3 = i__ + nb - 1; - zlatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & - work[1], &ldwork); - -/* - Update the unreduced submatrix A(1:i-1,1:i-1), using an - update of the form: A := A - V*W' - W*V' -*/ - - i__3 = i__ - 1; - z__1.r = -1., z__1.i = -0.; - zher2k_(uplo, "No transpose", &i__3, &nb, &z__1, &a[i__ * a_dim1 - + 1], lda, &work[1], &ldwork, &c_b1015, &a[a_offset], lda); - -/* - Copy superdiagonal elements back into A, and diagonal - elements into D -*/ - - i__3 = i__ + nb - 1; - for (j = i__; j <= i__3; ++j) { - i__4 = j - 1 + j * a_dim1; - i__5 = j - 1; - a[i__4].r = e[i__5], a[i__4].i = 0.; - i__4 = j; - i__5 = j + j * a_dim1; - d__[i__4] = a[i__5].r; -/* L10: */ - } -/* L20: */ - } - -/* Use unblocked code to reduce the last or only block */ - - zhetd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo); - } else { - -/* Reduce the lower triangle of A */ - - i__2 = *n - nx; - i__1 = nb; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { - -/* - Reduce columns i:i+nb-1 to tridiagonal form and form the - matrix W which is needed to update the unreduced part of - the matrix -*/ - - i__3 = *n - i__ + 1; - zlatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], & - tau[i__], &work[1], &ldwork); - -/* - Update the unreduced submatrix A(i+nb:n,i+nb:n), using - an update of the form: A := A - V*W' - W*V' -*/ - - i__3 = *n - i__ - nb + 1; - z__1.r = -1., z__1.i = -0.; - zher2k_(uplo, "No transpose", &i__3, &nb, &z__1, &a[i__ + nb + - i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b1015, &a[ - i__ + nb + (i__ + nb) * a_dim1], lda); - -/* - Copy subdiagonal elements back into A, and diagonal - elements into D -*/ - - i__3 = i__ + nb - 1; - for (j = i__; j <= i__3; ++j) { - i__4 = j + 1 + j * a_dim1; - i__5 = j; - a[i__4].r = e[i__5], a[i__4].i = 0.; - i__4 = j; - i__5 = j + j * a_dim1; - d__[i__4] = a[i__5].r; -/* L30: */ - } -/* L40: */ - } - -/* Use unblocked code to reduce the last or only block */ - - i__1 = *n - i__ + 1; - zhetd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], - &tau[i__], &iinfo); - } - - work[1].r = (doublereal) lwkopt, work[1].i = 0.; - return 0; - -/* End of ZHETRD */ - -} /* zhetrd_ */ - -/* Subroutine */ int zhseqr_(char *job, char *compz, integer *n, integer *ilo, - integer *ihi, doublecomplex *h__, integer *ldh, doublecomplex *w, - doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork, - integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4[2], - i__5, i__6; - doublereal d__1, d__2, d__3, d__4; - doublecomplex z__1; - char ch__1[2]; - - /* Builtin functions */ - double d_imag(doublecomplex *); - void d_cnjg(doublecomplex *, doublecomplex *); - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i__, j, k, l; - static doublecomplex s[225] /* was [15][15] */, v[16]; - static integer i1, i2, ii, nh, nr, ns, nv; - static doublecomplex vv[16]; - static integer itn; - static doublecomplex tau; - static integer its; - static doublereal ulp, tst1; - static integer maxb, ierr; - static doublereal unfl; - static doublecomplex temp; - static doublereal ovfl; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int zscal_(integer *, doublecomplex *, - doublecomplex *, integer *); - static integer itemp; - static doublereal rtemp; - extern /* Subroutine */ int zgemv_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *); - static logical initz, wantt, wantz; - static doublereal rwork[1]; - extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, - doublecomplex *, integer *); - extern doublereal dlapy2_(doublereal *, doublereal *); - extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); - - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int zdscal_(integer *, doublereal *, - doublecomplex *, integer *), zlarfg_(integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *); - extern integer izamax_(integer *, doublecomplex *, integer *); - extern doublereal zlanhs_(char *, integer *, doublecomplex *, integer *, - doublereal *); - extern /* Subroutine */ int zlahqr_(logical *, logical *, integer *, - integer *, integer *, doublecomplex *, integer *, doublecomplex *, - integer *, integer *, doublecomplex *, integer *, integer *), - zlacpy_(char *, integer *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *), zlaset_(char *, integer *, - integer *, doublecomplex *, doublecomplex *, doublecomplex *, - integer *), zlarfx_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *); - static doublereal smlnum; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZHSEQR computes the eigenvalues of a complex upper Hessenberg - matrix H, and, optionally, the matrices T and Z from the Schur - decomposition H = Z T Z**H, where T is an upper triangular matrix - (the Schur form), and Z is the unitary matrix of Schur vectors. - - Optionally Z may be postmultiplied into an input unitary matrix Q, - so that this routine can give the Schur factorization of a matrix A - which has been reduced to the Hessenberg form H by the unitary - matrix Q: A = Q*H*Q**H = (QZ)*T*(QZ)**H. - - Arguments - ========= - - JOB (input) CHARACTER*1 - = 'E': compute eigenvalues only; - = 'S': compute eigenvalues and the Schur form T. - - COMPZ (input) CHARACTER*1 - = 'N': no Schur vectors are computed; - = 'I': Z is initialized to the unit matrix and the matrix Z - of Schur vectors of H is returned; - = 'V': Z must contain an unitary matrix Q on entry, and - the product Q*Z is returned. - - N (input) INTEGER - The order of the matrix H. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that H is already upper triangular in rows - and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally - set by a previous call to ZGEBAL, and then passed to CGEHRD - when the matrix output by ZGEBAL is reduced to Hessenberg - form. Otherwise ILO and IHI should be set to 1 and N - respectively. - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - - H (input/output) COMPLEX*16 array, dimension (LDH,N) - On entry, the upper Hessenberg matrix H. - On exit, if JOB = 'S', H contains the upper triangular matrix - T from the Schur decomposition (the Schur form). If - JOB = 'E', the contents of H are unspecified on exit. - - LDH (input) INTEGER - The leading dimension of the array H. LDH >= max(1,N). - - W (output) COMPLEX*16 array, dimension (N) - The computed eigenvalues. If JOB = 'S', the eigenvalues are - stored in the same order as on the diagonal of the Schur form - returned in H, with W(i) = H(i,i). - - Z (input/output) COMPLEX*16 array, dimension (LDZ,N) - If COMPZ = 'N': Z is not referenced. - If COMPZ = 'I': on entry, Z need not be set, and on exit, Z - contains the unitary matrix Z of the Schur vectors of H. - If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q, - which is assumed to be equal to the unit matrix except for - the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z. - Normally Q is the unitary matrix generated by ZUNGHR after - the call to ZGEHRD which formed the Hessenberg matrix H. - - LDZ (input) INTEGER - The leading dimension of the array Z. - LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise. - - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,N). - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, ZHSEQR failed to compute all the - eigenvalues in a total of 30*(IHI-ILO+1) iterations; - elements 1:ilo-1 and i+1:n of W contain those - eigenvalues which have been successfully computed. - - ===================================================================== - - - Decode and test the input parameters -*/ - - /* Parameter adjustments */ - h_dim1 = *ldh; - h_offset = 1 + h_dim1; - h__ -= h_offset; - --w; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - --work; - - /* Function Body */ - wantt = lsame_(job, "S"); - initz = lsame_(compz, "I"); - wantz = initz || lsame_(compz, "V"); - - *info = 0; - i__1 = max(1,*n); - work[1].r = (doublereal) i__1, work[1].i = 0.; - lquery = *lwork == -1; - if (! lsame_(job, "E") && ! wantt) { - *info = -1; - } else if (! lsame_(compz, "N") && ! wantz) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*ilo < 1 || *ilo > max(1,*n)) { - *info = -4; - } else if (*ihi < min(*ilo,*n) || *ihi > *n) { - *info = -5; - } else if (*ldh < max(1,*n)) { - *info = -7; - } else if (*ldz < 1 || wantz && *ldz < max(1,*n)) { - *info = -10; - } else if (*lwork < max(1,*n) && ! lquery) { - *info = -12; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZHSEQR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Initialize Z, if necessary */ - - if (initz) { - zlaset_("Full", n, n, &c_b59, &c_b60, &z__[z_offset], ldz); - } - -/* Store the eigenvalues isolated by ZGEBAL. */ - - i__1 = *ilo - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__ + i__ * h_dim1; - w[i__2].r = h__[i__3].r, w[i__2].i = h__[i__3].i; -/* L10: */ - } - i__1 = *n; - for (i__ = *ihi + 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__ + i__ * h_dim1; - w[i__2].r = h__[i__3].r, w[i__2].i = h__[i__3].i; -/* L20: */ - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - if (*ilo == *ihi) { - i__1 = *ilo; - i__2 = *ilo + *ilo * h_dim1; - w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; - return 0; - } - -/* - Set rows and columns ILO to IHI to zero below the first - subdiagonal. -*/ - - i__1 = *ihi - 2; - for (j = *ilo; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j + 2; i__ <= i__2; ++i__) { - i__3 = i__ + j * h_dim1; - h__[i__3].r = 0., h__[i__3].i = 0.; -/* L30: */ - } -/* L40: */ - } - nh = *ihi - *ilo + 1; - -/* - I1 and I2 are the indices of the first row and last column of H - to which transformations must be applied. If eigenvalues only are - being computed, I1 and I2 are re-set inside the main loop. -*/ - - if (wantt) { - i1 = 1; - i2 = *n; - } else { - i1 = *ilo; - i2 = *ihi; - } - -/* Ensure that the subdiagonal elements are real. */ - - i__1 = *ihi; - for (i__ = *ilo + 1; i__ <= i__1; ++i__) { - i__2 = i__ + (i__ - 1) * h_dim1; - temp.r = h__[i__2].r, temp.i = h__[i__2].i; - if (d_imag(&temp) != 0.) { - d__1 = temp.r; - d__2 = d_imag(&temp); - rtemp = dlapy2_(&d__1, &d__2); - i__2 = i__ + (i__ - 1) * h_dim1; - h__[i__2].r = rtemp, h__[i__2].i = 0.; - z__1.r = temp.r / rtemp, z__1.i = temp.i / rtemp; - temp.r = z__1.r, temp.i = z__1.i; - if (i2 > i__) { - i__2 = i2 - i__; - d_cnjg(&z__1, &temp); - zscal_(&i__2, &z__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); - } - i__2 = i__ - i1; - zscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1); - if (i__ < *ihi) { - i__2 = i__ + 1 + i__ * h_dim1; - i__3 = i__ + 1 + i__ * h_dim1; - z__1.r = temp.r * h__[i__3].r - temp.i * h__[i__3].i, z__1.i = - temp.r * h__[i__3].i + temp.i * h__[i__3].r; - h__[i__2].r = z__1.r, h__[i__2].i = z__1.i; - } - if (wantz) { - zscal_(&nh, &temp, &z__[*ilo + i__ * z_dim1], &c__1); - } - } -/* L50: */ - } - -/* - Determine the order of the multi-shift QR algorithm to be used. - - Writing concatenation -*/ - i__4[0] = 1, a__1[0] = job; - i__4[1] = 1, a__1[1] = compz; - s_cat(ch__1, a__1, i__4, &c__2, (ftnlen)2); - ns = ilaenv_(&c__4, "ZHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( - ftnlen)2); -/* Writing concatenation */ - i__4[0] = 1, a__1[0] = job; - i__4[1] = 1, a__1[1] = compz; - s_cat(ch__1, a__1, i__4, &c__2, (ftnlen)2); - maxb = ilaenv_(&c__8, "ZHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( - ftnlen)2); - if (ns <= 1 || ns > nh || maxb >= nh) { - -/* Use the standard double-shift algorithm */ - - zlahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], ilo, - ihi, &z__[z_offset], ldz, info); - return 0; - } - maxb = max(2,maxb); -/* Computing MIN */ - i__1 = min(ns,maxb); - ns = min(i__1,15); - -/* - Now 1 < NS <= MAXB < NH. - - Set machine-dependent constants for the stopping criterion. - If norm(H) <= sqrt(OVFL), overflow should not occur. -*/ - - unfl = SAFEMINIMUM; - ovfl = 1. / unfl; - dlabad_(&unfl, &ovfl); - ulp = PRECISION; - smlnum = unfl * (nh / ulp); - -/* ITN is the total number of multiple-shift QR iterations allowed. */ - - itn = nh * 30; - -/* - The main loop begins here. I is the loop index and decreases from - IHI to ILO in steps of at most MAXB. Each iteration of the loop - works with the active submatrix in rows and columns L to I. - Eigenvalues I+1 to IHI have already converged. Either L = ILO, or - H(L,L-1) is negligible so that the matrix splits. -*/ - - i__ = *ihi; -L60: - if (i__ < *ilo) { - goto L180; - } - -/* - Perform multiple-shift QR iterations on rows and columns ILO to I - until a submatrix of order at most MAXB splits off at the bottom - because a subdiagonal element has become negligible. -*/ - - l = *ilo; - i__1 = itn; - for (its = 0; its <= i__1; ++its) { - -/* Look for a single small subdiagonal element. */ - - i__2 = l + 1; - for (k = i__; k >= i__2; --k) { - i__3 = k - 1 + (k - 1) * h_dim1; - i__5 = k + k * h_dim1; - tst1 = (d__1 = h__[i__3].r, abs(d__1)) + (d__2 = d_imag(&h__[k - - 1 + (k - 1) * h_dim1]), abs(d__2)) + ((d__3 = h__[i__5].r, - abs(d__3)) + (d__4 = d_imag(&h__[k + k * h_dim1]), abs( - d__4))); - if (tst1 == 0.) { - i__3 = i__ - l + 1; - tst1 = zlanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, rwork); - } - i__3 = k + (k - 1) * h_dim1; -/* Computing MAX */ - d__2 = ulp * tst1; - if ((d__1 = h__[i__3].r, abs(d__1)) <= max(d__2,smlnum)) { - goto L80; - } -/* L70: */ - } -L80: - l = k; - if (l > *ilo) { - -/* H(L,L-1) is negligible. */ - - i__2 = l + (l - 1) * h_dim1; - h__[i__2].r = 0., h__[i__2].i = 0.; - } - -/* Exit from loop if a submatrix of order <= MAXB has split off. */ - - if (l >= i__ - maxb + 1) { - goto L170; - } - -/* - Now the active submatrix is in rows and columns L to I. If - eigenvalues only are being computed, only the active submatrix - need be transformed. -*/ - - if (! wantt) { - i1 = l; - i2 = i__; - } - - if (its == 20 || its == 30) { - -/* Exceptional shifts. */ - - i__2 = i__; - for (ii = i__ - ns + 1; ii <= i__2; ++ii) { - i__3 = ii; - i__5 = ii + (ii - 1) * h_dim1; - i__6 = ii + ii * h_dim1; - d__3 = ((d__1 = h__[i__5].r, abs(d__1)) + (d__2 = h__[i__6].r, - abs(d__2))) * 1.5; - w[i__3].r = d__3, w[i__3].i = 0.; -/* L90: */ - } - } else { - -/* Use eigenvalues of trailing submatrix of order NS as shifts. */ - - zlacpy_("Full", &ns, &ns, &h__[i__ - ns + 1 + (i__ - ns + 1) * - h_dim1], ldh, s, &c__15); - zlahqr_(&c_false, &c_false, &ns, &c__1, &ns, s, &c__15, &w[i__ - - ns + 1], &c__1, &ns, &z__[z_offset], ldz, &ierr); - if (ierr > 0) { - -/* - If ZLAHQR failed to compute all NS eigenvalues, use the - unconverged diagonal elements as the remaining shifts. -*/ - - i__2 = ierr; - for (ii = 1; ii <= i__2; ++ii) { - i__3 = i__ - ns + ii; - i__5 = ii + ii * 15 - 16; - w[i__3].r = s[i__5].r, w[i__3].i = s[i__5].i; -/* L100: */ - } - } - } - -/* - Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns)) - where G is the Hessenberg submatrix H(L:I,L:I) and w is - the vector of shifts (stored in W). The result is - stored in the local array V. -*/ - - v[0].r = 1., v[0].i = 0.; - i__2 = ns + 1; - for (ii = 2; ii <= i__2; ++ii) { - i__3 = ii - 1; - v[i__3].r = 0., v[i__3].i = 0.; -/* L110: */ - } - nv = 1; - i__2 = i__; - for (j = i__ - ns + 1; j <= i__2; ++j) { - i__3 = nv + 1; - zcopy_(&i__3, v, &c__1, vv, &c__1); - i__3 = nv + 1; - i__5 = j; - z__1.r = -w[i__5].r, z__1.i = -w[i__5].i; - zgemv_("No transpose", &i__3, &nv, &c_b60, &h__[l + l * h_dim1], - ldh, vv, &c__1, &z__1, v, &c__1); - ++nv; - -/* - Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero, - reset it to the unit vector. -*/ - - itemp = izamax_(&nv, v, &c__1); - i__3 = itemp - 1; - rtemp = (d__1 = v[i__3].r, abs(d__1)) + (d__2 = d_imag(&v[itemp - - 1]), abs(d__2)); - if (rtemp == 0.) { - v[0].r = 1., v[0].i = 0.; - i__3 = nv; - for (ii = 2; ii <= i__3; ++ii) { - i__5 = ii - 1; - v[i__5].r = 0., v[i__5].i = 0.; -/* L120: */ - } - } else { - rtemp = max(rtemp,smlnum); - d__1 = 1. / rtemp; - zdscal_(&nv, &d__1, v, &c__1); - } -/* L130: */ - } - -/* Multiple-shift QR step */ - - i__2 = i__ - 1; - for (k = l; k <= i__2; ++k) { - -/* - The first iteration of this loop determines a reflection G - from the vector V and applies it from left and right to H, - thus creating a nonzero bulge below the subdiagonal. - - Each subsequent iteration determines a reflection G to - restore the Hessenberg form in the (K-1)th column, and thus - chases the bulge one step toward the bottom of the active - submatrix. NR is the order of G. - - Computing MIN -*/ - i__3 = ns + 1, i__5 = i__ - k + 1; - nr = min(i__3,i__5); - if (k > l) { - zcopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); - } - zlarfg_(&nr, v, &v[1], &c__1, &tau); - if (k > l) { - i__3 = k + (k - 1) * h_dim1; - h__[i__3].r = v[0].r, h__[i__3].i = v[0].i; - i__3 = i__; - for (ii = k + 1; ii <= i__3; ++ii) { - i__5 = ii + (k - 1) * h_dim1; - h__[i__5].r = 0., h__[i__5].i = 0.; -/* L140: */ - } - } - v[0].r = 1., v[0].i = 0.; - -/* - Apply G' from the left to transform the rows of the matrix - in columns K to I2. -*/ - - i__3 = i2 - k + 1; - d_cnjg(&z__1, &tau); - zlarfx_("Left", &nr, &i__3, v, &z__1, &h__[k + k * h_dim1], ldh, & - work[1]); - -/* - Apply G from the right to transform the columns of the - matrix in rows I1 to min(K+NR,I). - - Computing MIN -*/ - i__5 = k + nr; - i__3 = min(i__5,i__) - i1 + 1; - zlarfx_("Right", &i__3, &nr, v, &tau, &h__[i1 + k * h_dim1], ldh, - &work[1]); - - if (wantz) { - -/* Accumulate transformations in the matrix Z */ - - zlarfx_("Right", &nh, &nr, v, &tau, &z__[*ilo + k * z_dim1], - ldz, &work[1]); - } -/* L150: */ - } - -/* Ensure that H(I,I-1) is real. */ - - i__2 = i__ + (i__ - 1) * h_dim1; - temp.r = h__[i__2].r, temp.i = h__[i__2].i; - if (d_imag(&temp) != 0.) { - d__1 = temp.r; - d__2 = d_imag(&temp); - rtemp = dlapy2_(&d__1, &d__2); - i__2 = i__ + (i__ - 1) * h_dim1; - h__[i__2].r = rtemp, h__[i__2].i = 0.; - z__1.r = temp.r / rtemp, z__1.i = temp.i / rtemp; - temp.r = z__1.r, temp.i = z__1.i; - if (i2 > i__) { - i__2 = i2 - i__; - d_cnjg(&z__1, &temp); - zscal_(&i__2, &z__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); - } - i__2 = i__ - i1; - zscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1); - if (wantz) { - zscal_(&nh, &temp, &z__[*ilo + i__ * z_dim1], &c__1); - } - } - -/* L160: */ - } - -/* Failure to converge in remaining number of iterations */ - - *info = i__; - return 0; - -L170: - -/* - A submatrix of order <= MAXB in rows and columns L to I has split - off. Use the double-shift QR algorithm to handle it. -*/ - - zlahqr_(&wantt, &wantz, n, &l, &i__, &h__[h_offset], ldh, &w[1], ilo, ihi, - &z__[z_offset], ldz, info); - if (*info > 0) { - return 0; - } - -/* - Decrement number of remaining iterations, and return to start of - the main loop with a new value of I. -*/ - - itn -= its; - i__ = l - 1; - goto L60; - -L180: - i__1 = max(1,*n); - work[1].r = (doublereal) i__1, work[1].i = 0.; - return 0; - -/* End of ZHSEQR */ - -} /* zhseqr_ */ - -/* Subroutine */ int zlabrd_(integer *m, integer *n, integer *nb, - doublecomplex *a, integer *lda, doublereal *d__, doublereal *e, - doublecomplex *tauq, doublecomplex *taup, doublecomplex *x, integer * - ldx, doublecomplex *y, integer *ldy) -{ - /* System generated locals */ - integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, - i__3; - doublecomplex z__1; - - /* Local variables */ - static integer i__; - static doublecomplex alpha; - extern /* Subroutine */ int zscal_(integer *, doublecomplex *, - doublecomplex *, integer *), zgemv_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *), - zlarfg_(integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *), zlacgv_(integer *, doublecomplex *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZLABRD reduces the first NB rows and columns of a complex general - m by n matrix A to upper or lower real bidiagonal form by a unitary - transformation Q' * A * P, and returns the matrices X and Y which - are needed to apply the transformation to the unreduced part of A. - - If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower - bidiagonal form. - - This is an auxiliary routine called by ZGEBRD - - Arguments - ========= - - M (input) INTEGER - The number of rows in the matrix A. - - N (input) INTEGER - The number of columns in the matrix A. - - NB (input) INTEGER - The number of leading rows and columns of A to be reduced. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the m by n general matrix to be reduced. - On exit, the first NB rows and columns of the matrix are - overwritten; the rest of the array is unchanged. - If m >= n, elements on and below the diagonal in the first NB - columns, with the array TAUQ, represent the unitary - matrix Q as a product of elementary reflectors; and - elements above the diagonal in the first NB rows, with the - array TAUP, represent the unitary matrix P as a product - of elementary reflectors. - If m < n, elements below the diagonal in the first NB - columns, with the array TAUQ, represent the unitary - matrix Q as a product of elementary reflectors, and - elements on and above the diagonal in the first NB rows, - with the array TAUP, represent the unitary matrix P as - a product of elementary reflectors. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - D (output) DOUBLE PRECISION array, dimension (NB) - The diagonal elements of the first NB rows and columns of - the reduced matrix. D(i) = A(i,i). - - E (output) DOUBLE PRECISION array, dimension (NB) - The off-diagonal elements of the first NB rows and columns of - the reduced matrix. - - TAUQ (output) COMPLEX*16 array dimension (NB) - The scalar factors of the elementary reflectors which - represent the unitary matrix Q. See Further Details. - - TAUP (output) COMPLEX*16 array, dimension (NB) - The scalar factors of the elementary reflectors which - represent the unitary matrix P. See Further Details. - - X (output) COMPLEX*16 array, dimension (LDX,NB) - The m-by-nb matrix X required to update the unreduced part - of A. - - LDX (input) INTEGER - The leading dimension of the array X. LDX >= max(1,M). - - Y (output) COMPLEX*16 array, dimension (LDY,NB) - The n-by-nb matrix Y required to update the unreduced part - of A. - - LDY (output) INTEGER - The leading dimension of the array Y. LDY >= max(1,N). - - Further Details - =============== - - The matrices Q and P are represented as products of elementary - reflectors: - - Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are complex scalars, and v and u are complex - vectors. - - If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in - A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in - A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). - - If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in - A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in - A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). - - The elements of the vectors v and u together form the m-by-nb matrix - V and the nb-by-n matrix U' which are needed, with X and Y, to apply - the transformation to the unreduced part of the matrix, using a block - update of the form: A := A - V*Y' - X*U'. - - The contents of A on exit are illustrated by the following examples - with nb = 2: - - m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): - - ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) - ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) - ( v1 v2 a a a ) ( v1 1 a a a a ) - ( v1 v2 a a a ) ( v1 v2 a a a a ) - ( v1 v2 a a a ) ( v1 v2 a a a a ) - ( v1 v2 a a a ) - - where a denotes an element of the original matrix which is unchanged, - vi denotes an element of the vector defining H(i), and ui an element - of the vector defining G(i). - - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tauq; - --taup; - x_dim1 = *ldx; - x_offset = 1 + x_dim1; - x -= x_offset; - y_dim1 = *ldy; - y_offset = 1 + y_dim1; - y -= y_offset; - - /* Function Body */ - if (*m <= 0 || *n <= 0) { - return 0; - } - - if (*m >= *n) { - -/* Reduce to upper bidiagonal form */ - - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Update A(i:m,i) */ - - i__2 = i__ - 1; - zlacgv_(&i__2, &y[i__ + y_dim1], ldy); - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + a_dim1], lda, - &y[i__ + y_dim1], ldy, &c_b60, &a[i__ + i__ * a_dim1], & - c__1); - i__2 = i__ - 1; - zlacgv_(&i__2, &y[i__ + y_dim1], ldy); - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", &i__2, &i__3, &z__1, &x[i__ + x_dim1], ldx, - &a[i__ * a_dim1 + 1], &c__1, &c_b60, &a[i__ + i__ * - a_dim1], &c__1); - -/* Generate reflection Q(i) to annihilate A(i+1:m,i) */ - - i__2 = i__ + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *m - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - zlarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, & - tauq[i__]); - i__2 = i__; - d__[i__2] = alpha.r; - if (i__ < *n) { - i__2 = i__ + i__ * a_dim1; - a[i__2].r = 1., a[i__2].i = 0.; - -/* Compute Y(i+1:n,i) */ - - i__2 = *m - i__ + 1; - i__3 = *n - i__; - zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[i__ + ( - i__ + 1) * a_dim1], lda, &a[i__ + i__ * a_dim1], & - c__1, &c_b59, &y[i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[i__ + - a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b59, & - y[i__ * y_dim1 + 1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", &i__2, &i__3, &z__1, &y[i__ + 1 + - y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b60, &y[ - i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &x[i__ + - x_dim1], ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b59, & - y[i__ * y_dim1 + 1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - z__1.r = -1., z__1.i = -0.; - zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &a[(i__ + - 1) * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & - c_b60, &y[i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *n - i__; - zscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); - -/* Update A(i,i+1:n) */ - - i__2 = *n - i__; - zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); - zlacgv_(&i__, &a[i__ + a_dim1], lda); - i__2 = *n - i__; - z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", &i__2, &i__, &z__1, &y[i__ + 1 + - y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b60, &a[i__ + - (i__ + 1) * a_dim1], lda); - zlacgv_(&i__, &a[i__ + a_dim1], lda); - i__2 = i__ - 1; - zlacgv_(&i__2, &x[i__ + x_dim1], ldx); - i__2 = i__ - 1; - i__3 = *n - i__; - z__1.r = -1., z__1.i = -0.; - zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &a[(i__ + - 1) * a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b60, - &a[i__ + (i__ + 1) * a_dim1], lda); - i__2 = i__ - 1; - zlacgv_(&i__2, &x[i__ + x_dim1], ldx); - -/* Generate reflection P(i) to annihilate A(i,i+2:n) */ - - i__2 = i__ + (i__ + 1) * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *n - i__; -/* Computing MIN */ - i__3 = i__ + 2; - zlarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & - taup[i__]); - i__2 = i__; - e[i__2] = alpha.r; - i__2 = i__ + (i__ + 1) * a_dim1; - a[i__2].r = 1., a[i__2].i = 0.; - -/* Compute X(i+1:m,i) */ - - i__2 = *m - i__; - i__3 = *n - i__; - zgemv_("No transpose", &i__2, &i__3, &c_b60, &a[i__ + 1 + ( - i__ + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1], - lda, &c_b59, &x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *n - i__; - zgemv_("Conjugate transpose", &i__2, &i__, &c_b60, &y[i__ + 1 - + y_dim1], ldy, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b59, &x[i__ * x_dim1 + 1], &c__1); - i__2 = *m - i__; - z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", &i__2, &i__, &z__1, &a[i__ + 1 + - a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b60, &x[ - i__ + 1 + i__ * x_dim1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - zgemv_("No transpose", &i__2, &i__3, &c_b60, &a[(i__ + 1) * - a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b59, &x[i__ * x_dim1 + 1], &c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", &i__2, &i__3, &z__1, &x[i__ + 1 + - x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b60, &x[ - i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *m - i__; - zscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *n - i__; - zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); - } -/* L10: */ - } - } else { - -/* Reduce to lower bidiagonal form */ - - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Update A(i,i:n) */ - - i__2 = *n - i__ + 1; - zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); - i__2 = i__ - 1; - zlacgv_(&i__2, &a[i__ + a_dim1], lda); - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", &i__2, &i__3, &z__1, &y[i__ + y_dim1], ldy, - &a[i__ + a_dim1], lda, &c_b60, &a[i__ + i__ * a_dim1], - lda); - i__2 = i__ - 1; - zlacgv_(&i__2, &a[i__ + a_dim1], lda); - i__2 = i__ - 1; - zlacgv_(&i__2, &x[i__ + x_dim1], ldx); - i__2 = i__ - 1; - i__3 = *n - i__ + 1; - z__1.r = -1., z__1.i = -0.; - zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &a[i__ * - a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b60, &a[i__ + - i__ * a_dim1], lda); - i__2 = i__ - 1; - zlacgv_(&i__2, &x[i__ + x_dim1], ldx); - -/* Generate reflection P(i) to annihilate A(i,i+1:n) */ - - i__2 = i__ + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *n - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - zlarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & - taup[i__]); - i__2 = i__; - d__[i__2] = alpha.r; - if (i__ < *m) { - i__2 = i__ + i__ * a_dim1; - a[i__2].r = 1., a[i__2].i = 0.; - -/* Compute X(i+1:m,i) */ - - i__2 = *m - i__; - i__3 = *n - i__ + 1; - zgemv_("No transpose", &i__2, &i__3, &c_b60, &a[i__ + 1 + i__ - * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &c_b59, & - x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &y[i__ + - y_dim1], ldy, &a[i__ + i__ * a_dim1], lda, &c_b59, &x[ - i__ * x_dim1 + 1], &c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + 1 + - a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b60, &x[ - i__ + 1 + i__ * x_dim1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__ + 1; - zgemv_("No transpose", &i__2, &i__3, &c_b60, &a[i__ * a_dim1 - + 1], lda, &a[i__ + i__ * a_dim1], lda, &c_b59, &x[ - i__ * x_dim1 + 1], &c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", &i__2, &i__3, &z__1, &x[i__ + 1 + - x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b60, &x[ - i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *m - i__; - zscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *n - i__ + 1; - zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); - -/* Update A(i+1:m,i) */ - - i__2 = i__ - 1; - zlacgv_(&i__2, &y[i__ + y_dim1], ldy); - i__2 = *m - i__; - i__3 = i__ - 1; - z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + 1 + - a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b60, &a[i__ + - 1 + i__ * a_dim1], &c__1); - i__2 = i__ - 1; - zlacgv_(&i__2, &y[i__ + y_dim1], ldy); - i__2 = *m - i__; - z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", &i__2, &i__, &z__1, &x[i__ + 1 + - x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b60, &a[ - i__ + 1 + i__ * a_dim1], &c__1); - -/* Generate reflection Q(i) to annihilate A(i+2:m,i) */ - - i__2 = i__ + 1 + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *m - i__; -/* Computing MIN */ - i__3 = i__ + 2; - zlarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, - &tauq[i__]); - i__2 = i__; - e[i__2] = alpha.r; - i__2 = i__ + 1 + i__ * a_dim1; - a[i__2].r = 1., a[i__2].i = 0.; - -/* Compute Y(i+1:n,i) */ - - i__2 = *m - i__; - i__3 = *n - i__; - zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[i__ + - 1 + (i__ + 1) * a_dim1], lda, &a[i__ + 1 + i__ * - a_dim1], &c__1, &c_b59, &y[i__ + 1 + i__ * y_dim1], & - c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[i__ + - 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b59, &y[i__ * y_dim1 + 1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", &i__2, &i__3, &z__1, &y[i__ + 1 + - y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b60, &y[ - i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *m - i__; - zgemv_("Conjugate transpose", &i__2, &i__, &c_b60, &x[i__ + 1 - + x_dim1], ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b59, &y[i__ * y_dim1 + 1], &c__1); - i__2 = *n - i__; - z__1.r = -1., z__1.i = -0.; - zgemv_("Conjugate transpose", &i__, &i__2, &z__1, &a[(i__ + 1) - * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & - c_b60, &y[i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *n - i__; - zscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); - } else { - i__2 = *n - i__ + 1; - zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); - } -/* L20: */ - } - } - return 0; - -/* End of ZLABRD */ - -} /* zlabrd_ */ - -/* Subroutine */ int zlacgv_(integer *n, doublecomplex *x, integer *incx) -{ - /* System generated locals */ - integer i__1, i__2; - doublecomplex z__1; - - /* Builtin functions */ - void d_cnjg(doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer i__, ioff; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - ZLACGV conjugates a complex vector of length N. - - Arguments - ========= - - N (input) INTEGER - The length of the vector X. N >= 0. - - X (input/output) COMPLEX*16 array, dimension - (1+(N-1)*abs(INCX)) - On entry, the vector of length N to be conjugated. - On exit, X is overwritten with conjg(X). - - INCX (input) INTEGER - The spacing between successive elements of X. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --x; - - /* Function Body */ - if (*incx == 1) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - d_cnjg(&z__1, &x[i__]); - x[i__2].r = z__1.r, x[i__2].i = z__1.i; -/* L10: */ - } - } else { - ioff = 1; - if (*incx < 0) { - ioff = 1 - (*n - 1) * *incx; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = ioff; - d_cnjg(&z__1, &x[ioff]); - x[i__2].r = z__1.r, x[i__2].i = z__1.i; - ioff += *incx; -/* L20: */ - } - } - return 0; - -/* End of ZLACGV */ - -} /* zlacgv_ */ - -/* Subroutine */ int zlacp2_(char *uplo, integer *m, integer *n, doublereal * - a, integer *lda, doublecomplex *b, integer *ldb) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, j; - extern logical lsame_(char *, char *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZLACP2 copies all or part of a real two-dimensional matrix A to a - complex matrix B. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies the part of the matrix A to be copied to B. - = 'U': Upper triangular part - = 'L': Lower triangular part - Otherwise: All of the matrix A - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input) DOUBLE PRECISION array, dimension (LDA,N) - The m by n matrix A. If UPLO = 'U', only the upper trapezium - is accessed; if UPLO = 'L', only the lower trapezium is - accessed. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - B (output) COMPLEX*16 array, dimension (LDB,N) - On exit, B = A in the locations specified by UPLO. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,M). - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = min(j,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * a_dim1; - b[i__3].r = a[i__4], b[i__3].i = 0.; -/* L10: */ - } -/* L20: */ - } - - } else if (lsame_(uplo, "L")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = j; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * a_dim1; - b[i__3].r = a[i__4], b[i__3].i = 0.; -/* L30: */ - } -/* L40: */ - } - - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * a_dim1; - b[i__3].r = a[i__4], b[i__3].i = 0.; -/* L50: */ - } -/* L60: */ - } - } - - return 0; - -/* End of ZLACP2 */ - -} /* zlacp2_ */ - -/* Subroutine */ int zlacpy_(char *uplo, integer *m, integer *n, - doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, j; - extern logical lsame_(char *, char *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - ZLACPY copies all or part of a two-dimensional matrix A to another - matrix B. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies the part of the matrix A to be copied to B. - = 'U': Upper triangular part - = 'L': Lower triangular part - Otherwise: All of the matrix A - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input) COMPLEX*16 array, dimension (LDA,N) - The m by n matrix A. If UPLO = 'U', only the upper trapezium - is accessed; if UPLO = 'L', only the lower trapezium is - accessed. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - B (output) COMPLEX*16 array, dimension (LDB,N) - On exit, B = A in the locations specified by UPLO. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,M). - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = min(j,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * a_dim1; - b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i; -/* L10: */ - } -/* L20: */ - } - - } else if (lsame_(uplo, "L")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = j; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * a_dim1; - b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i; -/* L30: */ - } -/* L40: */ - } - - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * a_dim1; - b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i; -/* L50: */ - } -/* L60: */ - } - } - - return 0; - -/* End of ZLACPY */ - -} /* zlacpy_ */ - -/* Subroutine */ int zlacrm_(integer *m, integer *n, doublecomplex *a, - integer *lda, doublereal *b, integer *ldb, doublecomplex *c__, - integer *ldc, doublereal *rwork) -{ - /* System generated locals */ - integer b_dim1, b_offset, a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, - i__3, i__4, i__5; - doublereal d__1; - doublecomplex z__1; - - /* Builtin functions */ - double d_imag(doublecomplex *); - - /* Local variables */ - static integer i__, j, l; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZLACRM performs a very simple matrix-matrix multiplication: - C := A * B, - where A is M by N and complex; B is N by N and real; - C is M by N and complex. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A and of the matrix C. - M >= 0. - - N (input) INTEGER - The number of columns and rows of the matrix B and - the number of columns of the matrix C. - N >= 0. - - A (input) COMPLEX*16 array, dimension (LDA, N) - A contains the M by N matrix A. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >=max(1,M). - - B (input) DOUBLE PRECISION array, dimension (LDB, N) - B contains the N by N matrix B. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >=max(1,N). - - C (input) COMPLEX*16 array, dimension (LDC, N) - C contains the M by N matrix C. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >=max(1,N). - - RWORK (workspace) DOUBLE PRECISION array, dimension (2*M*N) - - ===================================================================== - - - Quick return if possible. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --rwork; - - /* Function Body */ - if (*m == 0 || *n == 0) { - return 0; - } - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - rwork[(j - 1) * *m + i__] = a[i__3].r; -/* L10: */ - } -/* L20: */ - } - - l = *m * *n + 1; - dgemm_("N", "N", m, n, n, &c_b1015, &rwork[1], m, &b[b_offset], ldb, & - c_b324, &rwork[l], m); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = l + (j - 1) * *m + i__ - 1; - c__[i__3].r = rwork[i__4], c__[i__3].i = 0.; -/* L30: */ - } -/* L40: */ - } - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - rwork[(j - 1) * *m + i__] = d_imag(&a[i__ + j * a_dim1]); -/* L50: */ - } -/* L60: */ - } - dgemm_("N", "N", m, n, n, &c_b1015, &rwork[1], m, &b[b_offset], ldb, & - c_b324, &rwork[l], m); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - d__1 = c__[i__4].r; - i__5 = l + (j - 1) * *m + i__ - 1; - z__1.r = d__1, z__1.i = rwork[i__5]; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; -/* L70: */ - } -/* L80: */ - } - - return 0; - -/* End of ZLACRM */ - -} /* zlacrm_ */ - -/* Double Complex */ VOID zladiv_(doublecomplex * ret_val, doublecomplex *x, - doublecomplex *y) -{ - /* System generated locals */ - doublereal d__1, d__2, d__3, d__4; - doublecomplex z__1; - - /* Builtin functions */ - double d_imag(doublecomplex *); - - /* Local variables */ - static doublereal zi, zr; - extern /* Subroutine */ int dladiv_(doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - ZLADIV := X / Y, where X and Y are complex. The computation of X / Y - will not overflow on an intermediary step unless the results - overflows. - - Arguments - ========= - - X (input) COMPLEX*16 - Y (input) COMPLEX*16 - The complex scalars X and Y. - - ===================================================================== -*/ - - - d__1 = x->r; - d__2 = d_imag(x); - d__3 = y->r; - d__4 = d_imag(y); - dladiv_(&d__1, &d__2, &d__3, &d__4, &zr, &zi); - z__1.r = zr, z__1.i = zi; - ret_val->r = z__1.r, ret_val->i = z__1.i; - - return ; - -/* End of ZLADIV */ - -} /* zladiv_ */ - -/* Subroutine */ int zlaed0_(integer *qsiz, integer *n, doublereal *d__, - doublereal *e, doublecomplex *q, integer *ldq, doublecomplex *qstore, - integer *ldqs, doublereal *rwork, integer *iwork, integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2; - doublereal d__1; - - /* Builtin functions */ - double log(doublereal); - integer pow_ii(integer *, integer *); - - /* Local variables */ - static integer i__, j, k, ll, iq, lgn, msd2, smm1, spm1, spm2; - static doublereal temp; - static integer curr, iperm; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static integer indxq, iwrem, iqptr, tlvls; - extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, - doublecomplex *, integer *), zlaed7_(integer *, integer *, - integer *, integer *, integer *, integer *, doublereal *, - doublecomplex *, integer *, doublereal *, integer *, doublereal *, - integer *, integer *, integer *, integer *, integer *, - doublereal *, doublecomplex *, doublereal *, integer *, integer *) - ; - static integer igivcl; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int zlacrm_(integer *, integer *, doublecomplex *, - integer *, doublereal *, integer *, doublecomplex *, integer *, - doublereal *); - static integer igivnm, submat, curprb, subpbs, igivpt; - extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, - doublereal *, doublereal *, integer *, doublereal *, integer *); - static integer curlvl, matsiz, iprmpt, smlsiz; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - Using the divide and conquer method, ZLAED0 computes all eigenvalues - of a symmetric tridiagonal matrix which is one diagonal block of - those from reducing a dense or band Hermitian matrix and - corresponding eigenvectors of the dense or band matrix. - - Arguments - ========= - - QSIZ (input) INTEGER - The dimension of the unitary matrix used to reduce - the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the diagonal elements of the tridiagonal matrix. - On exit, the eigenvalues in ascending order. - - E (input/output) DOUBLE PRECISION array, dimension (N-1) - On entry, the off-diagonal elements of the tridiagonal matrix. - On exit, E has been destroyed. - - Q (input/output) COMPLEX*16 array, dimension (LDQ,N) - On entry, Q must contain an QSIZ x N matrix whose columns - unitarily orthonormal. It is a part of the unitary matrix - that reduces the full dense Hermitian matrix to a - (reducible) symmetric tridiagonal matrix. - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). - - IWORK (workspace) INTEGER array, - the dimension of IWORK must be at least - 6 + 6*N + 5*N*lg N - ( lg( N ) = smallest integer k - such that 2^k >= N ) - - RWORK (workspace) DOUBLE PRECISION array, - dimension (1 + 3*N + 2*N*lg N + 3*N**2) - ( lg( N ) = smallest integer k - such that 2^k >= N ) - - QSTORE (workspace) COMPLEX*16 array, dimension (LDQS, N) - Used to store parts of - the eigenvector matrix when the updating matrix multiplies - take place. - - LDQS (input) INTEGER - The leading dimension of the array QSTORE. - LDQS >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an eigenvalue while - working on the submatrix lying in rows and columns - INFO/(N+1) through mod(INFO,N+1). - - ===================================================================== - - Warning: N could be as big as QSIZ! - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - qstore_dim1 = *ldqs; - qstore_offset = 1 + qstore_dim1; - qstore -= qstore_offset; - --rwork; - --iwork; - - /* Function Body */ - *info = 0; - -/* - IF( ICOMPQ .LT. 0 .OR. ICOMPQ .GT. 2 ) THEN - INFO = -1 - ELSE IF( ( ICOMPQ .EQ. 1 ) .AND. ( QSIZ .LT. MAX( 0, N ) ) ) - $ THEN -*/ - if (*qsiz < max(0,*n)) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*ldq < max(1,*n)) { - *info = -6; - } else if (*ldqs < max(1,*n)) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZLAED0", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - smlsiz = ilaenv_(&c__9, "ZLAED0", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); - -/* - Determine the size and placement of the submatrices, and save in - the leading elements of IWORK. -*/ - - iwork[1] = *n; - subpbs = 1; - tlvls = 0; -L10: - if (iwork[subpbs] > smlsiz) { - for (j = subpbs; j >= 1; --j) { - iwork[j * 2] = (iwork[j] + 1) / 2; - iwork[(j << 1) - 1] = iwork[j] / 2; -/* L20: */ - } - ++tlvls; - subpbs <<= 1; - goto L10; - } - i__1 = subpbs; - for (j = 2; j <= i__1; ++j) { - iwork[j] += iwork[j - 1]; -/* L30: */ - } - -/* - Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 - using rank-1 modifications (cuts). -*/ - - spm1 = subpbs - 1; - i__1 = spm1; - for (i__ = 1; i__ <= i__1; ++i__) { - submat = iwork[i__] + 1; - smm1 = submat - 1; - d__[smm1] -= (d__1 = e[smm1], abs(d__1)); - d__[submat] -= (d__1 = e[smm1], abs(d__1)); -/* L40: */ - } - - indxq = (*n << 2) + 3; - -/* - Set up workspaces for eigenvalues only/accumulate new vectors - routine -*/ - - temp = log((doublereal) (*n)) / log(2.); - lgn = (integer) temp; - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - iprmpt = indxq + *n + 1; - iperm = iprmpt + *n * lgn; - iqptr = iperm + *n * lgn; - igivpt = iqptr + *n + 2; - igivcl = igivpt + *n * lgn; - - igivnm = 1; - iq = igivnm + (*n << 1) * lgn; -/* Computing 2nd power */ - i__1 = *n; - iwrem = iq + i__1 * i__1 + 1; -/* Initialize pointers */ - i__1 = subpbs; - for (i__ = 0; i__ <= i__1; ++i__) { - iwork[iprmpt + i__] = 1; - iwork[igivpt + i__] = 1; -/* L50: */ - } - iwork[iqptr] = 1; - -/* - Solve each submatrix eigenproblem at the bottom of the divide and - conquer tree. -*/ - - curr = 0; - i__1 = spm1; - for (i__ = 0; i__ <= i__1; ++i__) { - if (i__ == 0) { - submat = 1; - matsiz = iwork[1]; - } else { - submat = iwork[i__] + 1; - matsiz = iwork[i__ + 1] - iwork[i__]; - } - ll = iq - 1 + iwork[iqptr + curr]; - dsteqr_("I", &matsiz, &d__[submat], &e[submat], &rwork[ll], &matsiz, & - rwork[1], info); - zlacrm_(qsiz, &matsiz, &q[submat * q_dim1 + 1], ldq, &rwork[ll], & - matsiz, &qstore[submat * qstore_dim1 + 1], ldqs, &rwork[iwrem] - ); -/* Computing 2nd power */ - i__2 = matsiz; - iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2; - ++curr; - if (*info > 0) { - *info = submat * (*n + 1) + submat + matsiz - 1; - return 0; - } - k = 1; - i__2 = iwork[i__ + 1]; - for (j = submat; j <= i__2; ++j) { - iwork[indxq + j] = k; - ++k; -/* L60: */ - } -/* L70: */ - } - -/* - Successively merge eigensystems of adjacent submatrices - into eigensystem for the corresponding larger matrix. - - while ( SUBPBS > 1 ) -*/ - - curlvl = 1; -L80: - if (subpbs > 1) { - spm2 = subpbs - 2; - i__1 = spm2; - for (i__ = 0; i__ <= i__1; i__ += 2) { - if (i__ == 0) { - submat = 1; - matsiz = iwork[2]; - msd2 = iwork[1]; - curprb = 0; - } else { - submat = iwork[i__] + 1; - matsiz = iwork[i__ + 2] - iwork[i__]; - msd2 = matsiz / 2; - ++curprb; - } - -/* - Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) - into an eigensystem of size MATSIZ. ZLAED7 handles the case - when the eigenvectors of a full or band Hermitian matrix (which - was reduced to tridiagonal form) are desired. - - I am free to use Q as a valuable working space until Loop 150. -*/ - - zlaed7_(&matsiz, &msd2, qsiz, &tlvls, &curlvl, &curprb, &d__[ - submat], &qstore[submat * qstore_dim1 + 1], ldqs, &e[ - submat + msd2 - 1], &iwork[indxq + submat], &rwork[iq], & - iwork[iqptr], &iwork[iprmpt], &iwork[iperm], &iwork[ - igivpt], &iwork[igivcl], &rwork[igivnm], &q[submat * - q_dim1 + 1], &rwork[iwrem], &iwork[subpbs + 1], info); - if (*info > 0) { - *info = submat * (*n + 1) + submat + matsiz - 1; - return 0; - } - iwork[i__ / 2 + 1] = iwork[i__ + 2]; -/* L90: */ - } - subpbs /= 2; - ++curlvl; - goto L80; - } - -/* - end while - - Re-merge the eigenvalues/vectors which were deflated at the final - merge step. -*/ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - j = iwork[indxq + i__]; - rwork[i__] = d__[j]; - zcopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 + 1] - , &c__1); -/* L100: */ - } - dcopy_(n, &rwork[1], &c__1, &d__[1], &c__1); - - return 0; - -/* End of ZLAED0 */ - -} /* zlaed0_ */ - -/* Subroutine */ int zlaed7_(integer *n, integer *cutpnt, integer *qsiz, - integer *tlvls, integer *curlvl, integer *curpbm, doublereal *d__, - doublecomplex *q, integer *ldq, doublereal *rho, integer *indxq, - doublereal *qstore, integer *qptr, integer *prmptr, integer *perm, - integer *givptr, integer *givcol, doublereal *givnum, doublecomplex * - work, doublereal *rwork, integer *iwork, integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, i__1, i__2; - - /* Builtin functions */ - integer pow_ii(integer *, integer *); - - /* Local variables */ - static integer i__, k, n1, n2, iq, iw, iz, ptr, ind1, ind2, indx, curr, - indxc, indxp; - extern /* Subroutine */ int dlaed9_(integer *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - doublereal *, doublereal *, doublereal *, integer *, integer *), - zlaed8_(integer *, integer *, integer *, doublecomplex *, integer - *, doublereal *, doublereal *, integer *, doublereal *, - doublereal *, doublecomplex *, integer *, doublereal *, integer *, - integer *, integer *, integer *, integer *, integer *, - doublereal *, integer *), dlaeda_(integer *, integer *, integer *, - integer *, integer *, integer *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, doublereal *, - integer *); - static integer idlmda; - extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, - integer *, integer *, integer *), xerbla_(char *, integer *), zlacrm_(integer *, integer *, doublecomplex *, integer *, - doublereal *, integer *, doublecomplex *, integer *, doublereal * - ); - static integer coltyp; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZLAED7 computes the updated eigensystem of a diagonal - matrix after modification by a rank-one symmetric matrix. This - routine is used only for the eigenproblem which requires all - eigenvalues and optionally eigenvectors of a dense or banded - Hermitian matrix that has been reduced to tridiagonal form. - - T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) - - where Z = Q'u, u is a vector of length N with ones in the - CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. - - The eigenvectors of the original matrix are stored in Q, and the - eigenvalues are in D. The algorithm consists of three stages: - - The first stage consists of deflating the size of the problem - when there are multiple eigenvalues or if there is a zero in - the Z vector. For each such occurence the dimension of the - secular equation problem is reduced by one. This stage is - performed by the routine DLAED2. - - The second stage consists of calculating the updated - eigenvalues. This is done by finding the roots of the secular - equation via the routine DLAED4 (as called by SLAED3). - This routine also calculates the eigenvectors of the current - problem. - - The final stage consists of computing the updated eigenvectors - directly using the updated eigenvalues. The eigenvectors for - the current problem are multiplied with the eigenvectors from - the overall problem. - - Arguments - ========= - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - CUTPNT (input) INTEGER - Contains the location of the last eigenvalue in the leading - sub-matrix. min(1,N) <= CUTPNT <= N. - - QSIZ (input) INTEGER - The dimension of the unitary matrix used to reduce - the full matrix to tridiagonal form. QSIZ >= N. - - TLVLS (input) INTEGER - The total number of merging levels in the overall divide and - conquer tree. - - CURLVL (input) INTEGER - The current level in the overall merge routine, - 0 <= curlvl <= tlvls. - - CURPBM (input) INTEGER - The current problem in the current level in the overall - merge routine (counting from upper left to lower right). - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the eigenvalues of the rank-1-perturbed matrix. - On exit, the eigenvalues of the repaired matrix. - - Q (input/output) COMPLEX*16 array, dimension (LDQ,N) - On entry, the eigenvectors of the rank-1-perturbed matrix. - On exit, the eigenvectors of the repaired tridiagonal matrix. - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). - - RHO (input) DOUBLE PRECISION - Contains the subdiagonal element used to create the rank-1 - modification. - - INDXQ (output) INTEGER array, dimension (N) - This contains the permutation which will reintegrate the - subproblem just solved back into sorted order, - ie. D( INDXQ( I = 1, N ) ) will be in ascending order. - - IWORK (workspace) INTEGER array, dimension (4*N) - - RWORK (workspace) DOUBLE PRECISION array, - dimension (3*N+2*QSIZ*N) - - WORK (workspace) COMPLEX*16 array, dimension (QSIZ*N) - - QSTORE (input/output) DOUBLE PRECISION array, dimension (N**2+1) - Stores eigenvectors of submatrices encountered during - divide and conquer, packed together. QPTR points to - beginning of the submatrices. - - QPTR (input/output) INTEGER array, dimension (N+2) - List of indices pointing to beginning of submatrices stored - in QSTORE. The submatrices are numbered starting at the - bottom left of the divide and conquer tree, from left to - right and bottom to top. - - PRMPTR (input) INTEGER array, dimension (N lg N) - Contains a list of pointers which indicate where in PERM a - level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) - indicates the size of the permutation and also the size of - the full, non-deflated problem. - - PERM (input) INTEGER array, dimension (N lg N) - Contains the permutations (from deflation and sorting) to be - applied to each eigenblock. - - GIVPTR (input) INTEGER array, dimension (N lg N) - Contains a list of pointers which indicate where in GIVCOL a - level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) - indicates the number of Givens rotations. - - GIVCOL (input) INTEGER array, dimension (2, N lg N) - Each pair of numbers indicates a pair of columns to take place - in a Givens rotation. - - GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N) - Each number indicates the S value to be used in the - corresponding Givens rotation. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an eigenvalue did not converge - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --indxq; - --qstore; - --qptr; - --prmptr; - --perm; - --givptr; - givcol -= 3; - givnum -= 3; - --work; - --rwork; - --iwork; - - /* Function Body */ - *info = 0; - -/* - IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN - INFO = -1 - ELSE IF( N.LT.0 ) THEN -*/ - if (*n < 0) { - *info = -1; - } else if (min(1,*n) > *cutpnt || *n < *cutpnt) { - *info = -2; - } else if (*qsiz < *n) { - *info = -3; - } else if (*ldq < max(1,*n)) { - *info = -9; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZLAED7", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* - The following values are for bookkeeping purposes only. They are - integer pointers which indicate the portion of the workspace - used by a particular array in DLAED2 and SLAED3. -*/ - - iz = 1; - idlmda = iz + *n; - iw = idlmda + *n; - iq = iw + *n; - - indx = 1; - indxc = indx + *n; - coltyp = indxc + *n; - indxp = coltyp + *n; - -/* - Form the z-vector which consists of the last row of Q_1 and the - first row of Q_2. -*/ - - ptr = pow_ii(&c__2, tlvls) + 1; - i__1 = *curlvl - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = *tlvls - i__; - ptr += pow_ii(&c__2, &i__2); -/* L10: */ - } - curr = ptr + *curpbm; - dlaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], & - givcol[3], &givnum[3], &qstore[1], &qptr[1], &rwork[iz], &rwork[ - iz + *n], info); - -/* - When solving the final problem, we no longer need the stored data, - so we will overwrite the data from this level onto the previously - used storage space. -*/ - - if (*curlvl == *tlvls) { - qptr[curr] = 1; - prmptr[curr] = 1; - givptr[curr] = 1; - } - -/* Sort and Deflate eigenvalues. */ - - zlaed8_(&k, n, qsiz, &q[q_offset], ldq, &d__[1], rho, cutpnt, &rwork[iz], - &rwork[idlmda], &work[1], qsiz, &rwork[iw], &iwork[indxp], &iwork[ - indx], &indxq[1], &perm[prmptr[curr]], &givptr[curr + 1], &givcol[ - (givptr[curr] << 1) + 1], &givnum[(givptr[curr] << 1) + 1], info); - prmptr[curr + 1] = prmptr[curr] + *n; - givptr[curr + 1] += givptr[curr]; - -/* Solve Secular Equation. */ - - if (k != 0) { - dlaed9_(&k, &c__1, &k, n, &d__[1], &rwork[iq], &k, rho, &rwork[idlmda] - , &rwork[iw], &qstore[qptr[curr]], &k, info); - zlacrm_(qsiz, &k, &work[1], qsiz, &qstore[qptr[curr]], &k, &q[ - q_offset], ldq, &rwork[iq]); -/* Computing 2nd power */ - i__1 = k; - qptr[curr + 1] = qptr[curr] + i__1 * i__1; - if (*info != 0) { - return 0; - } - -/* Prepare the INDXQ sorting premutation. */ - - n1 = k; - n2 = *n - k; - ind1 = 1; - ind2 = *n; - dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); - } else { - qptr[curr + 1] = qptr[curr]; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - indxq[i__] = i__; -/* L20: */ - } - } - - return 0; - -/* End of ZLAED7 */ - -} /* zlaed7_ */ - -/* Subroutine */ int zlaed8_(integer *k, integer *n, integer *qsiz, - doublecomplex *q, integer *ldq, doublereal *d__, doublereal *rho, - integer *cutpnt, doublereal *z__, doublereal *dlamda, doublecomplex * - q2, integer *ldq2, doublereal *w, integer *indxp, integer *indx, - integer *indxq, integer *perm, integer *givptr, integer *givcol, - doublereal *givnum, integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, q2_dim1, q2_offset, i__1; - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal c__; - static integer i__, j; - static doublereal s, t; - static integer k2, n1, n2, jp, n1p1; - static doublereal eps, tau, tol; - static integer jlam, imax, jmax; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *), dcopy_(integer *, doublereal *, integer *, doublereal - *, integer *), zdrot_(integer *, doublecomplex *, integer *, - doublecomplex *, integer *, doublereal *, doublereal *), zcopy_( - integer *, doublecomplex *, integer *, doublecomplex *, integer *) - ; - - extern integer idamax_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, - integer *, integer *, integer *), xerbla_(char *, integer *), zlacpy_(char *, integer *, integer *, doublecomplex *, - integer *, doublecomplex *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - September 30, 1994 - - - Purpose - ======= - - ZLAED8 merges the two sets of eigenvalues together into a single - sorted set. Then it tries to deflate the size of the problem. - There are two ways in which deflation can occur: when two or more - eigenvalues are close together or if there is a tiny element in the - Z vector. For each such occurrence the order of the related secular - equation problem is reduced by one. - - Arguments - ========= - - K (output) INTEGER - Contains the number of non-deflated eigenvalues. - This is the order of the related secular equation. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - QSIZ (input) INTEGER - The dimension of the unitary matrix used to reduce - the dense or band matrix to tridiagonal form. - QSIZ >= N if ICOMPQ = 1. - - Q (input/output) COMPLEX*16 array, dimension (LDQ,N) - On entry, Q contains the eigenvectors of the partially solved - system which has been previously updated in matrix - multiplies with other partially solved eigensystems. - On exit, Q contains the trailing (N-K) updated eigenvectors - (those which were deflated) in its last N-K columns. - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max( 1, N ). - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, D contains the eigenvalues of the two submatrices to - be combined. On exit, D contains the trailing (N-K) updated - eigenvalues (those which were deflated) sorted into increasing - order. - - RHO (input/output) DOUBLE PRECISION - Contains the off diagonal element associated with the rank-1 - cut which originally split the two submatrices which are now - being recombined. RHO is modified during the computation to - the value required by DLAED3. - - CUTPNT (input) INTEGER - Contains the location of the last eigenvalue in the leading - sub-matrix. MIN(1,N) <= CUTPNT <= N. - - Z (input) DOUBLE PRECISION array, dimension (N) - On input this vector contains the updating vector (the last - row of the first sub-eigenvector matrix and the first row of - the second sub-eigenvector matrix). The contents of Z are - destroyed during the updating process. - - DLAMDA (output) DOUBLE PRECISION array, dimension (N) - Contains a copy of the first K eigenvalues which will be used - by DLAED3 to form the secular equation. - - Q2 (output) COMPLEX*16 array, dimension (LDQ2,N) - If ICOMPQ = 0, Q2 is not referenced. Otherwise, - Contains a copy of the first K eigenvectors which will be used - by DLAED7 in a matrix multiply (DGEMM) to update the new - eigenvectors. - - LDQ2 (input) INTEGER - The leading dimension of the array Q2. LDQ2 >= max( 1, N ). - - W (output) DOUBLE PRECISION array, dimension (N) - This will hold the first k values of the final - deflation-altered z-vector and will be passed to DLAED3. - - INDXP (workspace) INTEGER array, dimension (N) - This will contain the permutation used to place deflated - values of D at the end of the array. On output INDXP(1:K) - points to the nondeflated D-values and INDXP(K+1:N) - points to the deflated eigenvalues. - - INDX (workspace) INTEGER array, dimension (N) - This will contain the permutation used to sort the contents of - D into ascending order. - - INDXQ (input) INTEGER array, dimension (N) - This contains the permutation which separately sorts the two - sub-problems in D into ascending order. Note that elements in - the second half of this permutation must first have CUTPNT - added to their values in order to be accurate. - - PERM (output) INTEGER array, dimension (N) - Contains the permutations (from deflation and sorting) to be - applied to each eigenblock. - - GIVPTR (output) INTEGER - Contains the number of Givens rotations which took place in - this subproblem. - - GIVCOL (output) INTEGER array, dimension (2, N) - Each pair of numbers indicates a pair of columns to take place - in a Givens rotation. - - GIVNUM (output) DOUBLE PRECISION array, dimension (2, N) - Each number indicates the S value to be used in the - corresponding Givens rotation. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --d__; - --z__; - --dlamda; - q2_dim1 = *ldq2; - q2_offset = 1 + q2_dim1; - q2 -= q2_offset; - --w; - --indxp; - --indx; - --indxq; - --perm; - givcol -= 3; - givnum -= 3; - - /* Function Body */ - *info = 0; - - if (*n < 0) { - *info = -2; - } else if (*qsiz < *n) { - *info = -3; - } else if (*ldq < max(1,*n)) { - *info = -5; - } else if (*cutpnt < min(1,*n) || *cutpnt > *n) { - *info = -8; - } else if (*ldq2 < max(1,*n)) { - *info = -12; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZLAED8", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - n1 = *cutpnt; - n2 = *n - n1; - n1p1 = n1 + 1; - - if (*rho < 0.) { - dscal_(&n2, &c_b1294, &z__[n1p1], &c__1); - } - -/* Normalize z so that norm(z) = 1 */ - - t = 1. / sqrt(2.); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - indx[j] = j; -/* L10: */ - } - dscal_(n, &t, &z__[1], &c__1); - *rho = (d__1 = *rho * 2., abs(d__1)); - -/* Sort the eigenvalues into increasing order */ - - i__1 = *n; - for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) { - indxq[i__] += *cutpnt; -/* L20: */ - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dlamda[i__] = d__[indxq[i__]]; - w[i__] = z__[indxq[i__]]; -/* L30: */ - } - i__ = 1; - j = *cutpnt + 1; - dlamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - d__[i__] = dlamda[indx[i__]]; - z__[i__] = w[indx[i__]]; -/* L40: */ - } - -/* Calculate the allowable deflation tolerance */ - - imax = idamax_(n, &z__[1], &c__1); - jmax = idamax_(n, &d__[1], &c__1); - eps = EPSILON; - tol = eps * 8. * (d__1 = d__[jmax], abs(d__1)); - -/* - If the rank-1 modifier is small enough, no more needs to be done - -- except to reorganize Q so that its columns correspond with the - elements in D. -*/ - - if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) { - *k = 0; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - perm[j] = indxq[indx[j]]; - zcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1] - , &c__1); -/* L50: */ - } - zlacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq); - return 0; - } - -/* - If there are multiple eigenvalues then the problem deflates. Here - the number of equal eigenvalues are found. As each equal - eigenvalue is found, an elementary reflector is computed to rotate - the corresponding eigensubspace so that the corresponding - components of Z are zero in this new basis. -*/ - - *k = 0; - *givptr = 0; - k2 = *n + 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - indxp[k2] = j; - if (j == *n) { - goto L100; - } - } else { - jlam = j; - goto L70; - } -/* L60: */ - } -L70: - ++j; - if (j > *n) { - goto L90; - } - if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - indxp[k2] = j; - } else { - -/* Check if eigenvalues are close enough to allow deflation. */ - - s = z__[jlam]; - c__ = z__[j]; - -/* - Find sqrt(a**2+b**2) without overflow or - destructive underflow. -*/ - - tau = dlapy2_(&c__, &s); - t = d__[j] - d__[jlam]; - c__ /= tau; - s = -s / tau; - if ((d__1 = t * c__ * s, abs(d__1)) <= tol) { - -/* Deflation is possible. */ - - z__[j] = tau; - z__[jlam] = 0.; - -/* Record the appropriate Givens rotation */ - - ++(*givptr); - givcol[(*givptr << 1) + 1] = indxq[indx[jlam]]; - givcol[(*givptr << 1) + 2] = indxq[indx[j]]; - givnum[(*givptr << 1) + 1] = c__; - givnum[(*givptr << 1) + 2] = s; - zdrot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[indxq[ - indx[j]] * q_dim1 + 1], &c__1, &c__, &s); - t = d__[jlam] * c__ * c__ + d__[j] * s * s; - d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__; - d__[jlam] = t; - --k2; - i__ = 1; -L80: - if (k2 + i__ <= *n) { - if (d__[jlam] < d__[indxp[k2 + i__]]) { - indxp[k2 + i__ - 1] = indxp[k2 + i__]; - indxp[k2 + i__] = jlam; - ++i__; - goto L80; - } else { - indxp[k2 + i__ - 1] = jlam; - } - } else { - indxp[k2 + i__ - 1] = jlam; - } - jlam = j; - } else { - ++(*k); - w[*k] = z__[jlam]; - dlamda[*k] = d__[jlam]; - indxp[*k] = jlam; - jlam = j; - } - } - goto L70; -L90: - -/* Record the last eigenvalue. */ - - ++(*k); - w[*k] = z__[jlam]; - dlamda[*k] = d__[jlam]; - indxp[*k] = jlam; - -L100: - -/* - Sort the eigenvalues and corresponding eigenvectors into DLAMDA - and Q2 respectively. The eigenvalues/vectors which were not - deflated go into the first K slots of DLAMDA and Q2 respectively, - while those which were deflated go into the last N - K slots. -*/ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - jp = indxp[j]; - dlamda[j] = d__[jp]; - perm[j] = indxq[indx[jp]]; - zcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1], & - c__1); -/* L110: */ - } - -/* - The deflated eigenvalues and their corresponding vectors go back - into the last N - K slots of D and Q respectively. -*/ - - if (*k < *n) { - i__1 = *n - *k; - dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); - i__1 = *n - *k; - zlacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(*k + - 1) * q_dim1 + 1], ldq); - } - - return 0; - -/* End of ZLAED8 */ - -} /* zlaed8_ */ - -/* Subroutine */ int zlahqr_(logical *wantt, logical *wantz, integer *n, - integer *ilo, integer *ihi, doublecomplex *h__, integer *ldh, - doublecomplex *w, integer *iloz, integer *ihiz, doublecomplex *z__, - integer *ldz, integer *info) -{ - /* System generated locals */ - integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; - doublereal d__1, d__2, d__3, d__4, d__5, d__6; - doublecomplex z__1, z__2, z__3, z__4; - - /* Builtin functions */ - double d_imag(doublecomplex *); - void z_sqrt(doublecomplex *, doublecomplex *), d_cnjg(doublecomplex *, - doublecomplex *); - double z_abs(doublecomplex *); - - /* Local variables */ - static integer i__, j, k, l, m; - static doublereal s; - static doublecomplex t, u, v[2], x, y; - static integer i1, i2; - static doublecomplex t1; - static doublereal t2; - static doublecomplex v2; - static doublereal h10; - static doublecomplex h11; - static doublereal h21; - static doublecomplex h22; - static integer nh, nz; - static doublecomplex h11s; - static integer itn, its; - static doublereal ulp; - static doublecomplex sum; - static doublereal tst1; - static doublecomplex temp; - extern /* Subroutine */ int zscal_(integer *, doublecomplex *, - doublecomplex *, integer *); - static doublereal rtemp, rwork[1]; - extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, - doublecomplex *, integer *); - - extern /* Subroutine */ int zlarfg_(integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *); - extern /* Double Complex */ VOID zladiv_(doublecomplex *, doublecomplex *, - doublecomplex *); - extern doublereal zlanhs_(char *, integer *, doublecomplex *, integer *, - doublereal *); - static doublereal smlnum; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZLAHQR is an auxiliary routine called by ZHSEQR to update the - eigenvalues and Schur decomposition already computed by ZHSEQR, by - dealing with the Hessenberg submatrix in rows and columns ILO to IHI. - - Arguments - ========= - - WANTT (input) LOGICAL - = .TRUE. : the full Schur form T is required; - = .FALSE.: only eigenvalues are required. - - WANTZ (input) LOGICAL - = .TRUE. : the matrix of Schur vectors Z is required; - = .FALSE.: Schur vectors are not required. - - N (input) INTEGER - The order of the matrix H. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that H is already upper triangular in rows and - columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless ILO = 1). - ZLAHQR works primarily with the Hessenberg submatrix in rows - and columns ILO to IHI, but applies transformations to all of - H if WANTT is .TRUE.. - 1 <= ILO <= max(1,IHI); IHI <= N. - - H (input/output) COMPLEX*16 array, dimension (LDH,N) - On entry, the upper Hessenberg matrix H. - On exit, if WANTT is .TRUE., H is upper triangular in rows - and columns ILO:IHI, with any 2-by-2 diagonal blocks in - standard form. If WANTT is .FALSE., the contents of H are - unspecified on exit. - - LDH (input) INTEGER - The leading dimension of the array H. LDH >= max(1,N). - - W (output) COMPLEX*16 array, dimension (N) - The computed eigenvalues ILO to IHI are stored in the - corresponding elements of W. If WANTT is .TRUE., the - eigenvalues are stored in the same order as on the diagonal - of the Schur form returned in H, with W(i) = H(i,i). - - ILOZ (input) INTEGER - IHIZ (input) INTEGER - Specify the rows of Z to which transformations must be - applied if WANTZ is .TRUE.. - 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. - - Z (input/output) COMPLEX*16 array, dimension (LDZ,N) - If WANTZ is .TRUE., on entry Z must contain the current - matrix Z of transformations accumulated by ZHSEQR, and on - exit Z has been updated; transformations are applied only to - the submatrix Z(ILOZ:IHIZ,ILO:IHI). - If WANTZ is .FALSE., Z is not referenced. - - LDZ (input) INTEGER - The leading dimension of the array Z. LDZ >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - > 0: if INFO = i, ZLAHQR failed to compute all the - eigenvalues ILO to IHI in a total of 30*(IHI-ILO+1) - iterations; elements i+1:ihi of W contain those - eigenvalues which have been successfully computed. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - h_dim1 = *ldh; - h_offset = 1 + h_dim1; - h__ -= h_offset; - --w; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - - /* Function Body */ - *info = 0; - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - if (*ilo == *ihi) { - i__1 = *ilo; - i__2 = *ilo + *ilo * h_dim1; - w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; - return 0; - } - - nh = *ihi - *ilo + 1; - nz = *ihiz - *iloz + 1; - -/* - Set machine-dependent constants for the stopping criterion. - If norm(H) <= sqrt(OVFL), overflow should not occur. -*/ - - ulp = PRECISION; - smlnum = SAFEMINIMUM / ulp; - -/* - I1 and I2 are the indices of the first row and last column of H - to which transformations must be applied. If eigenvalues only are - being computed, I1 and I2 are set inside the main loop. -*/ - - if (*wantt) { - i1 = 1; - i2 = *n; - } - -/* ITN is the total number of QR iterations allowed. */ - - itn = nh * 30; - -/* - The main loop begins here. I is the loop index and decreases from - IHI to ILO in steps of 1. Each iteration of the loop works - with the active submatrix in rows and columns L to I. - Eigenvalues I+1 to IHI have already converged. Either L = ILO, or - H(L,L-1) is negligible so that the matrix splits. -*/ - - i__ = *ihi; -L10: - if (i__ < *ilo) { - goto L130; - } - -/* - Perform QR iterations on rows and columns ILO to I until a - submatrix of order 1 splits off at the bottom because a - subdiagonal element has become negligible. -*/ - - l = *ilo; - i__1 = itn; - for (its = 0; its <= i__1; ++its) { - -/* Look for a single small subdiagonal element. */ - - i__2 = l + 1; - for (k = i__; k >= i__2; --k) { - i__3 = k - 1 + (k - 1) * h_dim1; - i__4 = k + k * h_dim1; - tst1 = (d__1 = h__[i__3].r, abs(d__1)) + (d__2 = d_imag(&h__[k - - 1 + (k - 1) * h_dim1]), abs(d__2)) + ((d__3 = h__[i__4].r, - abs(d__3)) + (d__4 = d_imag(&h__[k + k * h_dim1]), abs( - d__4))); - if (tst1 == 0.) { - i__3 = i__ - l + 1; - tst1 = zlanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, rwork); - } - i__3 = k + (k - 1) * h_dim1; -/* Computing MAX */ - d__2 = ulp * tst1; - if ((d__1 = h__[i__3].r, abs(d__1)) <= max(d__2,smlnum)) { - goto L30; - } -/* L20: */ - } -L30: - l = k; - if (l > *ilo) { - -/* H(L,L-1) is negligible */ - - i__2 = l + (l - 1) * h_dim1; - h__[i__2].r = 0., h__[i__2].i = 0.; - } - -/* Exit from loop if a submatrix of order 1 has split off. */ - - if (l >= i__) { - goto L120; - } - -/* - Now the active submatrix is in rows and columns L to I. If - eigenvalues only are being computed, only the active submatrix - need be transformed. -*/ - - if (! (*wantt)) { - i1 = l; - i2 = i__; - } - - if (its == 10 || its == 20) { - -/* Exceptional shift. */ - - i__2 = i__ + (i__ - 1) * h_dim1; - s = (d__1 = h__[i__2].r, abs(d__1)) * .75; - i__2 = i__ + i__ * h_dim1; - z__1.r = s + h__[i__2].r, z__1.i = h__[i__2].i; - t.r = z__1.r, t.i = z__1.i; - } else { - -/* Wilkinson's shift. */ - - i__2 = i__ + i__ * h_dim1; - t.r = h__[i__2].r, t.i = h__[i__2].i; - i__2 = i__ - 1 + i__ * h_dim1; - i__3 = i__ + (i__ - 1) * h_dim1; - d__1 = h__[i__3].r; - z__1.r = d__1 * h__[i__2].r, z__1.i = d__1 * h__[i__2].i; - u.r = z__1.r, u.i = z__1.i; - if (u.r != 0. || u.i != 0.) { - i__2 = i__ - 1 + (i__ - 1) * h_dim1; - z__2.r = h__[i__2].r - t.r, z__2.i = h__[i__2].i - t.i; - z__1.r = z__2.r * .5, z__1.i = z__2.i * .5; - x.r = z__1.r, x.i = z__1.i; - z__3.r = x.r * x.r - x.i * x.i, z__3.i = x.r * x.i + x.i * - x.r; - z__2.r = z__3.r + u.r, z__2.i = z__3.i + u.i; - z_sqrt(&z__1, &z__2); - y.r = z__1.r, y.i = z__1.i; - if (x.r * y.r + d_imag(&x) * d_imag(&y) < 0.) { - z__1.r = -y.r, z__1.i = -y.i; - y.r = z__1.r, y.i = z__1.i; - } - z__3.r = x.r + y.r, z__3.i = x.i + y.i; - zladiv_(&z__2, &u, &z__3); - z__1.r = t.r - z__2.r, z__1.i = t.i - z__2.i; - t.r = z__1.r, t.i = z__1.i; - } - } - -/* Look for two consecutive small subdiagonal elements. */ - - i__2 = l + 1; - for (m = i__ - 1; m >= i__2; --m) { - -/* - Determine the effect of starting the single-shift QR - iteration at row M, and see if this would make H(M,M-1) - negligible. -*/ - - i__3 = m + m * h_dim1; - h11.r = h__[i__3].r, h11.i = h__[i__3].i; - i__3 = m + 1 + (m + 1) * h_dim1; - h22.r = h__[i__3].r, h22.i = h__[i__3].i; - z__1.r = h11.r - t.r, z__1.i = h11.i - t.i; - h11s.r = z__1.r, h11s.i = z__1.i; - i__3 = m + 1 + m * h_dim1; - h21 = h__[i__3].r; - s = (d__1 = h11s.r, abs(d__1)) + (d__2 = d_imag(&h11s), abs(d__2)) - + abs(h21); - z__1.r = h11s.r / s, z__1.i = h11s.i / s; - h11s.r = z__1.r, h11s.i = z__1.i; - h21 /= s; - v[0].r = h11s.r, v[0].i = h11s.i; - v[1].r = h21, v[1].i = 0.; - i__3 = m + (m - 1) * h_dim1; - h10 = h__[i__3].r; - tst1 = ((d__1 = h11s.r, abs(d__1)) + (d__2 = d_imag(&h11s), abs( - d__2))) * ((d__3 = h11.r, abs(d__3)) + (d__4 = d_imag(& - h11), abs(d__4)) + ((d__5 = h22.r, abs(d__5)) + (d__6 = - d_imag(&h22), abs(d__6)))); - if ((d__1 = h10 * h21, abs(d__1)) <= ulp * tst1) { - goto L50; - } -/* L40: */ - } - i__2 = l + l * h_dim1; - h11.r = h__[i__2].r, h11.i = h__[i__2].i; - i__2 = l + 1 + (l + 1) * h_dim1; - h22.r = h__[i__2].r, h22.i = h__[i__2].i; - z__1.r = h11.r - t.r, z__1.i = h11.i - t.i; - h11s.r = z__1.r, h11s.i = z__1.i; - i__2 = l + 1 + l * h_dim1; - h21 = h__[i__2].r; - s = (d__1 = h11s.r, abs(d__1)) + (d__2 = d_imag(&h11s), abs(d__2)) + - abs(h21); - z__1.r = h11s.r / s, z__1.i = h11s.i / s; - h11s.r = z__1.r, h11s.i = z__1.i; - h21 /= s; - v[0].r = h11s.r, v[0].i = h11s.i; - v[1].r = h21, v[1].i = 0.; -L50: - -/* Single-shift QR step */ - - i__2 = i__ - 1; - for (k = m; k <= i__2; ++k) { - -/* - The first iteration of this loop determines a reflection G - from the vector V and applies it from left and right to H, - thus creating a nonzero bulge below the subdiagonal. - - Each subsequent iteration determines a reflection G to - restore the Hessenberg form in the (K-1)th column, and thus - chases the bulge one step toward the bottom of the active - submatrix. - - V(2) is always real before the call to ZLARFG, and hence - after the call T2 ( = T1*V(2) ) is also real. -*/ - - if (k > m) { - zcopy_(&c__2, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); - } - zlarfg_(&c__2, v, &v[1], &c__1, &t1); - if (k > m) { - i__3 = k + (k - 1) * h_dim1; - h__[i__3].r = v[0].r, h__[i__3].i = v[0].i; - i__3 = k + 1 + (k - 1) * h_dim1; - h__[i__3].r = 0., h__[i__3].i = 0.; - } - v2.r = v[1].r, v2.i = v[1].i; - z__1.r = t1.r * v2.r - t1.i * v2.i, z__1.i = t1.r * v2.i + t1.i * - v2.r; - t2 = z__1.r; - -/* - Apply G from the left to transform the rows of the matrix - in columns K to I2. -*/ - - i__3 = i2; - for (j = k; j <= i__3; ++j) { - d_cnjg(&z__3, &t1); - i__4 = k + j * h_dim1; - z__2.r = z__3.r * h__[i__4].r - z__3.i * h__[i__4].i, z__2.i = - z__3.r * h__[i__4].i + z__3.i * h__[i__4].r; - i__5 = k + 1 + j * h_dim1; - z__4.r = t2 * h__[i__5].r, z__4.i = t2 * h__[i__5].i; - z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; - sum.r = z__1.r, sum.i = z__1.i; - i__4 = k + j * h_dim1; - i__5 = k + j * h_dim1; - z__1.r = h__[i__5].r - sum.r, z__1.i = h__[i__5].i - sum.i; - h__[i__4].r = z__1.r, h__[i__4].i = z__1.i; - i__4 = k + 1 + j * h_dim1; - i__5 = k + 1 + j * h_dim1; - z__2.r = sum.r * v2.r - sum.i * v2.i, z__2.i = sum.r * v2.i + - sum.i * v2.r; - z__1.r = h__[i__5].r - z__2.r, z__1.i = h__[i__5].i - z__2.i; - h__[i__4].r = z__1.r, h__[i__4].i = z__1.i; -/* L60: */ - } - -/* - Apply G from the right to transform the columns of the - matrix in rows I1 to min(K+2,I). - - Computing MIN -*/ - i__4 = k + 2; - i__3 = min(i__4,i__); - for (j = i1; j <= i__3; ++j) { - i__4 = j + k * h_dim1; - z__2.r = t1.r * h__[i__4].r - t1.i * h__[i__4].i, z__2.i = - t1.r * h__[i__4].i + t1.i * h__[i__4].r; - i__5 = j + (k + 1) * h_dim1; - z__3.r = t2 * h__[i__5].r, z__3.i = t2 * h__[i__5].i; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; - sum.r = z__1.r, sum.i = z__1.i; - i__4 = j + k * h_dim1; - i__5 = j + k * h_dim1; - z__1.r = h__[i__5].r - sum.r, z__1.i = h__[i__5].i - sum.i; - h__[i__4].r = z__1.r, h__[i__4].i = z__1.i; - i__4 = j + (k + 1) * h_dim1; - i__5 = j + (k + 1) * h_dim1; - d_cnjg(&z__3, &v2); - z__2.r = sum.r * z__3.r - sum.i * z__3.i, z__2.i = sum.r * - z__3.i + sum.i * z__3.r; - z__1.r = h__[i__5].r - z__2.r, z__1.i = h__[i__5].i - z__2.i; - h__[i__4].r = z__1.r, h__[i__4].i = z__1.i; -/* L70: */ - } - - if (*wantz) { - -/* Accumulate transformations in the matrix Z */ - - i__3 = *ihiz; - for (j = *iloz; j <= i__3; ++j) { - i__4 = j + k * z_dim1; - z__2.r = t1.r * z__[i__4].r - t1.i * z__[i__4].i, z__2.i = - t1.r * z__[i__4].i + t1.i * z__[i__4].r; - i__5 = j + (k + 1) * z_dim1; - z__3.r = t2 * z__[i__5].r, z__3.i = t2 * z__[i__5].i; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; - sum.r = z__1.r, sum.i = z__1.i; - i__4 = j + k * z_dim1; - i__5 = j + k * z_dim1; - z__1.r = z__[i__5].r - sum.r, z__1.i = z__[i__5].i - - sum.i; - z__[i__4].r = z__1.r, z__[i__4].i = z__1.i; - i__4 = j + (k + 1) * z_dim1; - i__5 = j + (k + 1) * z_dim1; - d_cnjg(&z__3, &v2); - z__2.r = sum.r * z__3.r - sum.i * z__3.i, z__2.i = sum.r * - z__3.i + sum.i * z__3.r; - z__1.r = z__[i__5].r - z__2.r, z__1.i = z__[i__5].i - - z__2.i; - z__[i__4].r = z__1.r, z__[i__4].i = z__1.i; -/* L80: */ - } - } - - if (k == m && m > l) { - -/* - If the QR step was started at row M > L because two - consecutive small subdiagonals were found, then extra - scaling must be performed to ensure that H(M,M-1) remains - real. -*/ - - z__1.r = 1. - t1.r, z__1.i = 0. - t1.i; - temp.r = z__1.r, temp.i = z__1.i; - d__1 = z_abs(&temp); - z__1.r = temp.r / d__1, z__1.i = temp.i / d__1; - temp.r = z__1.r, temp.i = z__1.i; - i__3 = m + 1 + m * h_dim1; - i__4 = m + 1 + m * h_dim1; - d_cnjg(&z__2, &temp); - z__1.r = h__[i__4].r * z__2.r - h__[i__4].i * z__2.i, z__1.i = - h__[i__4].r * z__2.i + h__[i__4].i * z__2.r; - h__[i__3].r = z__1.r, h__[i__3].i = z__1.i; - if (m + 2 <= i__) { - i__3 = m + 2 + (m + 1) * h_dim1; - i__4 = m + 2 + (m + 1) * h_dim1; - z__1.r = h__[i__4].r * temp.r - h__[i__4].i * temp.i, - z__1.i = h__[i__4].r * temp.i + h__[i__4].i * - temp.r; - h__[i__3].r = z__1.r, h__[i__3].i = z__1.i; - } - i__3 = i__; - for (j = m; j <= i__3; ++j) { - if (j != m + 1) { - if (i2 > j) { - i__4 = i2 - j; - zscal_(&i__4, &temp, &h__[j + (j + 1) * h_dim1], - ldh); - } - i__4 = j - i1; - d_cnjg(&z__1, &temp); - zscal_(&i__4, &z__1, &h__[i1 + j * h_dim1], &c__1); - if (*wantz) { - d_cnjg(&z__1, &temp); - zscal_(&nz, &z__1, &z__[*iloz + j * z_dim1], & - c__1); - } - } -/* L90: */ - } - } -/* L100: */ - } - -/* Ensure that H(I,I-1) is real. */ - - i__2 = i__ + (i__ - 1) * h_dim1; - temp.r = h__[i__2].r, temp.i = h__[i__2].i; - if (d_imag(&temp) != 0.) { - rtemp = z_abs(&temp); - i__2 = i__ + (i__ - 1) * h_dim1; - h__[i__2].r = rtemp, h__[i__2].i = 0.; - z__1.r = temp.r / rtemp, z__1.i = temp.i / rtemp; - temp.r = z__1.r, temp.i = z__1.i; - if (i2 > i__) { - i__2 = i2 - i__; - d_cnjg(&z__1, &temp); - zscal_(&i__2, &z__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); - } - i__2 = i__ - i1; - zscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1); - if (*wantz) { - zscal_(&nz, &temp, &z__[*iloz + i__ * z_dim1], &c__1); - } - } - -/* L110: */ - } - -/* Failure to converge in remaining number of iterations */ - - *info = i__; - return 0; - -L120: - -/* H(I,I-1) is negligible: one eigenvalue has converged. */ - - i__1 = i__; - i__2 = i__ + i__ * h_dim1; - w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; - -/* - Decrement number of remaining iterations, and return to start of - the main loop with new value of I. -*/ - - itn -= its; - i__ = l - 1; - goto L10; - -L130: - return 0; - -/* End of ZLAHQR */ - -} /* zlahqr_ */ - -/* Subroutine */ int zlahrd_(integer *n, integer *k, integer *nb, - doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *t, - integer *ldt, doublecomplex *y, integer *ldy) -{ - /* System generated locals */ - integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, - i__3; - doublecomplex z__1; - - /* Local variables */ - static integer i__; - static doublecomplex ei; - extern /* Subroutine */ int zscal_(integer *, doublecomplex *, - doublecomplex *, integer *), zgemv_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *), - zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, - integer *), zaxpy_(integer *, doublecomplex *, doublecomplex *, - integer *, doublecomplex *, integer *), ztrmv_(char *, char *, - char *, integer *, doublecomplex *, integer *, doublecomplex *, - integer *), zlarfg_(integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *), - zlacgv_(integer *, doublecomplex *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZLAHRD reduces the first NB columns of a complex general n-by-(n-k+1) - matrix A so that elements below the k-th subdiagonal are zero. The - reduction is performed by a unitary similarity transformation - Q' * A * Q. The routine returns the matrices V and T which determine - Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. - - This is an auxiliary routine called by ZGEHRD. - - Arguments - ========= - - N (input) INTEGER - The order of the matrix A. - - K (input) INTEGER - The offset for the reduction. Elements below the k-th - subdiagonal in the first NB columns are reduced to zero. - - NB (input) INTEGER - The number of columns to be reduced. - - A (input/output) COMPLEX*16 array, dimension (LDA,N-K+1) - On entry, the n-by-(n-k+1) general matrix A. - On exit, the elements on and above the k-th subdiagonal in - the first NB columns are overwritten with the corresponding - elements of the reduced matrix; the elements below the k-th - subdiagonal, with the array TAU, represent the matrix Q as a - product of elementary reflectors. The other columns of A are - unchanged. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - TAU (output) COMPLEX*16 array, dimension (NB) - The scalar factors of the elementary reflectors. See Further - Details. - - T (output) COMPLEX*16 array, dimension (LDT,NB) - The upper triangular matrix T. - - LDT (input) INTEGER - The leading dimension of the array T. LDT >= NB. - - Y (output) COMPLEX*16 array, dimension (LDY,NB) - The n-by-nb matrix Y. - - LDY (input) INTEGER - The leading dimension of the array Y. LDY >= max(1,N). - - Further Details - =============== - - The matrix Q is represented as a product of nb elementary reflectors - - Q = H(1) H(2) . . . H(nb). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in - A(i+k+1:n,i), and tau in TAU(i). - - The elements of the vectors v together form the (n-k+1)-by-nb matrix - V which is needed, with T and Y, to apply the transformation to the - unreduced part of the matrix, using an update of the form: - A := (I - V*T*V') * (A - Y*V'). - - The contents of A on exit are illustrated by the following example - with n = 7, k = 3 and nb = 2: - - ( a h a a a ) - ( a h a a a ) - ( a h a a a ) - ( h h a a a ) - ( v1 h a a a ) - ( v1 v2 a a a ) - ( v1 v2 a a a ) - - where a denotes an element of the original matrix A, h denotes a - modified element of the upper Hessenberg matrix H, and vi denotes an - element of the vector defining H(i). - - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - --tau; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - y_dim1 = *ldy; - y_offset = 1 + y_dim1; - y -= y_offset; - - /* Function Body */ - if (*n <= 1) { - return 0; - } - - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { - if (i__ > 1) { - -/* - Update A(1:n,i) - - Compute i-th column of A - Y * V' -*/ - - i__2 = i__ - 1; - zlacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda); - i__2 = i__ - 1; - z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", n, &i__2, &z__1, &y[y_offset], ldy, &a[*k - + i__ - 1 + a_dim1], lda, &c_b60, &a[i__ * a_dim1 + 1], & - c__1); - i__2 = i__ - 1; - zlacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda); - -/* - Apply I - V * T' * V' to this column (call it b) from the - left, using the last column of T as workspace - - Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) - ( V2 ) ( b2 ) - - where V1 is unit lower triangular - - w := V1' * b1 -*/ - - i__2 = i__ - 1; - zcopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + - 1], &c__1); - i__2 = i__ - 1; - ztrmv_("Lower", "Conjugate transpose", "Unit", &i__2, &a[*k + 1 + - a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1); - -/* w := w + V2'*b2 */ - - i__2 = *n - *k - i__ + 1; - i__3 = i__ - 1; - zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[*k + i__ + - a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b60, - &t[*nb * t_dim1 + 1], &c__1); - -/* w := T'*w */ - - i__2 = i__ - 1; - ztrmv_("Upper", "Conjugate transpose", "Non-unit", &i__2, &t[ - t_offset], ldt, &t[*nb * t_dim1 + 1], &c__1); - -/* b2 := b2 - V2*w */ - - i__2 = *n - *k - i__ + 1; - i__3 = i__ - 1; - z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", &i__2, &i__3, &z__1, &a[*k + i__ + a_dim1], - lda, &t[*nb * t_dim1 + 1], &c__1, &c_b60, &a[*k + i__ + - i__ * a_dim1], &c__1); - -/* b1 := b1 - V1*w */ - - i__2 = i__ - 1; - ztrmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1] - , lda, &t[*nb * t_dim1 + 1], &c__1); - i__2 = i__ - 1; - z__1.r = -1., z__1.i = -0.; - zaxpy_(&i__2, &z__1, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + i__ - * a_dim1], &c__1); - - i__2 = *k + i__ - 1 + (i__ - 1) * a_dim1; - a[i__2].r = ei.r, a[i__2].i = ei.i; - } - -/* - Generate the elementary reflector H(i) to annihilate - A(k+i+1:n,i) -*/ - - i__2 = *k + i__ + i__ * a_dim1; - ei.r = a[i__2].r, ei.i = a[i__2].i; - i__2 = *n - *k - i__ + 1; -/* Computing MIN */ - i__3 = *k + i__ + 1; - zlarfg_(&i__2, &ei, &a[min(i__3,*n) + i__ * a_dim1], &c__1, &tau[i__]) - ; - i__2 = *k + i__ + i__ * a_dim1; - a[i__2].r = 1., a[i__2].i = 0.; - -/* Compute Y(1:n,i) */ - - i__2 = *n - *k - i__ + 1; - zgemv_("No transpose", n, &i__2, &c_b60, &a[(i__ + 1) * a_dim1 + 1], - lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b59, &y[i__ * - y_dim1 + 1], &c__1); - i__2 = *n - *k - i__ + 1; - i__3 = i__ - 1; - zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[*k + i__ + - a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b59, &t[ - i__ * t_dim1 + 1], &c__1); - i__2 = i__ - 1; - z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", n, &i__2, &z__1, &y[y_offset], ldy, &t[i__ * - t_dim1 + 1], &c__1, &c_b60, &y[i__ * y_dim1 + 1], &c__1); - zscal_(n, &tau[i__], &y[i__ * y_dim1 + 1], &c__1); - -/* Compute T(1:i,i) */ - - i__2 = i__ - 1; - i__3 = i__; - z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i; - zscal_(&i__2, &z__1, &t[i__ * t_dim1 + 1], &c__1); - i__2 = i__ - 1; - ztrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt, - &t[i__ * t_dim1 + 1], &c__1) - ; - i__2 = i__ + i__ * t_dim1; - i__3 = i__; - t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i; - -/* L10: */ - } - i__1 = *k + *nb + *nb * a_dim1; - a[i__1].r = ei.r, a[i__1].i = ei.i; - - return 0; - -/* End of ZLAHRD */ - -} /* zlahrd_ */ - -/* Subroutine */ int zlals0_(integer *icompq, integer *nl, integer *nr, - integer *sqre, integer *nrhs, doublecomplex *b, integer *ldb, - doublecomplex *bx, integer *ldbx, integer *perm, integer *givptr, - integer *givcol, integer *ldgcol, doublereal *givnum, integer *ldgnum, - doublereal *poles, doublereal *difl, doublereal *difr, doublereal * - z__, integer *k, doublereal *c__, doublereal *s, doublereal *rwork, - integer *info) -{ - /* System generated locals */ - integer givcol_dim1, givcol_offset, difr_dim1, difr_offset, givnum_dim1, - givnum_offset, poles_dim1, poles_offset, b_dim1, b_offset, - bx_dim1, bx_offset, i__1, i__2, i__3, i__4, i__5; - doublereal d__1; - doublecomplex z__1; - - /* Builtin functions */ - double d_imag(doublecomplex *); - - /* Local variables */ - static integer i__, j, m, n; - static doublereal dj; - static integer nlp1, jcol; - static doublereal temp; - static integer jrow; - extern doublereal dnrm2_(integer *, doublereal *, integer *); - static doublereal diflj, difrj, dsigj; - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *), zdrot_(integer *, - doublecomplex *, integer *, doublecomplex *, integer *, - doublereal *, doublereal *); - extern doublereal dlamc3_(doublereal *, doublereal *); - extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, - doublecomplex *, integer *), xerbla_(char *, integer *); - static doublereal dsigjp; - extern /* Subroutine */ int zdscal_(integer *, doublereal *, - doublecomplex *, integer *), zlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublecomplex * - , integer *, integer *), zlacpy_(char *, integer *, - integer *, doublecomplex *, integer *, doublecomplex *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - December 1, 1999 - - - Purpose - ======= - - ZLALS0 applies back the multiplying factors of either the left or the - right singular vector matrix of a diagonal matrix appended by a row - to the right hand side matrix B in solving the least squares problem - using the divide-and-conquer SVD approach. - - For the left singular vector matrix, three types of orthogonal - matrices are involved: - - (1L) Givens rotations: the number of such rotations is GIVPTR; the - pairs of columns/rows they were applied to are stored in GIVCOL; - and the C- and S-values of these rotations are stored in GIVNUM. - - (2L) Permutation. The (NL+1)-st row of B is to be moved to the first - row, and for J=2:N, PERM(J)-th row of B is to be moved to the - J-th row. - - (3L) The left singular vector matrix of the remaining matrix. - - For the right singular vector matrix, four types of orthogonal - matrices are involved: - - (1R) The right singular vector matrix of the remaining matrix. - - (2R) If SQRE = 1, one extra Givens rotation to generate the right - null space. - - (3R) The inverse transformation of (2L). - - (4R) The inverse transformation of (1L). - - Arguments - ========= - - ICOMPQ (input) INTEGER - Specifies whether singular vectors are to be computed in - factored form: - = 0: Left singular vector matrix. - = 1: Right singular vector matrix. - - NL (input) INTEGER - The row dimension of the upper block. NL >= 1. - - NR (input) INTEGER - The row dimension of the lower block. NR >= 1. - - SQRE (input) INTEGER - = 0: the lower block is an NR-by-NR square matrix. - = 1: the lower block is an NR-by-(NR+1) rectangular matrix. - - The bidiagonal matrix has row dimension N = NL + NR + 1, - and column dimension M = N + SQRE. - - NRHS (input) INTEGER - The number of columns of B and BX. NRHS must be at least 1. - - B (input/output) COMPLEX*16 array, dimension ( LDB, NRHS ) - On input, B contains the right hand sides of the least - squares problem in rows 1 through M. On output, B contains - the solution X in rows 1 through N. - - LDB (input) INTEGER - The leading dimension of B. LDB must be at least - max(1,MAX( M, N ) ). - - BX (workspace) COMPLEX*16 array, dimension ( LDBX, NRHS ) - - LDBX (input) INTEGER - The leading dimension of BX. - - PERM (input) INTEGER array, dimension ( N ) - The permutations (from deflation and sorting) applied - to the two blocks. - - GIVPTR (input) INTEGER - The number of Givens rotations which took place in this - subproblem. - - GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) - Each pair of numbers indicates a pair of rows/columns - involved in a Givens rotation. - - LDGCOL (input) INTEGER - The leading dimension of GIVCOL, must be at least N. - - GIVNUM (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) - Each number indicates the C or S value used in the - corresponding Givens rotation. - - LDGNUM (input) INTEGER - The leading dimension of arrays DIFR, POLES and - GIVNUM, must be at least K. - - POLES (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) - On entry, POLES(1:K, 1) contains the new singular - values obtained from solving the secular equation, and - POLES(1:K, 2) is an array containing the poles in the secular - equation. - - DIFL (input) DOUBLE PRECISION array, dimension ( K ). - On entry, DIFL(I) is the distance between I-th updated - (undeflated) singular value and the I-th (undeflated) old - singular value. - - DIFR (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ). - On entry, DIFR(I, 1) contains the distances between I-th - updated (undeflated) singular value and the I+1-th - (undeflated) old singular value. And DIFR(I, 2) is the - normalizing factor for the I-th right singular vector. - - Z (input) DOUBLE PRECISION array, dimension ( K ) - Contain the components of the deflation-adjusted updating row - vector. - - K (input) INTEGER - Contains the dimension of the non-deflated matrix, - This is the order of the related secular equation. 1 <= K <=N. - - C (input) DOUBLE PRECISION - C contains garbage if SQRE =0 and the C-value of a Givens - rotation related to the right null space if SQRE = 1. - - S (input) DOUBLE PRECISION - S contains garbage if SQRE =0 and the S-value of a Givens - rotation related to the right null space if SQRE = 1. - - RWORK (workspace) DOUBLE PRECISION array, dimension - ( K*(1+NRHS) + 2*NRHS ) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - Based on contributions by - Ming Gu and Ren-Cang Li, Computer Science Division, University of - California at Berkeley, USA - Osni Marques, LBNL/NERSC, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - bx_dim1 = *ldbx; - bx_offset = 1 + bx_dim1; - bx -= bx_offset; - --perm; - givcol_dim1 = *ldgcol; - givcol_offset = 1 + givcol_dim1; - givcol -= givcol_offset; - difr_dim1 = *ldgnum; - difr_offset = 1 + difr_dim1; - difr -= difr_offset; - poles_dim1 = *ldgnum; - poles_offset = 1 + poles_dim1; - poles -= poles_offset; - givnum_dim1 = *ldgnum; - givnum_offset = 1 + givnum_dim1; - givnum -= givnum_offset; - --difl; - --z__; - --rwork; - - /* Function Body */ - *info = 0; - - if (*icompq < 0 || *icompq > 1) { - *info = -1; - } else if (*nl < 1) { - *info = -2; - } else if (*nr < 1) { - *info = -3; - } else if (*sqre < 0 || *sqre > 1) { - *info = -4; - } - - n = *nl + *nr + 1; - - if (*nrhs < 1) { - *info = -5; - } else if (*ldb < n) { - *info = -7; - } else if (*ldbx < n) { - *info = -9; - } else if (*givptr < 0) { - *info = -11; - } else if (*ldgcol < n) { - *info = -13; - } else if (*ldgnum < n) { - *info = -15; - } else if (*k < 1) { - *info = -20; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZLALS0", &i__1); - return 0; - } - - m = n + *sqre; - nlp1 = *nl + 1; - - if (*icompq == 0) { - -/* - Apply back orthogonal transformations from the left. - - Step (1L): apply back the Givens rotations performed. -*/ - - i__1 = *givptr; - for (i__ = 1; i__ <= i__1; ++i__) { - zdrot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & - b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + - (givnum_dim1 << 1)], &givnum[i__ + givnum_dim1]); -/* L10: */ - } - -/* Step (2L): permute rows of B. */ - - zcopy_(nrhs, &b[nlp1 + b_dim1], ldb, &bx[bx_dim1 + 1], ldbx); - i__1 = n; - for (i__ = 2; i__ <= i__1; ++i__) { - zcopy_(nrhs, &b[perm[i__] + b_dim1], ldb, &bx[i__ + bx_dim1], - ldbx); -/* L20: */ - } - -/* - Step (3L): apply the inverse of the left singular vector - matrix to BX. -*/ - - if (*k == 1) { - zcopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb); - if (z__[1] < 0.) { - zdscal_(nrhs, &c_b1294, &b[b_offset], ldb); - } - } else { - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - diflj = difl[j]; - dj = poles[j + poles_dim1]; - dsigj = -poles[j + (poles_dim1 << 1)]; - if (j < *k) { - difrj = -difr[j + difr_dim1]; - dsigjp = -poles[j + 1 + (poles_dim1 << 1)]; - } - if (z__[j] == 0. || poles[j + (poles_dim1 << 1)] == 0.) { - rwork[j] = 0.; - } else { - rwork[j] = -poles[j + (poles_dim1 << 1)] * z__[j] / diflj - / (poles[j + (poles_dim1 << 1)] + dj); - } - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - if (z__[i__] == 0. || poles[i__ + (poles_dim1 << 1)] == - 0.) { - rwork[i__] = 0.; - } else { - rwork[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] - / (dlamc3_(&poles[i__ + (poles_dim1 << 1)], & - dsigj) - diflj) / (poles[i__ + (poles_dim1 << - 1)] + dj); - } -/* L30: */ - } - i__2 = *k; - for (i__ = j + 1; i__ <= i__2; ++i__) { - if (z__[i__] == 0. || poles[i__ + (poles_dim1 << 1)] == - 0.) { - rwork[i__] = 0.; - } else { - rwork[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] - / (dlamc3_(&poles[i__ + (poles_dim1 << 1)], & - dsigjp) + difrj) / (poles[i__ + (poles_dim1 << - 1)] + dj); - } -/* L40: */ - } - rwork[1] = -1.; - temp = dnrm2_(k, &rwork[1], &c__1); - -/* - Since B and BX are complex, the following call to DGEMV - is performed in two steps (real and imaginary parts). - - CALL DGEMV( 'T', K, NRHS, ONE, BX, LDBX, WORK, 1, ZERO, - $ B( J, 1 ), LDB ) -*/ - - i__ = *k + (*nrhs << 1); - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = *k; - for (jrow = 1; jrow <= i__3; ++jrow) { - ++i__; - i__4 = jrow + jcol * bx_dim1; - rwork[i__] = bx[i__4].r; -/* L50: */ - } -/* L60: */ - } - dgemv_("T", k, nrhs, &c_b1015, &rwork[*k + 1 + (*nrhs << 1)], - k, &rwork[1], &c__1, &c_b324, &rwork[*k + 1], &c__1); - i__ = *k + (*nrhs << 1); - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = *k; - for (jrow = 1; jrow <= i__3; ++jrow) { - ++i__; - rwork[i__] = d_imag(&bx[jrow + jcol * bx_dim1]); -/* L70: */ - } -/* L80: */ - } - dgemv_("T", k, nrhs, &c_b1015, &rwork[*k + 1 + (*nrhs << 1)], - k, &rwork[1], &c__1, &c_b324, &rwork[*k + 1 + *nrhs], - &c__1); - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = j + jcol * b_dim1; - i__4 = jcol + *k; - i__5 = jcol + *k + *nrhs; - z__1.r = rwork[i__4], z__1.i = rwork[i__5]; - b[i__3].r = z__1.r, b[i__3].i = z__1.i; -/* L90: */ - } - zlascl_("G", &c__0, &c__0, &temp, &c_b1015, &c__1, nrhs, &b[j - + b_dim1], ldb, info); -/* L100: */ - } - } - -/* Move the deflated rows of BX to B also. */ - - if (*k < max(m,n)) { - i__1 = n - *k; - zlacpy_("A", &i__1, nrhs, &bx[*k + 1 + bx_dim1], ldbx, &b[*k + 1 - + b_dim1], ldb); - } - } else { - -/* - Apply back the right orthogonal transformations. - - Step (1R): apply back the new right singular vector matrix - to B. -*/ - - if (*k == 1) { - zcopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx); - } else { - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dsigj = poles[j + (poles_dim1 << 1)]; - if (z__[j] == 0.) { - rwork[j] = 0.; - } else { - rwork[j] = -z__[j] / difl[j] / (dsigj + poles[j + - poles_dim1]) / difr[j + (difr_dim1 << 1)]; - } - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - if (z__[j] == 0.) { - rwork[i__] = 0.; - } else { - d__1 = -poles[i__ + 1 + (poles_dim1 << 1)]; - rwork[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difr[ - i__ + difr_dim1]) / (dsigj + poles[i__ + - poles_dim1]) / difr[i__ + (difr_dim1 << 1)]; - } -/* L110: */ - } - i__2 = *k; - for (i__ = j + 1; i__ <= i__2; ++i__) { - if (z__[j] == 0.) { - rwork[i__] = 0.; - } else { - d__1 = -poles[i__ + (poles_dim1 << 1)]; - rwork[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difl[ - i__]) / (dsigj + poles[i__ + poles_dim1]) / - difr[i__ + (difr_dim1 << 1)]; - } -/* L120: */ - } - -/* - Since B and BX are complex, the following call to DGEMV - is performed in two steps (real and imaginary parts). - - CALL DGEMV( 'T', K, NRHS, ONE, B, LDB, WORK, 1, ZERO, - $ BX( J, 1 ), LDBX ) -*/ - - i__ = *k + (*nrhs << 1); - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = *k; - for (jrow = 1; jrow <= i__3; ++jrow) { - ++i__; - i__4 = jrow + jcol * b_dim1; - rwork[i__] = b[i__4].r; -/* L130: */ - } -/* L140: */ - } - dgemv_("T", k, nrhs, &c_b1015, &rwork[*k + 1 + (*nrhs << 1)], - k, &rwork[1], &c__1, &c_b324, &rwork[*k + 1], &c__1); - i__ = *k + (*nrhs << 1); - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = *k; - for (jrow = 1; jrow <= i__3; ++jrow) { - ++i__; - rwork[i__] = d_imag(&b[jrow + jcol * b_dim1]); -/* L150: */ - } -/* L160: */ - } - dgemv_("T", k, nrhs, &c_b1015, &rwork[*k + 1 + (*nrhs << 1)], - k, &rwork[1], &c__1, &c_b324, &rwork[*k + 1 + *nrhs], - &c__1); - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = j + jcol * bx_dim1; - i__4 = jcol + *k; - i__5 = jcol + *k + *nrhs; - z__1.r = rwork[i__4], z__1.i = rwork[i__5]; - bx[i__3].r = z__1.r, bx[i__3].i = z__1.i; -/* L170: */ - } -/* L180: */ - } - } - -/* - Step (2R): if SQRE = 1, apply back the rotation that is - related to the right null space of the subproblem. -*/ - - if (*sqre == 1) { - zcopy_(nrhs, &b[m + b_dim1], ldb, &bx[m + bx_dim1], ldbx); - zdrot_(nrhs, &bx[bx_dim1 + 1], ldbx, &bx[m + bx_dim1], ldbx, c__, - s); - } - if (*k < max(m,n)) { - i__1 = n - *k; - zlacpy_("A", &i__1, nrhs, &b[*k + 1 + b_dim1], ldb, &bx[*k + 1 + - bx_dim1], ldbx); - } - -/* Step (3R): permute rows of B. */ - - zcopy_(nrhs, &bx[bx_dim1 + 1], ldbx, &b[nlp1 + b_dim1], ldb); - if (*sqre == 1) { - zcopy_(nrhs, &bx[m + bx_dim1], ldbx, &b[m + b_dim1], ldb); - } - i__1 = n; - for (i__ = 2; i__ <= i__1; ++i__) { - zcopy_(nrhs, &bx[i__ + bx_dim1], ldbx, &b[perm[i__] + b_dim1], - ldb); -/* L190: */ - } - -/* Step (4R): apply back the Givens rotations performed. */ - - for (i__ = *givptr; i__ >= 1; --i__) { - d__1 = -givnum[i__ + givnum_dim1]; - zdrot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & - b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + - (givnum_dim1 << 1)], &d__1); -/* L200: */ - } - } - - return 0; - -/* End of ZLALS0 */ - -} /* zlals0_ */ - -/* Subroutine */ int zlalsa_(integer *icompq, integer *smlsiz, integer *n, - integer *nrhs, doublecomplex *b, integer *ldb, doublecomplex *bx, - integer *ldbx, doublereal *u, integer *ldu, doublereal *vt, integer * - k, doublereal *difl, doublereal *difr, doublereal *z__, doublereal * - poles, integer *givptr, integer *givcol, integer *ldgcol, integer * - perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal * - rwork, integer *iwork, integer *info) -{ - /* System generated locals */ - integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1, - difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, - poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, - z_dim1, z_offset, b_dim1, b_offset, bx_dim1, bx_offset, i__1, - i__2, i__3, i__4, i__5, i__6; - doublecomplex z__1; - - /* Builtin functions */ - double d_imag(doublecomplex *); - integer pow_ii(integer *, integer *); - - /* Local variables */ - static integer i__, j, i1, ic, lf, nd, ll, nl, nr, im1, nlf, nrf, lvl, - ndb1, nlp1, lvl2, nrp1, jcol, nlvl, sqre, jrow, jimag; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - static integer jreal, inode, ndiml, ndimr; - extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, - doublecomplex *, integer *), zlals0_(integer *, integer *, - integer *, integer *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *, integer *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - doublereal *, doublereal *, integer *, doublereal *, doublereal *, - doublereal *, integer *), dlasdt_(integer *, integer *, integer * - , integer *, integer *, integer *, integer *), xerbla_(char *, - integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZLALSA is an itermediate step in solving the least squares problem - by computing the SVD of the coefficient matrix in compact form (The - singular vectors are computed as products of simple orthorgonal - matrices.). - - If ICOMPQ = 0, ZLALSA applies the inverse of the left singular vector - matrix of an upper bidiagonal matrix to the right hand side; and if - ICOMPQ = 1, ZLALSA applies the right singular vector matrix to the - right hand side. The singular vector matrices were generated in - compact form by ZLALSA. - - Arguments - ========= - - ICOMPQ (input) INTEGER - Specifies whether the left or the right singular vector - matrix is involved. - = 0: Left singular vector matrix - = 1: Right singular vector matrix - - SMLSIZ (input) INTEGER - The maximum size of the subproblems at the bottom of the - computation tree. - - N (input) INTEGER - The row and column dimensions of the upper bidiagonal matrix. - - NRHS (input) INTEGER - The number of columns of B and BX. NRHS must be at least 1. - - B (input) COMPLEX*16 array, dimension ( LDB, NRHS ) - On input, B contains the right hand sides of the least - squares problem in rows 1 through M. On output, B contains - the solution X in rows 1 through N. - - LDB (input) INTEGER - The leading dimension of B in the calling subprogram. - LDB must be at least max(1,MAX( M, N ) ). - - BX (output) COMPLEX*16 array, dimension ( LDBX, NRHS ) - On exit, the result of applying the left or right singular - vector matrix to B. - - LDBX (input) INTEGER - The leading dimension of BX. - - U (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ). - On entry, U contains the left singular vector matrices of all - subproblems at the bottom level. - - LDU (input) INTEGER, LDU = > N. - The leading dimension of arrays U, VT, DIFL, DIFR, - POLES, GIVNUM, and Z. - - VT (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ). - On entry, VT' contains the right singular vector matrices of - all subproblems at the bottom level. - - K (input) INTEGER array, dimension ( N ). - - DIFL (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). - where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. - - DIFR (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). - On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record - distances between singular values on the I-th level and - singular values on the (I -1)-th level, and DIFR(*, 2 * I) - record the normalizing factors of the right singular vectors - matrices of subproblems on I-th level. - - Z (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). - On entry, Z(1, I) contains the components of the deflation- - adjusted updating row vector for subproblems on the I-th - level. - - POLES (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). - On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old - singular values involved in the secular equations on the I-th - level. - - GIVPTR (input) INTEGER array, dimension ( N ). - On entry, GIVPTR( I ) records the number of Givens - rotations performed on the I-th problem on the computation - tree. - - GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). - On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the - locations of Givens rotations performed on the I-th level on - the computation tree. - - LDGCOL (input) INTEGER, LDGCOL = > N. - The leading dimension of arrays GIVCOL and PERM. - - PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). - On entry, PERM(*, I) records permutations done on the I-th - level of the computation tree. - - GIVNUM (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). - On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- - values of Givens rotations performed on the I-th level on the - computation tree. - - C (input) DOUBLE PRECISION array, dimension ( N ). - On entry, if the I-th subproblem is not square, - C( I ) contains the C-value of a Givens rotation related to - the right null space of the I-th subproblem. - - S (input) DOUBLE PRECISION array, dimension ( N ). - On entry, if the I-th subproblem is not square, - S( I ) contains the S-value of a Givens rotation related to - the right null space of the I-th subproblem. - - RWORK (workspace) DOUBLE PRECISION array, dimension at least - max ( N, (SMLSZ+1)*NRHS*3 ). - - IWORK (workspace) INTEGER array. - The dimension must be at least 3 * N - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - Based on contributions by - Ming Gu and Ren-Cang Li, Computer Science Division, University of - California at Berkeley, USA - Osni Marques, LBNL/NERSC, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - bx_dim1 = *ldbx; - bx_offset = 1 + bx_dim1; - bx -= bx_offset; - givnum_dim1 = *ldu; - givnum_offset = 1 + givnum_dim1; - givnum -= givnum_offset; - poles_dim1 = *ldu; - poles_offset = 1 + poles_dim1; - poles -= poles_offset; - z_dim1 = *ldu; - z_offset = 1 + z_dim1; - z__ -= z_offset; - difr_dim1 = *ldu; - difr_offset = 1 + difr_dim1; - difr -= difr_offset; - difl_dim1 = *ldu; - difl_offset = 1 + difl_dim1; - difl -= difl_offset; - vt_dim1 = *ldu; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - --k; - --givptr; - perm_dim1 = *ldgcol; - perm_offset = 1 + perm_dim1; - perm -= perm_offset; - givcol_dim1 = *ldgcol; - givcol_offset = 1 + givcol_dim1; - givcol -= givcol_offset; - --c__; - --s; - --rwork; - --iwork; - - /* Function Body */ - *info = 0; - - if (*icompq < 0 || *icompq > 1) { - *info = -1; - } else if (*smlsiz < 3) { - *info = -2; - } else if (*n < *smlsiz) { - *info = -3; - } else if (*nrhs < 1) { - *info = -4; - } else if (*ldb < *n) { - *info = -6; - } else if (*ldbx < *n) { - *info = -8; - } else if (*ldu < *n) { - *info = -10; - } else if (*ldgcol < *n) { - *info = -19; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZLALSA", &i__1); - return 0; - } - -/* Book-keeping and setting up the computation tree. */ - - inode = 1; - ndiml = inode + *n; - ndimr = ndiml + *n; - - dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], - smlsiz); - -/* - The following code applies back the left singular vector factors. - For applying back the right singular vector factors, go to 170. -*/ - - if (*icompq == 1) { - goto L170; - } - -/* - The nodes on the bottom level of the tree were solved - by DLASDQ. The corresponding left and right singular vector - matrices are in explicit form. First apply back the left - singular vector matrices. -*/ - - ndb1 = (nd + 1) / 2; - i__1 = nd; - for (i__ = ndb1; i__ <= i__1; ++i__) { - -/* - IC : center row of each node - NL : number of rows of left subproblem - NR : number of rows of right subproblem - NLF: starting row of the left subproblem - NRF: starting row of the right subproblem -*/ - - i1 = i__ - 1; - ic = iwork[inode + i1]; - nl = iwork[ndiml + i1]; - nr = iwork[ndimr + i1]; - nlf = ic - nl; - nrf = ic + 1; - -/* - Since B and BX are complex, the following call to DGEMM - is performed in two steps (real and imaginary parts). - - CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, - $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) -*/ - - j = nl * *nrhs << 1; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = nlf + nl - 1; - for (jrow = nlf; jrow <= i__3; ++jrow) { - ++j; - i__4 = jrow + jcol * b_dim1; - rwork[j] = b[i__4].r; -/* L10: */ - } -/* L20: */ - } - dgemm_("T", "N", &nl, nrhs, &nl, &c_b1015, &u[nlf + u_dim1], ldu, & - rwork[(nl * *nrhs << 1) + 1], &nl, &c_b324, &rwork[1], &nl); - j = nl * *nrhs << 1; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = nlf + nl - 1; - for (jrow = nlf; jrow <= i__3; ++jrow) { - ++j; - rwork[j] = d_imag(&b[jrow + jcol * b_dim1]); -/* L30: */ - } -/* L40: */ - } - dgemm_("T", "N", &nl, nrhs, &nl, &c_b1015, &u[nlf + u_dim1], ldu, & - rwork[(nl * *nrhs << 1) + 1], &nl, &c_b324, &rwork[nl * *nrhs - + 1], &nl); - jreal = 0; - jimag = nl * *nrhs; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = nlf + nl - 1; - for (jrow = nlf; jrow <= i__3; ++jrow) { - ++jreal; - ++jimag; - i__4 = jrow + jcol * bx_dim1; - i__5 = jreal; - i__6 = jimag; - z__1.r = rwork[i__5], z__1.i = rwork[i__6]; - bx[i__4].r = z__1.r, bx[i__4].i = z__1.i; -/* L50: */ - } -/* L60: */ - } - -/* - Since B and BX are complex, the following call to DGEMM - is performed in two steps (real and imaginary parts). - - CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, - $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) -*/ - - j = nr * *nrhs << 1; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = nrf + nr - 1; - for (jrow = nrf; jrow <= i__3; ++jrow) { - ++j; - i__4 = jrow + jcol * b_dim1; - rwork[j] = b[i__4].r; -/* L70: */ - } -/* L80: */ - } - dgemm_("T", "N", &nr, nrhs, &nr, &c_b1015, &u[nrf + u_dim1], ldu, & - rwork[(nr * *nrhs << 1) + 1], &nr, &c_b324, &rwork[1], &nr); - j = nr * *nrhs << 1; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = nrf + nr - 1; - for (jrow = nrf; jrow <= i__3; ++jrow) { - ++j; - rwork[j] = d_imag(&b[jrow + jcol * b_dim1]); -/* L90: */ - } -/* L100: */ - } - dgemm_("T", "N", &nr, nrhs, &nr, &c_b1015, &u[nrf + u_dim1], ldu, & - rwork[(nr * *nrhs << 1) + 1], &nr, &c_b324, &rwork[nr * *nrhs - + 1], &nr); - jreal = 0; - jimag = nr * *nrhs; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = nrf + nr - 1; - for (jrow = nrf; jrow <= i__3; ++jrow) { - ++jreal; - ++jimag; - i__4 = jrow + jcol * bx_dim1; - i__5 = jreal; - i__6 = jimag; - z__1.r = rwork[i__5], z__1.i = rwork[i__6]; - bx[i__4].r = z__1.r, bx[i__4].i = z__1.i; -/* L110: */ - } -/* L120: */ - } - -/* L130: */ - } - -/* - Next copy the rows of B that correspond to unchanged rows - in the bidiagonal matrix to BX. -*/ - - i__1 = nd; - for (i__ = 1; i__ <= i__1; ++i__) { - ic = iwork[inode + i__ - 1]; - zcopy_(nrhs, &b[ic + b_dim1], ldb, &bx[ic + bx_dim1], ldbx); -/* L140: */ - } - -/* - Finally go through the left singular vector matrices of all - the other subproblems bottom-up on the tree. -*/ - - j = pow_ii(&c__2, &nlvl); - sqre = 0; - - for (lvl = nlvl; lvl >= 1; --lvl) { - lvl2 = (lvl << 1) - 1; - -/* - find the first node LF and last node LL on - the current level LVL -*/ - - if (lvl == 1) { - lf = 1; - ll = 1; - } else { - i__1 = lvl - 1; - lf = pow_ii(&c__2, &i__1); - ll = (lf << 1) - 1; - } - i__1 = ll; - for (i__ = lf; i__ <= i__1; ++i__) { - im1 = i__ - 1; - ic = iwork[inode + im1]; - nl = iwork[ndiml + im1]; - nr = iwork[ndimr + im1]; - nlf = ic - nl; - nrf = ic + 1; - --j; - zlals0_(icompq, &nl, &nr, &sqre, nrhs, &bx[nlf + bx_dim1], ldbx, & - b[nlf + b_dim1], ldb, &perm[nlf + lvl * perm_dim1], & - givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & - givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * - poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + - lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ - j], &s[j], &rwork[1], info); -/* L150: */ - } -/* L160: */ - } - goto L330; - -/* ICOMPQ = 1: applying back the right singular vector factors. */ - -L170: - -/* - First now go through the right singular vector matrices of all - the tree nodes top-down. -*/ - - j = 0; - i__1 = nlvl; - for (lvl = 1; lvl <= i__1; ++lvl) { - lvl2 = (lvl << 1) - 1; - -/* - Find the first node LF and last node LL on - the current level LVL. -*/ - - if (lvl == 1) { - lf = 1; - ll = 1; - } else { - i__2 = lvl - 1; - lf = pow_ii(&c__2, &i__2); - ll = (lf << 1) - 1; - } - i__2 = lf; - for (i__ = ll; i__ >= i__2; --i__) { - im1 = i__ - 1; - ic = iwork[inode + im1]; - nl = iwork[ndiml + im1]; - nr = iwork[ndimr + im1]; - nlf = ic - nl; - nrf = ic + 1; - if (i__ == ll) { - sqre = 0; - } else { - sqre = 1; - } - ++j; - zlals0_(icompq, &nl, &nr, &sqre, nrhs, &b[nlf + b_dim1], ldb, &bx[ - nlf + bx_dim1], ldbx, &perm[nlf + lvl * perm_dim1], & - givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & - givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * - poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + - lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ - j], &s[j], &rwork[1], info); -/* L180: */ - } -/* L190: */ - } - -/* - The nodes on the bottom level of the tree were solved - by DLASDQ. The corresponding right singular vector - matrices are in explicit form. Apply them back. -*/ - - ndb1 = (nd + 1) / 2; - i__1 = nd; - for (i__ = ndb1; i__ <= i__1; ++i__) { - i1 = i__ - 1; - ic = iwork[inode + i1]; - nl = iwork[ndiml + i1]; - nr = iwork[ndimr + i1]; - nlp1 = nl + 1; - if (i__ == nd) { - nrp1 = nr; - } else { - nrp1 = nr + 1; - } - nlf = ic - nl; - nrf = ic + 1; - -/* - Since B and BX are complex, the following call to DGEMM is - performed in two steps (real and imaginary parts). - - CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, - $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) -*/ - - j = nlp1 * *nrhs << 1; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = nlf + nlp1 - 1; - for (jrow = nlf; jrow <= i__3; ++jrow) { - ++j; - i__4 = jrow + jcol * b_dim1; - rwork[j] = b[i__4].r; -/* L200: */ - } -/* L210: */ - } - dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b1015, &vt[nlf + vt_dim1], - ldu, &rwork[(nlp1 * *nrhs << 1) + 1], &nlp1, &c_b324, &rwork[ - 1], &nlp1); - j = nlp1 * *nrhs << 1; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = nlf + nlp1 - 1; - for (jrow = nlf; jrow <= i__3; ++jrow) { - ++j; - rwork[j] = d_imag(&b[jrow + jcol * b_dim1]); -/* L220: */ - } -/* L230: */ - } - dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b1015, &vt[nlf + vt_dim1], - ldu, &rwork[(nlp1 * *nrhs << 1) + 1], &nlp1, &c_b324, &rwork[ - nlp1 * *nrhs + 1], &nlp1); - jreal = 0; - jimag = nlp1 * *nrhs; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = nlf + nlp1 - 1; - for (jrow = nlf; jrow <= i__3; ++jrow) { - ++jreal; - ++jimag; - i__4 = jrow + jcol * bx_dim1; - i__5 = jreal; - i__6 = jimag; - z__1.r = rwork[i__5], z__1.i = rwork[i__6]; - bx[i__4].r = z__1.r, bx[i__4].i = z__1.i; -/* L240: */ - } -/* L250: */ - } - -/* - Since B and BX are complex, the following call to DGEMM is - performed in two steps (real and imaginary parts). - - CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, - $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) -*/ - - j = nrp1 * *nrhs << 1; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = nrf + nrp1 - 1; - for (jrow = nrf; jrow <= i__3; ++jrow) { - ++j; - i__4 = jrow + jcol * b_dim1; - rwork[j] = b[i__4].r; -/* L260: */ - } -/* L270: */ - } - dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b1015, &vt[nrf + vt_dim1], - ldu, &rwork[(nrp1 * *nrhs << 1) + 1], &nrp1, &c_b324, &rwork[ - 1], &nrp1); - j = nrp1 * *nrhs << 1; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = nrf + nrp1 - 1; - for (jrow = nrf; jrow <= i__3; ++jrow) { - ++j; - rwork[j] = d_imag(&b[jrow + jcol * b_dim1]); -/* L280: */ - } -/* L290: */ - } - dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b1015, &vt[nrf + vt_dim1], - ldu, &rwork[(nrp1 * *nrhs << 1) + 1], &nrp1, &c_b324, &rwork[ - nrp1 * *nrhs + 1], &nrp1); - jreal = 0; - jimag = nrp1 * *nrhs; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = nrf + nrp1 - 1; - for (jrow = nrf; jrow <= i__3; ++jrow) { - ++jreal; - ++jimag; - i__4 = jrow + jcol * bx_dim1; - i__5 = jreal; - i__6 = jimag; - z__1.r = rwork[i__5], z__1.i = rwork[i__6]; - bx[i__4].r = z__1.r, bx[i__4].i = z__1.i; -/* L300: */ - } -/* L310: */ - } - -/* L320: */ - } - -L330: - - return 0; - -/* End of ZLALSA */ - -} /* zlalsa_ */ - -/* Subroutine */ int zlalsd_(char *uplo, integer *smlsiz, integer *n, integer - *nrhs, doublereal *d__, doublereal *e, doublecomplex *b, integer *ldb, - doublereal *rcond, integer *rank, doublecomplex *work, doublereal * - rwork, integer *iwork, integer *info) -{ - /* System generated locals */ - integer b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, i__6; - doublereal d__1; - doublecomplex z__1; - - /* Builtin functions */ - double d_imag(doublecomplex *), log(doublereal), d_sign(doublereal *, - doublereal *); - - /* Local variables */ - static integer c__, i__, j, k; - static doublereal r__; - static integer s, u, z__; - static doublereal cs; - static integer bx; - static doublereal sn; - static integer st, vt, nm1, st1; - static doublereal eps; - static integer iwk; - static doublereal tol; - static integer difl, difr, jcol, irwb, perm, nsub, nlvl, sqre, bxst, jrow, - irwu, jimag; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - static integer jreal, irwib, poles, sizei, irwrb, nsize; - extern /* Subroutine */ int zdrot_(integer *, doublecomplex *, integer *, - doublecomplex *, integer *, doublereal *, doublereal *), zcopy_( - integer *, doublecomplex *, integer *, doublecomplex *, integer *) - ; - static integer irwvt, icmpq1, icmpq2; - - extern /* Subroutine */ int dlasda_(integer *, integer *, integer *, - integer *, doublereal *, doublereal *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *, integer *, integer *, integer *, - doublereal *, doublereal *, doublereal *, doublereal *, integer *, - integer *), dlascl_(char *, integer *, integer *, doublereal *, - doublereal *, integer *, integer *, doublereal *, integer *, - integer *); - extern integer idamax_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dlasdq_(char *, integer *, integer *, integer - *, integer *, integer *, doublereal *, doublereal *, doublereal *, - integer *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *), dlaset_(char *, integer *, - integer *, doublereal *, doublereal *, doublereal *, integer *), dlartg_(doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *), xerbla_(char *, integer *); - static integer givcol; - extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); - extern /* Subroutine */ int zlalsa_(integer *, integer *, integer *, - integer *, doublecomplex *, integer *, doublecomplex *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - doublereal *, doublereal *, doublereal *, integer *, integer *, - integer *, integer *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *, integer *), zlascl_(char *, integer *, - integer *, doublereal *, doublereal *, integer *, integer *, - doublecomplex *, integer *, integer *), dlasrt_(char *, - integer *, doublereal *, integer *), zlacpy_(char *, - integer *, integer *, doublecomplex *, integer *, doublecomplex *, - integer *), zlaset_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, doublecomplex *, integer *); - static doublereal orgnrm; - static integer givnum, givptr, nrwork, irwwrk, smlszp; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - ZLALSD uses the singular value decomposition of A to solve the least - squares problem of finding X to minimize the Euclidean norm of each - column of A*X-B, where A is N-by-N upper bidiagonal, and X and B - are N-by-NRHS. The solution X overwrites B. - - The singular values of A smaller than RCOND times the largest - singular value are treated as zero in solving the least squares - problem; in this case a minimum norm solution is returned. - The actual singular values are returned in D in ascending order. - - This code makes very mild assumptions about floating point - arithmetic. It will work on machines with a guard digit in - add/subtract, or on those binary machines without guard digits - which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. - It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': D and E define an upper bidiagonal matrix. - = 'L': D and E define a lower bidiagonal matrix. - - SMLSIZ (input) INTEGER - The maximum size of the subproblems at the bottom of the - computation tree. - - N (input) INTEGER - The dimension of the bidiagonal matrix. N >= 0. - - NRHS (input) INTEGER - The number of columns of B. NRHS must be at least 1. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry D contains the main diagonal of the bidiagonal - matrix. On exit, if INFO = 0, D contains its singular values. - - E (input) DOUBLE PRECISION array, dimension (N-1) - Contains the super-diagonal entries of the bidiagonal matrix. - On exit, E has been destroyed. - - B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) - On input, B contains the right hand sides of the least - squares problem. On output, B contains the solution X. - - LDB (input) INTEGER - The leading dimension of B in the calling subprogram. - LDB must be at least max(1,N). - - RCOND (input) DOUBLE PRECISION - The singular values of A less than or equal to RCOND times - the largest singular value are treated as zero in solving - the least squares problem. If RCOND is negative, - machine precision is used instead. - For example, if diag(S)*X=B were the least squares problem, - where diag(S) is a diagonal matrix of singular values, the - solution would be X(i) = B(i) / S(i) if S(i) is greater than - RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to - RCOND*max(S). - - RANK (output) INTEGER - The number of singular values of A greater than RCOND times - the largest singular value. - - WORK (workspace) COMPLEX*16 array, dimension at least - (N * NRHS). - - RWORK (workspace) DOUBLE PRECISION array, dimension at least - (9*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + (SMLSIZ+1)**2), - where - NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) - - IWORK (workspace) INTEGER array, dimension at least - (3*N*NLVL + 11*N). - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an singular value while - working on the submatrix lying in rows and columns - INFO/(N+1) through MOD(INFO,N+1). - - Further Details - =============== - - Based on contributions by - Ming Gu and Ren-Cang Li, Computer Science Division, University of - California at Berkeley, USA - Osni Marques, LBNL/NERSC, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - --work; - --rwork; - --iwork; - - /* Function Body */ - *info = 0; - - if (*n < 0) { - *info = -3; - } else if (*nrhs < 1) { - *info = -4; - } else if (*ldb < 1 || *ldb < *n) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZLALSD", &i__1); - return 0; - } - - eps = EPSILON; - -/* Set up the tolerance. */ - - if (*rcond <= 0. || *rcond >= 1.) { - *rcond = eps; - } - - *rank = 0; - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } else if (*n == 1) { - if (d__[1] == 0.) { - zlaset_("A", &c__1, nrhs, &c_b59, &c_b59, &b[b_offset], ldb); - } else { - *rank = 1; - zlascl_("G", &c__0, &c__0, &d__[1], &c_b1015, &c__1, nrhs, &b[ - b_offset], ldb, info); - d__[1] = abs(d__[1]); - } - return 0; - } - -/* Rotate the matrix if it is lower bidiagonal. */ - - if (*(unsigned char *)uplo == 'L') { - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); - d__[i__] = r__; - e[i__] = sn * d__[i__ + 1]; - d__[i__ + 1] = cs * d__[i__ + 1]; - if (*nrhs == 1) { - zdrot_(&c__1, &b[i__ + b_dim1], &c__1, &b[i__ + 1 + b_dim1], & - c__1, &cs, &sn); - } else { - rwork[(i__ << 1) - 1] = cs; - rwork[i__ * 2] = sn; - } -/* L10: */ - } - if (*nrhs > 1) { - i__1 = *nrhs; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = *n - 1; - for (j = 1; j <= i__2; ++j) { - cs = rwork[(j << 1) - 1]; - sn = rwork[j * 2]; - zdrot_(&c__1, &b[j + i__ * b_dim1], &c__1, &b[j + 1 + i__ - * b_dim1], &c__1, &cs, &sn); -/* L20: */ - } -/* L30: */ - } - } - } - -/* Scale. */ - - nm1 = *n - 1; - orgnrm = dlanst_("M", n, &d__[1], &e[1]); - if (orgnrm == 0.) { - zlaset_("A", n, nrhs, &c_b59, &c_b59, &b[b_offset], ldb); - return 0; - } - - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, n, &c__1, &d__[1], n, info); - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, &nm1, &c__1, &e[1], &nm1, - info); - -/* - If N is smaller than the minimum divide size SMLSIZ, then solve - the problem with another solver. -*/ - - if (*n <= *smlsiz) { - irwu = 1; - irwvt = irwu + *n * *n; - irwwrk = irwvt + *n * *n; - irwrb = irwwrk; - irwib = irwrb + *n * *nrhs; - irwb = irwib + *n * *nrhs; - dlaset_("A", n, n, &c_b324, &c_b1015, &rwork[irwu], n); - dlaset_("A", n, n, &c_b324, &c_b1015, &rwork[irwvt], n); - dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &rwork[irwvt], n, - &rwork[irwu], n, &rwork[irwwrk], &c__1, &rwork[irwwrk], info); - if (*info != 0) { - return 0; - } - -/* - In the real version, B is passed to DLASDQ and multiplied - internally by Q'. Here B is complex and that product is - computed below in two steps (real and imaginary parts). -*/ - - j = irwb - 1; - i__1 = *nrhs; - for (jcol = 1; jcol <= i__1; ++jcol) { - i__2 = *n; - for (jrow = 1; jrow <= i__2; ++jrow) { - ++j; - i__3 = jrow + jcol * b_dim1; - rwork[j] = b[i__3].r; -/* L40: */ - } -/* L50: */ - } - dgemm_("T", "N", n, nrhs, n, &c_b1015, &rwork[irwu], n, &rwork[irwb], - n, &c_b324, &rwork[irwrb], n); - j = irwb - 1; - i__1 = *nrhs; - for (jcol = 1; jcol <= i__1; ++jcol) { - i__2 = *n; - for (jrow = 1; jrow <= i__2; ++jrow) { - ++j; - rwork[j] = d_imag(&b[jrow + jcol * b_dim1]); -/* L60: */ - } -/* L70: */ - } - dgemm_("T", "N", n, nrhs, n, &c_b1015, &rwork[irwu], n, &rwork[irwb], - n, &c_b324, &rwork[irwib], n); - jreal = irwrb - 1; - jimag = irwib - 1; - i__1 = *nrhs; - for (jcol = 1; jcol <= i__1; ++jcol) { - i__2 = *n; - for (jrow = 1; jrow <= i__2; ++jrow) { - ++jreal; - ++jimag; - i__3 = jrow + jcol * b_dim1; - i__4 = jreal; - i__5 = jimag; - z__1.r = rwork[i__4], z__1.i = rwork[i__5]; - b[i__3].r = z__1.r, b[i__3].i = z__1.i; -/* L80: */ - } -/* L90: */ - } - - tol = *rcond * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if (d__[i__] <= tol) { - zlaset_("A", &c__1, nrhs, &c_b59, &c_b59, &b[i__ + b_dim1], - ldb); - } else { - zlascl_("G", &c__0, &c__0, &d__[i__], &c_b1015, &c__1, nrhs, & - b[i__ + b_dim1], ldb, info); - ++(*rank); - } -/* L100: */ - } - -/* - Since B is complex, the following call to DGEMM is performed - in two steps (real and imaginary parts). That is for V * B - (in the real version of the code V' is stored in WORK). - - CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, WORK, N, B, LDB, ZERO, - $ WORK( NWORK ), N ) -*/ - - j = irwb - 1; - i__1 = *nrhs; - for (jcol = 1; jcol <= i__1; ++jcol) { - i__2 = *n; - for (jrow = 1; jrow <= i__2; ++jrow) { - ++j; - i__3 = jrow + jcol * b_dim1; - rwork[j] = b[i__3].r; -/* L110: */ - } -/* L120: */ - } - dgemm_("T", "N", n, nrhs, n, &c_b1015, &rwork[irwvt], n, &rwork[irwb], - n, &c_b324, &rwork[irwrb], n); - j = irwb - 1; - i__1 = *nrhs; - for (jcol = 1; jcol <= i__1; ++jcol) { - i__2 = *n; - for (jrow = 1; jrow <= i__2; ++jrow) { - ++j; - rwork[j] = d_imag(&b[jrow + jcol * b_dim1]); -/* L130: */ - } -/* L140: */ - } - dgemm_("T", "N", n, nrhs, n, &c_b1015, &rwork[irwvt], n, &rwork[irwb], - n, &c_b324, &rwork[irwib], n); - jreal = irwrb - 1; - jimag = irwib - 1; - i__1 = *nrhs; - for (jcol = 1; jcol <= i__1; ++jcol) { - i__2 = *n; - for (jrow = 1; jrow <= i__2; ++jrow) { - ++jreal; - ++jimag; - i__3 = jrow + jcol * b_dim1; - i__4 = jreal; - i__5 = jimag; - z__1.r = rwork[i__4], z__1.i = rwork[i__5]; - b[i__3].r = z__1.r, b[i__3].i = z__1.i; -/* L150: */ - } -/* L160: */ - } - -/* Unscale. */ - - dlascl_("G", &c__0, &c__0, &c_b1015, &orgnrm, n, &c__1, &d__[1], n, - info); - dlasrt_("D", n, &d__[1], info); - zlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, n, nrhs, &b[b_offset], - ldb, info); - - return 0; - } - -/* Book-keeping and setting up some constants. */ - - nlvl = (integer) (log((doublereal) (*n) / (doublereal) (*smlsiz + 1)) / - log(2.)) + 1; - - smlszp = *smlsiz + 1; - - u = 1; - vt = *smlsiz * *n + 1; - difl = vt + smlszp * *n; - difr = difl + nlvl * *n; - z__ = difr + (nlvl * *n << 1); - c__ = z__ + nlvl * *n; - s = c__ + *n; - poles = s + *n; - givnum = poles + (nlvl << 1) * *n; - nrwork = givnum + (nlvl << 1) * *n; - bx = 1; - - irwrb = nrwork; - irwib = irwrb + *smlsiz * *nrhs; - irwb = irwib + *smlsiz * *nrhs; - - sizei = *n + 1; - k = sizei + *n; - givptr = k + *n; - perm = givptr + *n; - givcol = perm + nlvl * *n; - iwk = givcol + (nlvl * *n << 1); - - st = 1; - sqre = 0; - icmpq1 = 1; - icmpq2 = 0; - nsub = 0; - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if ((d__1 = d__[i__], abs(d__1)) < eps) { - d__[i__] = d_sign(&eps, &d__[i__]); - } -/* L170: */ - } - - i__1 = nm1; - for (i__ = 1; i__ <= i__1; ++i__) { - if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) { - ++nsub; - iwork[nsub] = st; - -/* - Subproblem found. First determine its size and then - apply divide and conquer on it. -*/ - - if (i__ < nm1) { - -/* A subproblem with E(I) small for I < NM1. */ - - nsize = i__ - st + 1; - iwork[sizei + nsub - 1] = nsize; - } else if ((d__1 = e[i__], abs(d__1)) >= eps) { - -/* A subproblem with E(NM1) not too small but I = NM1. */ - - nsize = *n - st + 1; - iwork[sizei + nsub - 1] = nsize; - } else { - -/* - A subproblem with E(NM1) small. This implies an - 1-by-1 subproblem at D(N), which is not solved - explicitly. -*/ - - nsize = i__ - st + 1; - iwork[sizei + nsub - 1] = nsize; - ++nsub; - iwork[nsub] = *n; - iwork[sizei + nsub - 1] = 1; - zcopy_(nrhs, &b[*n + b_dim1], ldb, &work[bx + nm1], n); - } - st1 = st - 1; - if (nsize == 1) { - -/* - This is a 1-by-1 subproblem and is not solved - explicitly. -*/ - - zcopy_(nrhs, &b[st + b_dim1], ldb, &work[bx + st1], n); - } else if (nsize <= *smlsiz) { - -/* This is a small subproblem and is solved by DLASDQ. */ - - dlaset_("A", &nsize, &nsize, &c_b324, &c_b1015, &rwork[vt + - st1], n); - dlaset_("A", &nsize, &nsize, &c_b324, &c_b1015, &rwork[u + - st1], n); - dlasdq_("U", &c__0, &nsize, &nsize, &nsize, &c__0, &d__[st], & - e[st], &rwork[vt + st1], n, &rwork[u + st1], n, & - rwork[nrwork], &c__1, &rwork[nrwork], info) - ; - if (*info != 0) { - return 0; - } - -/* - In the real version, B is passed to DLASDQ and multiplied - internally by Q'. Here B is complex and that product is - computed below in two steps (real and imaginary parts). -*/ - - j = irwb - 1; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = st + nsize - 1; - for (jrow = st; jrow <= i__3; ++jrow) { - ++j; - i__4 = jrow + jcol * b_dim1; - rwork[j] = b[i__4].r; -/* L180: */ - } -/* L190: */ - } - dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1015, &rwork[u + - st1], n, &rwork[irwb], &nsize, &c_b324, &rwork[irwrb], - &nsize); - j = irwb - 1; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = st + nsize - 1; - for (jrow = st; jrow <= i__3; ++jrow) { - ++j; - rwork[j] = d_imag(&b[jrow + jcol * b_dim1]); -/* L200: */ - } -/* L210: */ - } - dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1015, &rwork[u + - st1], n, &rwork[irwb], &nsize, &c_b324, &rwork[irwib], - &nsize); - jreal = irwrb - 1; - jimag = irwib - 1; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = st + nsize - 1; - for (jrow = st; jrow <= i__3; ++jrow) { - ++jreal; - ++jimag; - i__4 = jrow + jcol * b_dim1; - i__5 = jreal; - i__6 = jimag; - z__1.r = rwork[i__5], z__1.i = rwork[i__6]; - b[i__4].r = z__1.r, b[i__4].i = z__1.i; -/* L220: */ - } -/* L230: */ - } - - zlacpy_("A", &nsize, nrhs, &b[st + b_dim1], ldb, &work[bx + - st1], n); - } else { - -/* A large problem. Solve it using divide and conquer. */ - - dlasda_(&icmpq1, smlsiz, &nsize, &sqre, &d__[st], &e[st], & - rwork[u + st1], n, &rwork[vt + st1], &iwork[k + st1], - &rwork[difl + st1], &rwork[difr + st1], &rwork[z__ + - st1], &rwork[poles + st1], &iwork[givptr + st1], & - iwork[givcol + st1], n, &iwork[perm + st1], &rwork[ - givnum + st1], &rwork[c__ + st1], &rwork[s + st1], & - rwork[nrwork], &iwork[iwk], info); - if (*info != 0) { - return 0; - } - bxst = bx + st1; - zlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &b[st + b_dim1], ldb, & - work[bxst], n, &rwork[u + st1], n, &rwork[vt + st1], & - iwork[k + st1], &rwork[difl + st1], &rwork[difr + st1] - , &rwork[z__ + st1], &rwork[poles + st1], &iwork[ - givptr + st1], &iwork[givcol + st1], n, &iwork[perm + - st1], &rwork[givnum + st1], &rwork[c__ + st1], &rwork[ - s + st1], &rwork[nrwork], &iwork[iwk], info); - if (*info != 0) { - return 0; - } - } - st = i__ + 1; - } -/* L240: */ - } - -/* Apply the singular values and treat the tiny ones as zero. */ - - tol = *rcond * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* - Some of the elements in D can be negative because 1-by-1 - subproblems were not solved explicitly. -*/ - - if ((d__1 = d__[i__], abs(d__1)) <= tol) { - zlaset_("A", &c__1, nrhs, &c_b59, &c_b59, &work[bx + i__ - 1], n); - } else { - ++(*rank); - zlascl_("G", &c__0, &c__0, &d__[i__], &c_b1015, &c__1, nrhs, & - work[bx + i__ - 1], n, info); - } - d__[i__] = (d__1 = d__[i__], abs(d__1)); -/* L250: */ - } - -/* Now apply back the right singular vectors. */ - - icmpq2 = 1; - i__1 = nsub; - for (i__ = 1; i__ <= i__1; ++i__) { - st = iwork[i__]; - st1 = st - 1; - nsize = iwork[sizei + i__ - 1]; - bxst = bx + st1; - if (nsize == 1) { - zcopy_(nrhs, &work[bxst], n, &b[st + b_dim1], ldb); - } else if (nsize <= *smlsiz) { - -/* - Since B and BX are complex, the following call to DGEMM - is performed in two steps (real and imaginary parts). - - CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, - $ RWORK( VT+ST1 ), N, RWORK( BXST ), N, ZERO, - $ B( ST, 1 ), LDB ) -*/ - - j = bxst - *n - 1; - jreal = irwb - 1; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - j += *n; - i__3 = nsize; - for (jrow = 1; jrow <= i__3; ++jrow) { - ++jreal; - i__4 = j + jrow; - rwork[jreal] = work[i__4].r; -/* L260: */ - } -/* L270: */ - } - dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1015, &rwork[vt + st1], - n, &rwork[irwb], &nsize, &c_b324, &rwork[irwrb], &nsize); - j = bxst - *n - 1; - jimag = irwb - 1; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - j += *n; - i__3 = nsize; - for (jrow = 1; jrow <= i__3; ++jrow) { - ++jimag; - rwork[jimag] = d_imag(&work[j + jrow]); -/* L280: */ - } -/* L290: */ - } - dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1015, &rwork[vt + st1], - n, &rwork[irwb], &nsize, &c_b324, &rwork[irwib], &nsize); - jreal = irwrb - 1; - jimag = irwib - 1; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = st + nsize - 1; - for (jrow = st; jrow <= i__3; ++jrow) { - ++jreal; - ++jimag; - i__4 = jrow + jcol * b_dim1; - i__5 = jreal; - i__6 = jimag; - z__1.r = rwork[i__5], z__1.i = rwork[i__6]; - b[i__4].r = z__1.r, b[i__4].i = z__1.i; -/* L300: */ - } -/* L310: */ - } - } else { - zlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &work[bxst], n, &b[st + - b_dim1], ldb, &rwork[u + st1], n, &rwork[vt + st1], & - iwork[k + st1], &rwork[difl + st1], &rwork[difr + st1], & - rwork[z__ + st1], &rwork[poles + st1], &iwork[givptr + - st1], &iwork[givcol + st1], n, &iwork[perm + st1], &rwork[ - givnum + st1], &rwork[c__ + st1], &rwork[s + st1], &rwork[ - nrwork], &iwork[iwk], info); - if (*info != 0) { - return 0; - } - } -/* L320: */ - } - -/* Unscale and sort the singular values. */ - - dlascl_("G", &c__0, &c__0, &c_b1015, &orgnrm, n, &c__1, &d__[1], n, info); - dlasrt_("D", n, &d__[1], info); - zlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, n, nrhs, &b[b_offset], ldb, - info); - - return 0; - -/* End of ZLALSD */ - -} /* zlalsd_ */ - -doublereal zlange_(char *norm, integer *m, integer *n, doublecomplex *a, - integer *lda, doublereal *work) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - doublereal ret_val, d__1, d__2; - - /* Builtin functions */ - double z_abs(doublecomplex *), sqrt(doublereal); - - /* Local variables */ - static integer i__, j; - static doublereal sum, scale; - extern logical lsame_(char *, char *); - static doublereal value; - extern /* Subroutine */ int zlassq_(integer *, doublecomplex *, integer *, - doublereal *, doublereal *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - ZLANGE returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - complex matrix A. - - Description - =========== - - ZLANGE returns the value - - ZLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in ZLANGE as described - above. - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. When M = 0, - ZLANGE is set to zero. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. When N = 0, - ZLANGE is set to zero. - - A (input) COMPLEX*16 array, dimension (LDA,N) - The m by n matrix A. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(M,1). - - WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), - where LWORK >= M when NORM = 'I'; otherwise, WORK is not - referenced. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --work; - - /* Function Body */ - if (min(*m,*n) == 0) { - value = 0.; - } else if (lsame_(norm, "M")) { - -/* Find max(abs(A(i,j))). */ - - value = 0.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - d__1 = value, d__2 = z_abs(&a[i__ + j * a_dim1]); - value = max(d__1,d__2); -/* L10: */ - } -/* L20: */ - } - } else if (lsame_(norm, "O") || *(unsigned char *) - norm == '1') { - -/* Find norm1(A). */ - - value = 0.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = 0.; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - sum += z_abs(&a[i__ + j * a_dim1]); -/* L30: */ - } - value = max(value,sum); -/* L40: */ - } - } else if (lsame_(norm, "I")) { - -/* Find normI(A). */ - - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.; -/* L50: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - work[i__] += z_abs(&a[i__ + j * a_dim1]); -/* L60: */ - } -/* L70: */ - } - value = 0.; - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__1 = value, d__2 = work[i__]; - value = max(d__1,d__2); -/* L80: */ - } - } else if (lsame_(norm, "F") || lsame_(norm, "E")) { - -/* Find normF(A). */ - - scale = 0.; - sum = 1.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - zlassq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum); -/* L90: */ - } - value = scale * sqrt(sum); - } - - ret_val = value; - return ret_val; - -/* End of ZLANGE */ - -} /* zlange_ */ - -doublereal zlanhe_(char *norm, char *uplo, integer *n, doublecomplex *a, - integer *lda, doublereal *work) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - doublereal ret_val, d__1, d__2, d__3; - - /* Builtin functions */ - double z_abs(doublecomplex *), sqrt(doublereal); - - /* Local variables */ - static integer i__, j; - static doublereal sum, absa, scale; - extern logical lsame_(char *, char *); - static doublereal value; - extern /* Subroutine */ int zlassq_(integer *, doublecomplex *, integer *, - doublereal *, doublereal *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - ZLANHE returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - complex hermitian matrix A. - - Description - =========== - - ZLANHE returns the value - - ZLANHE = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in ZLANHE as described - above. - - UPLO (input) CHARACTER*1 - Specifies whether the upper or lower triangular part of the - hermitian matrix A is to be referenced. - = 'U': Upper triangular part of A is referenced - = 'L': Lower triangular part of A is referenced - - N (input) INTEGER - The order of the matrix A. N >= 0. When N = 0, ZLANHE is - set to zero. - - A (input) COMPLEX*16 array, dimension (LDA,N) - The hermitian matrix A. If UPLO = 'U', the leading n by n - upper triangular part of A contains the upper triangular part - of the matrix A, and the strictly lower triangular part of A - is not referenced. If UPLO = 'L', the leading n by n lower - triangular part of A contains the lower triangular part of - the matrix A, and the strictly upper triangular part of A is - not referenced. Note that the imaginary parts of the diagonal - elements need not be set and are assumed to be zero. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(N,1). - - WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), - where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, - WORK is not referenced. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --work; - - /* Function Body */ - if (*n == 0) { - value = 0.; - } else if (lsame_(norm, "M")) { - -/* Find max(abs(A(i,j))). */ - - value = 0.; - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - d__1 = value, d__2 = z_abs(&a[i__ + j * a_dim1]); - value = max(d__1,d__2); -/* L10: */ - } -/* Computing MAX */ - i__2 = j + j * a_dim1; - d__2 = value, d__3 = (d__1 = a[i__2].r, abs(d__1)); - value = max(d__2,d__3); -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MAX */ - i__2 = j + j * a_dim1; - d__2 = value, d__3 = (d__1 = a[i__2].r, abs(d__1)); - value = max(d__2,d__3); - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - d__1 = value, d__2 = z_abs(&a[i__ + j * a_dim1]); - value = max(d__1,d__2); -/* L30: */ - } -/* L40: */ - } - } - } else if (lsame_(norm, "I") || lsame_(norm, "O") || *(unsigned char *)norm == '1') { - -/* Find normI(A) ( = norm1(A), since A is hermitian). */ - - value = 0.; - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = 0.; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - absa = z_abs(&a[i__ + j * a_dim1]); - sum += absa; - work[i__] += absa; -/* L50: */ - } - i__2 = j + j * a_dim1; - work[j] = sum + (d__1 = a[i__2].r, abs(d__1)); -/* L60: */ - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__1 = value, d__2 = work[i__]; - value = max(d__1,d__2); -/* L70: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.; -/* L80: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j + j * a_dim1; - sum = work[j] + (d__1 = a[i__2].r, abs(d__1)); - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - absa = z_abs(&a[i__ + j * a_dim1]); - sum += absa; - work[i__] += absa; -/* L90: */ - } - value = max(value,sum); -/* L100: */ - } - } - } else if (lsame_(norm, "F") || lsame_(norm, "E")) { - -/* Find normF(A). */ - - scale = 0.; - sum = 1.; - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 2; j <= i__1; ++j) { - i__2 = j - 1; - zlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); -/* L110: */ - } - } else { - i__1 = *n - 1; - for (j = 1; j <= i__1; ++j) { - i__2 = *n - j; - zlassq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum); -/* L120: */ - } - } - sum *= 2; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + i__ * a_dim1; - if (a[i__2].r != 0.) { - i__2 = i__ + i__ * a_dim1; - absa = (d__1 = a[i__2].r, abs(d__1)); - if (scale < absa) { -/* Computing 2nd power */ - d__1 = scale / absa; - sum = sum * (d__1 * d__1) + 1.; - scale = absa; - } else { -/* Computing 2nd power */ - d__1 = absa / scale; - sum += d__1 * d__1; - } - } -/* L130: */ - } - value = scale * sqrt(sum); - } - - ret_val = value; - return ret_val; - -/* End of ZLANHE */ - -} /* zlanhe_ */ - -doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, - doublereal *work) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - doublereal ret_val, d__1, d__2; - - /* Builtin functions */ - double z_abs(doublecomplex *), sqrt(doublereal); - - /* Local variables */ - static integer i__, j; - static doublereal sum, scale; - extern logical lsame_(char *, char *); - static doublereal value; - extern /* Subroutine */ int zlassq_(integer *, doublecomplex *, integer *, - doublereal *, doublereal *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - ZLANHS returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - Hessenberg matrix A. - - Description - =========== - - ZLANHS returns the value - - ZLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in ZLANHS as described - above. - - N (input) INTEGER - The order of the matrix A. N >= 0. When N = 0, ZLANHS is - set to zero. - - A (input) COMPLEX*16 array, dimension (LDA,N) - The n by n upper Hessenberg matrix A; the part of A below the - first sub-diagonal is not referenced. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(N,1). - - WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), - where LWORK >= N when NORM = 'I'; otherwise, WORK is not - referenced. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --work; - - /* Function Body */ - if (*n == 0) { - value = 0.; - } else if (lsame_(norm, "M")) { - -/* Find max(abs(A(i,j))). */ - - value = 0.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - d__1 = value, d__2 = z_abs(&a[i__ + j * a_dim1]); - value = max(d__1,d__2); -/* L10: */ - } -/* L20: */ - } - } else if (lsame_(norm, "O") || *(unsigned char *) - norm == '1') { - -/* Find norm1(A). */ - - value = 0.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = 0.; -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - sum += z_abs(&a[i__ + j * a_dim1]); -/* L30: */ - } - value = max(value,sum); -/* L40: */ - } - } else if (lsame_(norm, "I")) { - -/* Find normI(A). */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.; -/* L50: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - work[i__] += z_abs(&a[i__ + j * a_dim1]); -/* L60: */ - } -/* L70: */ - } - value = 0.; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__1 = value, d__2 = work[i__]; - value = max(d__1,d__2); -/* L80: */ - } - } else if (lsame_(norm, "F") || lsame_(norm, "E")) { - -/* Find normF(A). */ - - scale = 0.; - sum = 1.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - zlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); -/* L90: */ - } - value = scale * sqrt(sum); - } - - ret_val = value; - return ret_val; - -/* End of ZLANHS */ - -} /* zlanhs_ */ - -/* Subroutine */ int zlarcm_(integer *m, integer *n, doublereal *a, integer * - lda, doublecomplex *b, integer *ldb, doublecomplex *c__, integer *ldc, - doublereal *rwork) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, - i__3, i__4, i__5; - doublereal d__1; - doublecomplex z__1; - - /* Builtin functions */ - double d_imag(doublecomplex *); - - /* Local variables */ - static integer i__, j, l; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZLARCM performs a very simple matrix-matrix multiplication: - C := A * B, - where A is M by M and real; B is M by N and complex; - C is M by N and complex. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A and of the matrix C. - M >= 0. - - N (input) INTEGER - The number of columns and rows of the matrix B and - the number of columns of the matrix C. - N >= 0. - - A (input) DOUBLE PRECISION array, dimension (LDA, M) - A contains the M by M matrix A. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >=max(1,M). - - B (input) DOUBLE PRECISION array, dimension (LDB, N) - B contains the M by N matrix B. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >=max(1,M). - - C (input) COMPLEX*16 array, dimension (LDC, N) - C contains the M by N matrix C. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >=max(1,M). - - RWORK (workspace) DOUBLE PRECISION array, dimension (2*M*N) - - ===================================================================== - - - Quick return if possible. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --rwork; - - /* Function Body */ - if (*m == 0 || *n == 0) { - return 0; - } - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - rwork[(j - 1) * *m + i__] = b[i__3].r; -/* L10: */ - } -/* L20: */ - } - - l = *m * *n + 1; - dgemm_("N", "N", m, n, m, &c_b1015, &a[a_offset], lda, &rwork[1], m, & - c_b324, &rwork[l], m); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = l + (j - 1) * *m + i__ - 1; - c__[i__3].r = rwork[i__4], c__[i__3].i = 0.; -/* L30: */ - } -/* L40: */ - } - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - rwork[(j - 1) * *m + i__] = d_imag(&b[i__ + j * b_dim1]); -/* L50: */ - } -/* L60: */ - } - dgemm_("N", "N", m, n, m, &c_b1015, &a[a_offset], lda, &rwork[1], m, & - c_b324, &rwork[l], m); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - d__1 = c__[i__4].r; - i__5 = l + (j - 1) * *m + i__ - 1; - z__1.r = d__1, z__1.i = rwork[i__5]; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; -/* L70: */ - } -/* L80: */ - } - - return 0; - -/* End of ZLARCM */ - -} /* zlarcm_ */ - -/* Subroutine */ int zlarf_(char *side, integer *m, integer *n, doublecomplex - *v, integer *incv, doublecomplex *tau, doublecomplex *c__, integer * - ldc, doublecomplex *work) -{ - /* System generated locals */ - integer c_dim1, c_offset; - doublecomplex z__1; - - /* Local variables */ - extern logical lsame_(char *, char *); - extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *), zgemv_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZLARF applies a complex elementary reflector H to a complex M-by-N - matrix C, from either the left or the right. H is represented in the - form - - H = I - tau * v * v' - - where tau is a complex scalar and v is a complex vector. - - If tau = 0, then H is taken to be the unit matrix. - - To apply H' (the conjugate transpose of H), supply conjg(tau) instead - tau. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': form H * C - = 'R': form C * H - - M (input) INTEGER - The number of rows of the matrix C. - - N (input) INTEGER - The number of columns of the matrix C. - - V (input) COMPLEX*16 array, dimension - (1 + (M-1)*abs(INCV)) if SIDE = 'L' - or (1 + (N-1)*abs(INCV)) if SIDE = 'R' - The vector v in the representation of H. V is not used if - TAU = 0. - - INCV (input) INTEGER - The increment between elements of v. INCV <> 0. - - TAU (input) COMPLEX*16 - The value tau in the representation of H. - - C (input/output) COMPLEX*16 array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by the matrix H * C if SIDE = 'L', - or C * H if SIDE = 'R'. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace) COMPLEX*16 array, dimension - (N) if SIDE = 'L' - or (M) if SIDE = 'R' - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --v; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - if (lsame_(side, "L")) { - -/* Form H * C */ - - if (tau->r != 0. || tau->i != 0.) { - -/* w := C' * v */ - - zgemv_("Conjugate transpose", m, n, &c_b60, &c__[c_offset], ldc, & - v[1], incv, &c_b59, &work[1], &c__1); - -/* C := C - v * w' */ - - z__1.r = -tau->r, z__1.i = -tau->i; - zgerc_(m, n, &z__1, &v[1], incv, &work[1], &c__1, &c__[c_offset], - ldc); - } - } else { - -/* Form C * H */ - - if (tau->r != 0. || tau->i != 0.) { - -/* w := C * v */ - - zgemv_("No transpose", m, n, &c_b60, &c__[c_offset], ldc, &v[1], - incv, &c_b59, &work[1], &c__1); - -/* C := C - w * v' */ - - z__1.r = -tau->r, z__1.i = -tau->i; - zgerc_(m, n, &z__1, &work[1], &c__1, &v[1], incv, &c__[c_offset], - ldc); - } - } - return 0; - -/* End of ZLARF */ - -} /* zlarf_ */ - -/* Subroutine */ int zlarfb_(char *side, char *trans, char *direct, char * - storev, integer *m, integer *n, integer *k, doublecomplex *v, integer - *ldv, doublecomplex *t, integer *ldt, doublecomplex *c__, integer * - ldc, doublecomplex *work, integer *ldwork) -{ - /* System generated locals */ - integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, - work_offset, i__1, i__2, i__3, i__4, i__5; - doublecomplex z__1, z__2; - - /* Builtin functions */ - void d_cnjg(doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer i__, j; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, - integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *), zcopy_(integer *, doublecomplex *, - integer *, doublecomplex *, integer *), ztrmm_(char *, char *, - char *, char *, integer *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *), zlacgv_(integer *, doublecomplex *, - integer *); - static char transt[1]; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZLARFB applies a complex block reflector H or its transpose H' to a - complex M-by-N matrix C, from either the left or the right. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply H or H' from the Left - = 'R': apply H or H' from the Right - - TRANS (input) CHARACTER*1 - = 'N': apply H (No transpose) - = 'C': apply H' (Conjugate transpose) - - DIRECT (input) CHARACTER*1 - Indicates how H is formed from a product of elementary - reflectors - = 'F': H = H(1) H(2) . . . H(k) (Forward) - = 'B': H = H(k) . . . H(2) H(1) (Backward) - - STOREV (input) CHARACTER*1 - Indicates how the vectors which define the elementary - reflectors are stored: - = 'C': Columnwise - = 'R': Rowwise - - M (input) INTEGER - The number of rows of the matrix C. - - N (input) INTEGER - The number of columns of the matrix C. - - K (input) INTEGER - The order of the matrix T (= the number of elementary - reflectors whose product defines the block reflector). - - V (input) COMPLEX*16 array, dimension - (LDV,K) if STOREV = 'C' - (LDV,M) if STOREV = 'R' and SIDE = 'L' - (LDV,N) if STOREV = 'R' and SIDE = 'R' - The matrix V. See further details. - - LDV (input) INTEGER - The leading dimension of the array V. - If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); - if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); - if STOREV = 'R', LDV >= K. - - T (input) COMPLEX*16 array, dimension (LDT,K) - The triangular K-by-K matrix T in the representation of the - block reflector. - - LDT (input) INTEGER - The leading dimension of the array T. LDT >= K. - - C (input/output) COMPLEX*16 array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by H*C or H'*C or C*H or C*H'. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace) COMPLEX*16 array, dimension (LDWORK,K) - - LDWORK (input) INTEGER - The leading dimension of the array WORK. - If SIDE = 'L', LDWORK >= max(1,N); - if SIDE = 'R', LDWORK >= max(1,M). - - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - v_dim1 = *ldv; - v_offset = 1 + v_dim1; - v -= v_offset; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - work_dim1 = *ldwork; - work_offset = 1 + work_dim1; - work -= work_offset; - - /* Function Body */ - if (*m <= 0 || *n <= 0) { - return 0; - } - - if (lsame_(trans, "N")) { - *(unsigned char *)transt = 'C'; - } else { - *(unsigned char *)transt = 'N'; - } - - if (lsame_(storev, "C")) { - - if (lsame_(direct, "F")) { - -/* - Let V = ( V1 ) (first K rows) - ( V2 ) - where V1 is unit lower triangular. -*/ - - if (lsame_(side, "L")) { - -/* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) - - W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) - - W := C1' -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - zcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], - &c__1); - zlacgv_(n, &work[j * work_dim1 + 1], &c__1); -/* L10: */ - } - -/* W := W * V1 */ - - ztrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b60, - &v[v_offset], ldv, &work[work_offset], ldwork); - if (*m > *k) { - -/* W := W + C2'*V2 */ - - i__1 = *m - *k; - zgemm_("Conjugate transpose", "No transpose", n, k, &i__1, - &c_b60, &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + - v_dim1], ldv, &c_b60, &work[work_offset], ldwork); - } - -/* W := W * T' or W * T */ - - ztrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b60, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - V * W' */ - - if (*m > *k) { - -/* C2 := C2 - V2 * W' */ - - i__1 = *m - *k; - z__1.r = -1., z__1.i = -0.; - zgemm_("No transpose", "Conjugate transpose", &i__1, n, k, - &z__1, &v[*k + 1 + v_dim1], ldv, &work[ - work_offset], ldwork, &c_b60, &c__[*k + 1 + - c_dim1], ldc); - } - -/* W := W * V1' */ - - ztrmm_("Right", "Lower", "Conjugate transpose", "Unit", n, k, - &c_b60, &v[v_offset], ldv, &work[work_offset], ldwork); - -/* C1 := C1 - W' */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = j + i__ * c_dim1; - i__4 = j + i__ * c_dim1; - d_cnjg(&z__2, &work[i__ + j * work_dim1]); - z__1.r = c__[i__4].r - z__2.r, z__1.i = c__[i__4].i - - z__2.i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; -/* L20: */ - } -/* L30: */ - } - - } else if (lsame_(side, "R")) { - -/* - Form C * H or C * H' where C = ( C1 C2 ) - - W := C * V = (C1*V1 + C2*V2) (stored in WORK) - - W := C1 -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - zcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * - work_dim1 + 1], &c__1); -/* L40: */ - } - -/* W := W * V1 */ - - ztrmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b60, - &v[v_offset], ldv, &work[work_offset], ldwork); - if (*n > *k) { - -/* W := W + C2 * V2 */ - - i__1 = *n - *k; - zgemm_("No transpose", "No transpose", m, k, &i__1, & - c_b60, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + - 1 + v_dim1], ldv, &c_b60, &work[work_offset], - ldwork); - } - -/* W := W * T or W * T' */ - - ztrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b60, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - W * V' */ - - if (*n > *k) { - -/* C2 := C2 - W * V2' */ - - i__1 = *n - *k; - z__1.r = -1., z__1.i = -0.; - zgemm_("No transpose", "Conjugate transpose", m, &i__1, k, - &z__1, &work[work_offset], ldwork, &v[*k + 1 + - v_dim1], ldv, &c_b60, &c__[(*k + 1) * c_dim1 + 1], - ldc); - } - -/* W := W * V1' */ - - ztrmm_("Right", "Lower", "Conjugate transpose", "Unit", m, k, - &c_b60, &v[v_offset], ldv, &work[work_offset], ldwork); - -/* C1 := C1 - W */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - i__5 = i__ + j * work_dim1; - z__1.r = c__[i__4].r - work[i__5].r, z__1.i = c__[ - i__4].i - work[i__5].i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; -/* L50: */ - } -/* L60: */ - } - } - - } else { - -/* - Let V = ( V1 ) - ( V2 ) (last K rows) - where V2 is unit upper triangular. -*/ - - if (lsame_(side, "L")) { - -/* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) - - W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) - - W := C2' -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - zcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * - work_dim1 + 1], &c__1); - zlacgv_(n, &work[j * work_dim1 + 1], &c__1); -/* L70: */ - } - -/* W := W * V2 */ - - ztrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b60, - &v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], - ldwork); - if (*m > *k) { - -/* W := W + C1'*V1 */ - - i__1 = *m - *k; - zgemm_("Conjugate transpose", "No transpose", n, k, &i__1, - &c_b60, &c__[c_offset], ldc, &v[v_offset], ldv, & - c_b60, &work[work_offset], ldwork); - } - -/* W := W * T' or W * T */ - - ztrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b60, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - V * W' */ - - if (*m > *k) { - -/* C1 := C1 - V1 * W' */ - - i__1 = *m - *k; - z__1.r = -1., z__1.i = -0.; - zgemm_("No transpose", "Conjugate transpose", &i__1, n, k, - &z__1, &v[v_offset], ldv, &work[work_offset], - ldwork, &c_b60, &c__[c_offset], ldc); - } - -/* W := W * V2' */ - - ztrmm_("Right", "Upper", "Conjugate transpose", "Unit", n, k, - &c_b60, &v[*m - *k + 1 + v_dim1], ldv, &work[ - work_offset], ldwork); - -/* C2 := C2 - W' */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = *m - *k + j + i__ * c_dim1; - i__4 = *m - *k + j + i__ * c_dim1; - d_cnjg(&z__2, &work[i__ + j * work_dim1]); - z__1.r = c__[i__4].r - z__2.r, z__1.i = c__[i__4].i - - z__2.i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; -/* L80: */ - } -/* L90: */ - } - - } else if (lsame_(side, "R")) { - -/* - Form C * H or C * H' where C = ( C1 C2 ) - - W := C * V = (C1*V1 + C2*V2) (stored in WORK) - - W := C2 -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - zcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ - j * work_dim1 + 1], &c__1); -/* L100: */ - } - -/* W := W * V2 */ - - ztrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b60, - &v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], - ldwork); - if (*n > *k) { - -/* W := W + C1 * V1 */ - - i__1 = *n - *k; - zgemm_("No transpose", "No transpose", m, k, &i__1, & - c_b60, &c__[c_offset], ldc, &v[v_offset], ldv, & - c_b60, &work[work_offset], ldwork); - } - -/* W := W * T or W * T' */ - - ztrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b60, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - W * V' */ - - if (*n > *k) { - -/* C1 := C1 - W * V1' */ - - i__1 = *n - *k; - z__1.r = -1., z__1.i = -0.; - zgemm_("No transpose", "Conjugate transpose", m, &i__1, k, - &z__1, &work[work_offset], ldwork, &v[v_offset], - ldv, &c_b60, &c__[c_offset], ldc); - } - -/* W := W * V2' */ - - ztrmm_("Right", "Upper", "Conjugate transpose", "Unit", m, k, - &c_b60, &v[*n - *k + 1 + v_dim1], ldv, &work[ - work_offset], ldwork); - -/* C2 := C2 - W */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + (*n - *k + j) * c_dim1; - i__4 = i__ + (*n - *k + j) * c_dim1; - i__5 = i__ + j * work_dim1; - z__1.r = c__[i__4].r - work[i__5].r, z__1.i = c__[ - i__4].i - work[i__5].i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; -/* L110: */ - } -/* L120: */ - } - } - } - - } else if (lsame_(storev, "R")) { - - if (lsame_(direct, "F")) { - -/* - Let V = ( V1 V2 ) (V1: first K columns) - where V1 is unit upper triangular. -*/ - - if (lsame_(side, "L")) { - -/* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) - - W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) - - W := C1' -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - zcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], - &c__1); - zlacgv_(n, &work[j * work_dim1 + 1], &c__1); -/* L130: */ - } - -/* W := W * V1' */ - - ztrmm_("Right", "Upper", "Conjugate transpose", "Unit", n, k, - &c_b60, &v[v_offset], ldv, &work[work_offset], ldwork); - if (*m > *k) { - -/* W := W + C2'*V2' */ - - i__1 = *m - *k; - zgemm_("Conjugate transpose", "Conjugate transpose", n, k, - &i__1, &c_b60, &c__[*k + 1 + c_dim1], ldc, &v[(* - k + 1) * v_dim1 + 1], ldv, &c_b60, &work[ - work_offset], ldwork); - } - -/* W := W * T' or W * T */ - - ztrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b60, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - V' * W' */ - - if (*m > *k) { - -/* C2 := C2 - V2' * W' */ - - i__1 = *m - *k; - z__1.r = -1., z__1.i = -0.; - zgemm_("Conjugate transpose", "Conjugate transpose", & - i__1, n, k, &z__1, &v[(*k + 1) * v_dim1 + 1], ldv, - &work[work_offset], ldwork, &c_b60, &c__[*k + 1 - + c_dim1], ldc); - } - -/* W := W * V1 */ - - ztrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b60, - &v[v_offset], ldv, &work[work_offset], ldwork); - -/* C1 := C1 - W' */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = j + i__ * c_dim1; - i__4 = j + i__ * c_dim1; - d_cnjg(&z__2, &work[i__ + j * work_dim1]); - z__1.r = c__[i__4].r - z__2.r, z__1.i = c__[i__4].i - - z__2.i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; -/* L140: */ - } -/* L150: */ - } - - } else if (lsame_(side, "R")) { - -/* - Form C * H or C * H' where C = ( C1 C2 ) - - W := C * V' = (C1*V1' + C2*V2') (stored in WORK) - - W := C1 -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - zcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * - work_dim1 + 1], &c__1); -/* L160: */ - } - -/* W := W * V1' */ - - ztrmm_("Right", "Upper", "Conjugate transpose", "Unit", m, k, - &c_b60, &v[v_offset], ldv, &work[work_offset], ldwork); - if (*n > *k) { - -/* W := W + C2 * V2' */ - - i__1 = *n - *k; - zgemm_("No transpose", "Conjugate transpose", m, k, &i__1, - &c_b60, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k - + 1) * v_dim1 + 1], ldv, &c_b60, &work[ - work_offset], ldwork); - } - -/* W := W * T or W * T' */ - - ztrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b60, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - W * V */ - - if (*n > *k) { - -/* C2 := C2 - W * V2 */ - - i__1 = *n - *k; - z__1.r = -1., z__1.i = -0.; - zgemm_("No transpose", "No transpose", m, &i__1, k, &z__1, - &work[work_offset], ldwork, &v[(*k + 1) * v_dim1 - + 1], ldv, &c_b60, &c__[(*k + 1) * c_dim1 + 1], - ldc); - } - -/* W := W * V1 */ - - ztrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b60, - &v[v_offset], ldv, &work[work_offset], ldwork); - -/* C1 := C1 - W */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - i__5 = i__ + j * work_dim1; - z__1.r = c__[i__4].r - work[i__5].r, z__1.i = c__[ - i__4].i - work[i__5].i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; -/* L170: */ - } -/* L180: */ - } - - } - - } else { - -/* - Let V = ( V1 V2 ) (V2: last K columns) - where V2 is unit lower triangular. -*/ - - if (lsame_(side, "L")) { - -/* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) - - W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) - - W := C2' -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - zcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * - work_dim1 + 1], &c__1); - zlacgv_(n, &work[j * work_dim1 + 1], &c__1); -/* L190: */ - } - -/* W := W * V2' */ - - ztrmm_("Right", "Lower", "Conjugate transpose", "Unit", n, k, - &c_b60, &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); - if (*m > *k) { - -/* W := W + C1'*V1' */ - - i__1 = *m - *k; - zgemm_("Conjugate transpose", "Conjugate transpose", n, k, - &i__1, &c_b60, &c__[c_offset], ldc, &v[v_offset], - ldv, &c_b60, &work[work_offset], ldwork); - } - -/* W := W * T' or W * T */ - - ztrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b60, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - V' * W' */ - - if (*m > *k) { - -/* C1 := C1 - V1' * W' */ - - i__1 = *m - *k; - z__1.r = -1., z__1.i = -0.; - zgemm_("Conjugate transpose", "Conjugate transpose", & - i__1, n, k, &z__1, &v[v_offset], ldv, &work[ - work_offset], ldwork, &c_b60, &c__[c_offset], ldc); - } - -/* W := W * V2 */ - - ztrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b60, - &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); - -/* C2 := C2 - W' */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = *m - *k + j + i__ * c_dim1; - i__4 = *m - *k + j + i__ * c_dim1; - d_cnjg(&z__2, &work[i__ + j * work_dim1]); - z__1.r = c__[i__4].r - z__2.r, z__1.i = c__[i__4].i - - z__2.i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; -/* L200: */ - } -/* L210: */ - } - - } else if (lsame_(side, "R")) { - -/* - Form C * H or C * H' where C = ( C1 C2 ) - - W := C * V' = (C1*V1' + C2*V2') (stored in WORK) - - W := C2 -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - zcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ - j * work_dim1 + 1], &c__1); -/* L220: */ - } - -/* W := W * V2' */ - - ztrmm_("Right", "Lower", "Conjugate transpose", "Unit", m, k, - &c_b60, &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); - if (*n > *k) { - -/* W := W + C1 * V1' */ - - i__1 = *n - *k; - zgemm_("No transpose", "Conjugate transpose", m, k, &i__1, - &c_b60, &c__[c_offset], ldc, &v[v_offset], ldv, & - c_b60, &work[work_offset], ldwork); - } - -/* W := W * T or W * T' */ - - ztrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b60, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - W * V */ - - if (*n > *k) { - -/* C1 := C1 - W * V1 */ - - i__1 = *n - *k; - z__1.r = -1., z__1.i = -0.; - zgemm_("No transpose", "No transpose", m, &i__1, k, &z__1, - &work[work_offset], ldwork, &v[v_offset], ldv, & - c_b60, &c__[c_offset], ldc); - } - -/* W := W * V2 */ - - ztrmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b60, - &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); - -/* C1 := C1 - W */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + (*n - *k + j) * c_dim1; - i__4 = i__ + (*n - *k + j) * c_dim1; - i__5 = i__ + j * work_dim1; - z__1.r = c__[i__4].r - work[i__5].r, z__1.i = c__[ - i__4].i - work[i__5].i; - c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; -/* L230: */ - } -/* L240: */ - } - - } - - } - } - - return 0; - -/* End of ZLARFB */ - -} /* zlarfb_ */ - -/* Subroutine */ int zlarfg_(integer *n, doublecomplex *alpha, doublecomplex * - x, integer *incx, doublecomplex *tau) -{ - /* System generated locals */ - integer i__1; - doublereal d__1, d__2; - doublecomplex z__1, z__2; - - /* Builtin functions */ - double d_imag(doublecomplex *), d_sign(doublereal *, doublereal *); - - /* Local variables */ - static integer j, knt; - static doublereal beta, alphi, alphr; - extern /* Subroutine */ int zscal_(integer *, doublecomplex *, - doublecomplex *, integer *); - static doublereal xnorm; - extern doublereal dlapy3_(doublereal *, doublereal *, doublereal *), - dznrm2_(integer *, doublecomplex *, integer *), dlamch_(char *); - static doublereal safmin; - extern /* Subroutine */ int zdscal_(integer *, doublereal *, - doublecomplex *, integer *); - static doublereal rsafmn; - extern /* Double Complex */ VOID zladiv_(doublecomplex *, doublecomplex *, - doublecomplex *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZLARFG generates a complex elementary reflector H of order n, such - that - - H' * ( alpha ) = ( beta ), H' * H = I. - ( x ) ( 0 ) - - where alpha and beta are scalars, with beta real, and x is an - (n-1)-element complex vector. H is represented in the form - - H = I - tau * ( 1 ) * ( 1 v' ) , - ( v ) - - where tau is a complex scalar and v is a complex (n-1)-element - vector. Note that H is not hermitian. - - If the elements of x are all zero and alpha is real, then tau = 0 - and H is taken to be the unit matrix. - - Otherwise 1 <= real(tau) <= 2 and abs(tau-1) <= 1 . - - Arguments - ========= - - N (input) INTEGER - The order of the elementary reflector. - - ALPHA (input/output) COMPLEX*16 - On entry, the value alpha. - On exit, it is overwritten with the value beta. - - X (input/output) COMPLEX*16 array, dimension - (1+(N-2)*abs(INCX)) - On entry, the vector x. - On exit, it is overwritten with the vector v. - - INCX (input) INTEGER - The increment between elements of X. INCX > 0. - - TAU (output) COMPLEX*16 - The value tau. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --x; - - /* Function Body */ - if (*n <= 0) { - tau->r = 0., tau->i = 0.; - return 0; - } - - i__1 = *n - 1; - xnorm = dznrm2_(&i__1, &x[1], incx); - alphr = alpha->r; - alphi = d_imag(alpha); - - if (xnorm == 0. && alphi == 0.) { - -/* H = I */ - - tau->r = 0., tau->i = 0.; - } else { - -/* general case */ - - d__1 = dlapy3_(&alphr, &alphi, &xnorm); - beta = -d_sign(&d__1, &alphr); - safmin = SAFEMINIMUM / EPSILON; - rsafmn = 1. / safmin; - - if (abs(beta) < safmin) { - -/* XNORM, BETA may be inaccurate; scale X and recompute them */ - - knt = 0; -L10: - ++knt; - i__1 = *n - 1; - zdscal_(&i__1, &rsafmn, &x[1], incx); - beta *= rsafmn; - alphi *= rsafmn; - alphr *= rsafmn; - if (abs(beta) < safmin) { - goto L10; - } - -/* New BETA is at most 1, at least SAFMIN */ - - i__1 = *n - 1; - xnorm = dznrm2_(&i__1, &x[1], incx); - z__1.r = alphr, z__1.i = alphi; - alpha->r = z__1.r, alpha->i = z__1.i; - d__1 = dlapy3_(&alphr, &alphi, &xnorm); - beta = -d_sign(&d__1, &alphr); - d__1 = (beta - alphr) / beta; - d__2 = -alphi / beta; - z__1.r = d__1, z__1.i = d__2; - tau->r = z__1.r, tau->i = z__1.i; - z__2.r = alpha->r - beta, z__2.i = alpha->i; - zladiv_(&z__1, &c_b60, &z__2); - alpha->r = z__1.r, alpha->i = z__1.i; - i__1 = *n - 1; - zscal_(&i__1, alpha, &x[1], incx); - -/* If ALPHA is subnormal, it may lose relative accuracy */ - - alpha->r = beta, alpha->i = 0.; - i__1 = knt; - for (j = 1; j <= i__1; ++j) { - z__1.r = safmin * alpha->r, z__1.i = safmin * alpha->i; - alpha->r = z__1.r, alpha->i = z__1.i; -/* L20: */ - } - } else { - d__1 = (beta - alphr) / beta; - d__2 = -alphi / beta; - z__1.r = d__1, z__1.i = d__2; - tau->r = z__1.r, tau->i = z__1.i; - z__2.r = alpha->r - beta, z__2.i = alpha->i; - zladiv_(&z__1, &c_b60, &z__2); - alpha->r = z__1.r, alpha->i = z__1.i; - i__1 = *n - 1; - zscal_(&i__1, alpha, &x[1], incx); - alpha->r = beta, alpha->i = 0.; - } - } - - return 0; - -/* End of ZLARFG */ - -} /* zlarfg_ */ - -/* Subroutine */ int zlarft_(char *direct, char *storev, integer *n, integer * - k, doublecomplex *v, integer *ldv, doublecomplex *tau, doublecomplex * - t, integer *ldt) -{ - /* System generated locals */ - integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4; - doublecomplex z__1; - - /* Local variables */ - static integer i__, j; - static doublecomplex vii; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int zgemv_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *), - ztrmv_(char *, char *, char *, integer *, doublecomplex *, - integer *, doublecomplex *, integer *), - zlacgv_(integer *, doublecomplex *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZLARFT forms the triangular factor T of a complex block reflector H - of order n, which is defined as a product of k elementary reflectors. - - If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; - - If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. - - If STOREV = 'C', the vector which defines the elementary reflector - H(i) is stored in the i-th column of the array V, and - - H = I - V * T * V' - - If STOREV = 'R', the vector which defines the elementary reflector - H(i) is stored in the i-th row of the array V, and - - H = I - V' * T * V - - Arguments - ========= - - DIRECT (input) CHARACTER*1 - Specifies the order in which the elementary reflectors are - multiplied to form the block reflector: - = 'F': H = H(1) H(2) . . . H(k) (Forward) - = 'B': H = H(k) . . . H(2) H(1) (Backward) - - STOREV (input) CHARACTER*1 - Specifies how the vectors which define the elementary - reflectors are stored (see also Further Details): - = 'C': columnwise - = 'R': rowwise - - N (input) INTEGER - The order of the block reflector H. N >= 0. - - K (input) INTEGER - The order of the triangular factor T (= the number of - elementary reflectors). K >= 1. - - V (input/output) COMPLEX*16 array, dimension - (LDV,K) if STOREV = 'C' - (LDV,N) if STOREV = 'R' - The matrix V. See further details. - - LDV (input) INTEGER - The leading dimension of the array V. - If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. - - TAU (input) COMPLEX*16 array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i). - - T (output) COMPLEX*16 array, dimension (LDT,K) - The k by k triangular factor T of the block reflector. - If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is - lower triangular. The rest of the array is not used. - - LDT (input) INTEGER - The leading dimension of the array T. LDT >= K. - - Further Details - =============== - - The shape of the matrix V and the storage of the vectors which define - the H(i) is best illustrated by the following example with n = 5 and - k = 3. The elements equal to 1 are not stored; the corresponding - array elements are modified but restored on exit. The rest of the - array is not used. - - DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': - - V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) - ( v1 1 ) ( 1 v2 v2 v2 ) - ( v1 v2 1 ) ( 1 v3 v3 ) - ( v1 v2 v3 ) - ( v1 v2 v3 ) - - DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': - - V = ( v1 v2 v3 ) V = ( v1 v1 1 ) - ( v1 v2 v3 ) ( v2 v2 v2 1 ) - ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) - ( 1 v3 ) - ( 1 ) - - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - v_dim1 = *ldv; - v_offset = 1 + v_dim1; - v -= v_offset; - --tau; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - - /* Function Body */ - if (*n == 0) { - return 0; - } - - if (lsame_(direct, "F")) { - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - if (tau[i__2].r == 0. && tau[i__2].i == 0.) { - -/* H(i) = I */ - - i__2 = i__; - for (j = 1; j <= i__2; ++j) { - i__3 = j + i__ * t_dim1; - t[i__3].r = 0., t[i__3].i = 0.; -/* L10: */ - } - } else { - -/* general case */ - - i__2 = i__ + i__ * v_dim1; - vii.r = v[i__2].r, vii.i = v[i__2].i; - i__2 = i__ + i__ * v_dim1; - v[i__2].r = 1., v[i__2].i = 0.; - if (lsame_(storev, "C")) { - -/* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */ - - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - i__4 = i__; - z__1.r = -tau[i__4].r, z__1.i = -tau[i__4].i; - zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &v[i__ - + v_dim1], ldv, &v[i__ + i__ * v_dim1], &c__1, & - c_b59, &t[i__ * t_dim1 + 1], &c__1); - } else { - -/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */ - - if (i__ < *n) { - i__2 = *n - i__; - zlacgv_(&i__2, &v[i__ + (i__ + 1) * v_dim1], ldv); - } - i__2 = i__ - 1; - i__3 = *n - i__ + 1; - i__4 = i__; - z__1.r = -tau[i__4].r, z__1.i = -tau[i__4].i; - zgemv_("No transpose", &i__2, &i__3, &z__1, &v[i__ * - v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, & - c_b59, &t[i__ * t_dim1 + 1], &c__1); - if (i__ < *n) { - i__2 = *n - i__; - zlacgv_(&i__2, &v[i__ + (i__ + 1) * v_dim1], ldv); - } - } - i__2 = i__ + i__ * v_dim1; - v[i__2].r = vii.r, v[i__2].i = vii.i; - -/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ - - i__2 = i__ - 1; - ztrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ - t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); - i__2 = i__ + i__ * t_dim1; - i__3 = i__; - t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i; - } -/* L20: */ - } - } else { - for (i__ = *k; i__ >= 1; --i__) { - i__1 = i__; - if (tau[i__1].r == 0. && tau[i__1].i == 0.) { - -/* H(i) = I */ - - i__1 = *k; - for (j = i__; j <= i__1; ++j) { - i__2 = j + i__ * t_dim1; - t[i__2].r = 0., t[i__2].i = 0.; -/* L30: */ - } - } else { - -/* general case */ - - if (i__ < *k) { - if (lsame_(storev, "C")) { - i__1 = *n - *k + i__ + i__ * v_dim1; - vii.r = v[i__1].r, vii.i = v[i__1].i; - i__1 = *n - *k + i__ + i__ * v_dim1; - v[i__1].r = 1., v[i__1].i = 0.; - -/* - T(i+1:k,i) := - - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) -*/ - - i__1 = *n - *k + i__; - i__2 = *k - i__; - i__3 = i__; - z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i; - zgemv_("Conjugate transpose", &i__1, &i__2, &z__1, &v[ - (i__ + 1) * v_dim1 + 1], ldv, &v[i__ * v_dim1 - + 1], &c__1, &c_b59, &t[i__ + 1 + i__ * - t_dim1], &c__1); - i__1 = *n - *k + i__ + i__ * v_dim1; - v[i__1].r = vii.r, v[i__1].i = vii.i; - } else { - i__1 = i__ + (*n - *k + i__) * v_dim1; - vii.r = v[i__1].r, vii.i = v[i__1].i; - i__1 = i__ + (*n - *k + i__) * v_dim1; - v[i__1].r = 1., v[i__1].i = 0.; - -/* - T(i+1:k,i) := - - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' -*/ - - i__1 = *n - *k + i__ - 1; - zlacgv_(&i__1, &v[i__ + v_dim1], ldv); - i__1 = *k - i__; - i__2 = *n - *k + i__; - i__3 = i__; - z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i; - zgemv_("No transpose", &i__1, &i__2, &z__1, &v[i__ + - 1 + v_dim1], ldv, &v[i__ + v_dim1], ldv, & - c_b59, &t[i__ + 1 + i__ * t_dim1], &c__1); - i__1 = *n - *k + i__ - 1; - zlacgv_(&i__1, &v[i__ + v_dim1], ldv); - i__1 = i__ + (*n - *k + i__) * v_dim1; - v[i__1].r = vii.r, v[i__1].i = vii.i; - } - -/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ - - i__1 = *k - i__; - ztrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ - + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * - t_dim1], &c__1) - ; - } - i__1 = i__ + i__ * t_dim1; - i__2 = i__; - t[i__1].r = tau[i__2].r, t[i__1].i = tau[i__2].i; - } -/* L40: */ - } - } - return 0; - -/* End of ZLARFT */ - -} /* zlarft_ */ - -/* Subroutine */ int zlarfx_(char *side, integer *m, integer *n, - doublecomplex *v, doublecomplex *tau, doublecomplex *c__, integer * - ldc, doublecomplex *work) -{ - /* System generated locals */ - integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, - i__9, i__10, i__11; - doublecomplex z__1, z__2, z__3, z__4, z__5, z__6, z__7, z__8, z__9, z__10, - z__11, z__12, z__13, z__14, z__15, z__16, z__17, z__18, z__19; - - /* Builtin functions */ - void d_cnjg(doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer j; - static doublecomplex t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, - v5, v6, v7, v8, v9, t10, v10, sum; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *), zgemv_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZLARFX applies a complex elementary reflector H to a complex m by n - matrix C, from either the left or the right. H is represented in the - form - - H = I - tau * v * v' - - where tau is a complex scalar and v is a complex vector. - - If tau = 0, then H is taken to be the unit matrix - - This version uses inline code if H has order < 11. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': form H * C - = 'R': form C * H - - M (input) INTEGER - The number of rows of the matrix C. - - N (input) INTEGER - The number of columns of the matrix C. - - V (input) COMPLEX*16 array, dimension (M) if SIDE = 'L' - or (N) if SIDE = 'R' - The vector v in the representation of H. - - TAU (input) COMPLEX*16 - The value tau in the representation of H. - - C (input/output) COMPLEX*16 array, dimension (LDC,N) - On entry, the m by n matrix C. - On exit, C is overwritten by the matrix H * C if SIDE = 'L', - or C * H if SIDE = 'R'. - - LDC (input) INTEGER - The leading dimension of the array C. LDA >= max(1,M). - - WORK (workspace) COMPLEX*16 array, dimension (N) if SIDE = 'L' - or (M) if SIDE = 'R' - WORK is not referenced if H has order < 11. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --v; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - if (tau->r == 0. && tau->i == 0.) { - return 0; - } - if (lsame_(side, "L")) { - -/* Form H * C, where H has order m. */ - - switch (*m) { - case 1: goto L10; - case 2: goto L30; - case 3: goto L50; - case 4: goto L70; - case 5: goto L90; - case 6: goto L110; - case 7: goto L130; - case 8: goto L150; - case 9: goto L170; - case 10: goto L190; - } - -/* - Code for general M - - w := C'*v -*/ - - zgemv_("Conjugate transpose", m, n, &c_b60, &c__[c_offset], ldc, &v[1] - , &c__1, &c_b59, &work[1], &c__1); - -/* C := C - tau * v * w' */ - - z__1.r = -tau->r, z__1.i = -tau->i; - zgerc_(m, n, &z__1, &v[1], &c__1, &work[1], &c__1, &c__[c_offset], - ldc); - goto L410; -L10: - -/* Special code for 1 x 1 Householder */ - - z__3.r = tau->r * v[1].r - tau->i * v[1].i, z__3.i = tau->r * v[1].i - + tau->i * v[1].r; - d_cnjg(&z__4, &v[1]); - z__2.r = z__3.r * z__4.r - z__3.i * z__4.i, z__2.i = z__3.r * z__4.i - + z__3.i * z__4.r; - z__1.r = 1. - z__2.r, z__1.i = 0. - z__2.i; - t1.r = z__1.r, t1.i = z__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - z__1.r = t1.r * c__[i__3].r - t1.i * c__[i__3].i, z__1.i = t1.r * - c__[i__3].i + t1.i * c__[i__3].r; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L20: */ - } - goto L410; -L30: - -/* Special code for 2 x 2 Householder */ - - d_cnjg(&z__1, &v[1]); - v1.r = z__1.r, v1.i = z__1.i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - d_cnjg(&z__1, &v[2]); - v2.r = z__1.r, v2.i = z__1.i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - z__2.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__2.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - z__3.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__3.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L40: */ - } - goto L410; -L50: - -/* Special code for 3 x 3 Householder */ - - d_cnjg(&z__1, &v[1]); - v1.r = z__1.r, v1.i = z__1.i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - d_cnjg(&z__1, &v[2]); - v2.r = z__1.r, v2.i = z__1.i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - d_cnjg(&z__1, &v[3]); - v3.r = z__1.r, v3.i = z__1.i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - z__3.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__3.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - z__4.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__4.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + z__4.i; - i__4 = j * c_dim1 + 3; - z__5.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__5.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L60: */ - } - goto L410; -L70: - -/* Special code for 4 x 4 Householder */ - - d_cnjg(&z__1, &v[1]); - v1.r = z__1.r, v1.i = z__1.i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - d_cnjg(&z__1, &v[2]); - v2.r = z__1.r, v2.i = z__1.i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - d_cnjg(&z__1, &v[3]); - v3.r = z__1.r, v3.i = z__1.i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - d_cnjg(&z__1, &v[4]); - v4.r = z__1.r, v4.i = z__1.i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - z__4.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__4.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - z__5.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__5.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - z__3.r = z__4.r + z__5.r, z__3.i = z__4.i + z__5.i; - i__4 = j * c_dim1 + 3; - z__6.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__6.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - z__2.r = z__3.r + z__6.r, z__2.i = z__3.i + z__6.i; - i__5 = j * c_dim1 + 4; - z__7.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__7.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - z__1.r = z__2.r + z__7.r, z__1.i = z__2.i + z__7.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L80: */ - } - goto L410; -L90: - -/* Special code for 5 x 5 Householder */ - - d_cnjg(&z__1, &v[1]); - v1.r = z__1.r, v1.i = z__1.i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - d_cnjg(&z__1, &v[2]); - v2.r = z__1.r, v2.i = z__1.i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - d_cnjg(&z__1, &v[3]); - v3.r = z__1.r, v3.i = z__1.i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - d_cnjg(&z__1, &v[4]); - v4.r = z__1.r, v4.i = z__1.i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - d_cnjg(&z__1, &v[5]); - v5.r = z__1.r, v5.i = z__1.i; - d_cnjg(&z__2, &v5); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t5.r = z__1.r, t5.i = z__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - z__5.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__5.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - z__6.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__6.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - z__4.r = z__5.r + z__6.r, z__4.i = z__5.i + z__6.i; - i__4 = j * c_dim1 + 3; - z__7.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__7.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - z__3.r = z__4.r + z__7.r, z__3.i = z__4.i + z__7.i; - i__5 = j * c_dim1 + 4; - z__8.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__8.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - z__2.r = z__3.r + z__8.r, z__2.i = z__3.i + z__8.i; - i__6 = j * c_dim1 + 5; - z__9.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__9.i = v5.r * - c__[i__6].i + v5.i * c__[i__6].r; - z__1.r = z__2.r + z__9.r, z__1.i = z__2.i + z__9.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + - sum.i * t5.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L100: */ - } - goto L410; -L110: - -/* Special code for 6 x 6 Householder */ - - d_cnjg(&z__1, &v[1]); - v1.r = z__1.r, v1.i = z__1.i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - d_cnjg(&z__1, &v[2]); - v2.r = z__1.r, v2.i = z__1.i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - d_cnjg(&z__1, &v[3]); - v3.r = z__1.r, v3.i = z__1.i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - d_cnjg(&z__1, &v[4]); - v4.r = z__1.r, v4.i = z__1.i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - d_cnjg(&z__1, &v[5]); - v5.r = z__1.r, v5.i = z__1.i; - d_cnjg(&z__2, &v5); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t5.r = z__1.r, t5.i = z__1.i; - d_cnjg(&z__1, &v[6]); - v6.r = z__1.r, v6.i = z__1.i; - d_cnjg(&z__2, &v6); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t6.r = z__1.r, t6.i = z__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - z__6.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__6.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - z__7.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__7.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - z__5.r = z__6.r + z__7.r, z__5.i = z__6.i + z__7.i; - i__4 = j * c_dim1 + 3; - z__8.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__8.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - z__4.r = z__5.r + z__8.r, z__4.i = z__5.i + z__8.i; - i__5 = j * c_dim1 + 4; - z__9.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__9.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - z__3.r = z__4.r + z__9.r, z__3.i = z__4.i + z__9.i; - i__6 = j * c_dim1 + 5; - z__10.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__10.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - z__2.r = z__3.r + z__10.r, z__2.i = z__3.i + z__10.i; - i__7 = j * c_dim1 + 6; - z__11.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__11.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - z__1.r = z__2.r + z__11.r, z__1.i = z__2.i + z__11.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + - sum.i * t5.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 6; - i__3 = j * c_dim1 + 6; - z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + - sum.i * t6.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L120: */ - } - goto L410; -L130: - -/* Special code for 7 x 7 Householder */ - - d_cnjg(&z__1, &v[1]); - v1.r = z__1.r, v1.i = z__1.i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - d_cnjg(&z__1, &v[2]); - v2.r = z__1.r, v2.i = z__1.i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - d_cnjg(&z__1, &v[3]); - v3.r = z__1.r, v3.i = z__1.i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - d_cnjg(&z__1, &v[4]); - v4.r = z__1.r, v4.i = z__1.i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - d_cnjg(&z__1, &v[5]); - v5.r = z__1.r, v5.i = z__1.i; - d_cnjg(&z__2, &v5); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t5.r = z__1.r, t5.i = z__1.i; - d_cnjg(&z__1, &v[6]); - v6.r = z__1.r, v6.i = z__1.i; - d_cnjg(&z__2, &v6); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t6.r = z__1.r, t6.i = z__1.i; - d_cnjg(&z__1, &v[7]); - v7.r = z__1.r, v7.i = z__1.i; - d_cnjg(&z__2, &v7); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t7.r = z__1.r, t7.i = z__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - z__7.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__7.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - z__8.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__8.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - z__6.r = z__7.r + z__8.r, z__6.i = z__7.i + z__8.i; - i__4 = j * c_dim1 + 3; - z__9.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__9.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - z__5.r = z__6.r + z__9.r, z__5.i = z__6.i + z__9.i; - i__5 = j * c_dim1 + 4; - z__10.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__10.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - z__4.r = z__5.r + z__10.r, z__4.i = z__5.i + z__10.i; - i__6 = j * c_dim1 + 5; - z__11.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__11.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - z__3.r = z__4.r + z__11.r, z__3.i = z__4.i + z__11.i; - i__7 = j * c_dim1 + 6; - z__12.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__12.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - z__2.r = z__3.r + z__12.r, z__2.i = z__3.i + z__12.i; - i__8 = j * c_dim1 + 7; - z__13.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__13.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - z__1.r = z__2.r + z__13.r, z__1.i = z__2.i + z__13.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + - sum.i * t5.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 6; - i__3 = j * c_dim1 + 6; - z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + - sum.i * t6.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 7; - i__3 = j * c_dim1 + 7; - z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i + - sum.i * t7.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L140: */ - } - goto L410; -L150: - -/* Special code for 8 x 8 Householder */ - - d_cnjg(&z__1, &v[1]); - v1.r = z__1.r, v1.i = z__1.i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - d_cnjg(&z__1, &v[2]); - v2.r = z__1.r, v2.i = z__1.i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - d_cnjg(&z__1, &v[3]); - v3.r = z__1.r, v3.i = z__1.i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - d_cnjg(&z__1, &v[4]); - v4.r = z__1.r, v4.i = z__1.i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - d_cnjg(&z__1, &v[5]); - v5.r = z__1.r, v5.i = z__1.i; - d_cnjg(&z__2, &v5); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t5.r = z__1.r, t5.i = z__1.i; - d_cnjg(&z__1, &v[6]); - v6.r = z__1.r, v6.i = z__1.i; - d_cnjg(&z__2, &v6); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t6.r = z__1.r, t6.i = z__1.i; - d_cnjg(&z__1, &v[7]); - v7.r = z__1.r, v7.i = z__1.i; - d_cnjg(&z__2, &v7); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t7.r = z__1.r, t7.i = z__1.i; - d_cnjg(&z__1, &v[8]); - v8.r = z__1.r, v8.i = z__1.i; - d_cnjg(&z__2, &v8); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t8.r = z__1.r, t8.i = z__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - z__8.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__8.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - z__9.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__9.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - z__7.r = z__8.r + z__9.r, z__7.i = z__8.i + z__9.i; - i__4 = j * c_dim1 + 3; - z__10.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__10.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - z__6.r = z__7.r + z__10.r, z__6.i = z__7.i + z__10.i; - i__5 = j * c_dim1 + 4; - z__11.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__11.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - z__5.r = z__6.r + z__11.r, z__5.i = z__6.i + z__11.i; - i__6 = j * c_dim1 + 5; - z__12.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__12.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - z__4.r = z__5.r + z__12.r, z__4.i = z__5.i + z__12.i; - i__7 = j * c_dim1 + 6; - z__13.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__13.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - z__3.r = z__4.r + z__13.r, z__3.i = z__4.i + z__13.i; - i__8 = j * c_dim1 + 7; - z__14.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__14.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - z__2.r = z__3.r + z__14.r, z__2.i = z__3.i + z__14.i; - i__9 = j * c_dim1 + 8; - z__15.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, z__15.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - z__1.r = z__2.r + z__15.r, z__1.i = z__2.i + z__15.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + - sum.i * t5.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 6; - i__3 = j * c_dim1 + 6; - z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + - sum.i * t6.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 7; - i__3 = j * c_dim1 + 7; - z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i + - sum.i * t7.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 8; - i__3 = j * c_dim1 + 8; - z__2.r = sum.r * t8.r - sum.i * t8.i, z__2.i = sum.r * t8.i + - sum.i * t8.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L160: */ - } - goto L410; -L170: - -/* Special code for 9 x 9 Householder */ - - d_cnjg(&z__1, &v[1]); - v1.r = z__1.r, v1.i = z__1.i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - d_cnjg(&z__1, &v[2]); - v2.r = z__1.r, v2.i = z__1.i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - d_cnjg(&z__1, &v[3]); - v3.r = z__1.r, v3.i = z__1.i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - d_cnjg(&z__1, &v[4]); - v4.r = z__1.r, v4.i = z__1.i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - d_cnjg(&z__1, &v[5]); - v5.r = z__1.r, v5.i = z__1.i; - d_cnjg(&z__2, &v5); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t5.r = z__1.r, t5.i = z__1.i; - d_cnjg(&z__1, &v[6]); - v6.r = z__1.r, v6.i = z__1.i; - d_cnjg(&z__2, &v6); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t6.r = z__1.r, t6.i = z__1.i; - d_cnjg(&z__1, &v[7]); - v7.r = z__1.r, v7.i = z__1.i; - d_cnjg(&z__2, &v7); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t7.r = z__1.r, t7.i = z__1.i; - d_cnjg(&z__1, &v[8]); - v8.r = z__1.r, v8.i = z__1.i; - d_cnjg(&z__2, &v8); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t8.r = z__1.r, t8.i = z__1.i; - d_cnjg(&z__1, &v[9]); - v9.r = z__1.r, v9.i = z__1.i; - d_cnjg(&z__2, &v9); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t9.r = z__1.r, t9.i = z__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - z__9.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__9.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - z__10.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__10.i = v2.r - * c__[i__3].i + v2.i * c__[i__3].r; - z__8.r = z__9.r + z__10.r, z__8.i = z__9.i + z__10.i; - i__4 = j * c_dim1 + 3; - z__11.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__11.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - z__7.r = z__8.r + z__11.r, z__7.i = z__8.i + z__11.i; - i__5 = j * c_dim1 + 4; - z__12.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__12.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - z__6.r = z__7.r + z__12.r, z__6.i = z__7.i + z__12.i; - i__6 = j * c_dim1 + 5; - z__13.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__13.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - z__5.r = z__6.r + z__13.r, z__5.i = z__6.i + z__13.i; - i__7 = j * c_dim1 + 6; - z__14.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__14.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - z__4.r = z__5.r + z__14.r, z__4.i = z__5.i + z__14.i; - i__8 = j * c_dim1 + 7; - z__15.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__15.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - z__3.r = z__4.r + z__15.r, z__3.i = z__4.i + z__15.i; - i__9 = j * c_dim1 + 8; - z__16.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, z__16.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - z__2.r = z__3.r + z__16.r, z__2.i = z__3.i + z__16.i; - i__10 = j * c_dim1 + 9; - z__17.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, z__17.i = - v9.r * c__[i__10].i + v9.i * c__[i__10].r; - z__1.r = z__2.r + z__17.r, z__1.i = z__2.i + z__17.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + - sum.i * t5.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 6; - i__3 = j * c_dim1 + 6; - z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + - sum.i * t6.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 7; - i__3 = j * c_dim1 + 7; - z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i + - sum.i * t7.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 8; - i__3 = j * c_dim1 + 8; - z__2.r = sum.r * t8.r - sum.i * t8.i, z__2.i = sum.r * t8.i + - sum.i * t8.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 9; - i__3 = j * c_dim1 + 9; - z__2.r = sum.r * t9.r - sum.i * t9.i, z__2.i = sum.r * t9.i + - sum.i * t9.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L180: */ - } - goto L410; -L190: - -/* Special code for 10 x 10 Householder */ - - d_cnjg(&z__1, &v[1]); - v1.r = z__1.r, v1.i = z__1.i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - d_cnjg(&z__1, &v[2]); - v2.r = z__1.r, v2.i = z__1.i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - d_cnjg(&z__1, &v[3]); - v3.r = z__1.r, v3.i = z__1.i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - d_cnjg(&z__1, &v[4]); - v4.r = z__1.r, v4.i = z__1.i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - d_cnjg(&z__1, &v[5]); - v5.r = z__1.r, v5.i = z__1.i; - d_cnjg(&z__2, &v5); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t5.r = z__1.r, t5.i = z__1.i; - d_cnjg(&z__1, &v[6]); - v6.r = z__1.r, v6.i = z__1.i; - d_cnjg(&z__2, &v6); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t6.r = z__1.r, t6.i = z__1.i; - d_cnjg(&z__1, &v[7]); - v7.r = z__1.r, v7.i = z__1.i; - d_cnjg(&z__2, &v7); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t7.r = z__1.r, t7.i = z__1.i; - d_cnjg(&z__1, &v[8]); - v8.r = z__1.r, v8.i = z__1.i; - d_cnjg(&z__2, &v8); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t8.r = z__1.r, t8.i = z__1.i; - d_cnjg(&z__1, &v[9]); - v9.r = z__1.r, v9.i = z__1.i; - d_cnjg(&z__2, &v9); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t9.r = z__1.r, t9.i = z__1.i; - d_cnjg(&z__1, &v[10]); - v10.r = z__1.r, v10.i = z__1.i; - d_cnjg(&z__2, &v10); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t10.r = z__1.r, t10.i = z__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - z__10.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__10.i = v1.r - * c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - z__11.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__11.i = v2.r - * c__[i__3].i + v2.i * c__[i__3].r; - z__9.r = z__10.r + z__11.r, z__9.i = z__10.i + z__11.i; - i__4 = j * c_dim1 + 3; - z__12.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__12.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - z__8.r = z__9.r + z__12.r, z__8.i = z__9.i + z__12.i; - i__5 = j * c_dim1 + 4; - z__13.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__13.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - z__7.r = z__8.r + z__13.r, z__7.i = z__8.i + z__13.i; - i__6 = j * c_dim1 + 5; - z__14.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__14.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - z__6.r = z__7.r + z__14.r, z__6.i = z__7.i + z__14.i; - i__7 = j * c_dim1 + 6; - z__15.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__15.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - z__5.r = z__6.r + z__15.r, z__5.i = z__6.i + z__15.i; - i__8 = j * c_dim1 + 7; - z__16.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__16.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - z__4.r = z__5.r + z__16.r, z__4.i = z__5.i + z__16.i; - i__9 = j * c_dim1 + 8; - z__17.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, z__17.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - z__3.r = z__4.r + z__17.r, z__3.i = z__4.i + z__17.i; - i__10 = j * c_dim1 + 9; - z__18.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, z__18.i = - v9.r * c__[i__10].i + v9.i * c__[i__10].r; - z__2.r = z__3.r + z__18.r, z__2.i = z__3.i + z__18.i; - i__11 = j * c_dim1 + 10; - z__19.r = v10.r * c__[i__11].r - v10.i * c__[i__11].i, z__19.i = - v10.r * c__[i__11].i + v10.i * c__[i__11].r; - z__1.r = z__2.r + z__19.r, z__1.i = z__2.i + z__19.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + - sum.i * t5.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 6; - i__3 = j * c_dim1 + 6; - z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + - sum.i * t6.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 7; - i__3 = j * c_dim1 + 7; - z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i + - sum.i * t7.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 8; - i__3 = j * c_dim1 + 8; - z__2.r = sum.r * t8.r - sum.i * t8.i, z__2.i = sum.r * t8.i + - sum.i * t8.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 9; - i__3 = j * c_dim1 + 9; - z__2.r = sum.r * t9.r - sum.i * t9.i, z__2.i = sum.r * t9.i + - sum.i * t9.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 10; - i__3 = j * c_dim1 + 10; - z__2.r = sum.r * t10.r - sum.i * t10.i, z__2.i = sum.r * t10.i + - sum.i * t10.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L200: */ - } - goto L410; - } else { - -/* Form C * H, where H has order n. */ - - switch (*n) { - case 1: goto L210; - case 2: goto L230; - case 3: goto L250; - case 4: goto L270; - case 5: goto L290; - case 6: goto L310; - case 7: goto L330; - case 8: goto L350; - case 9: goto L370; - case 10: goto L390; - } - -/* - Code for general N - - w := C * v -*/ - - zgemv_("No transpose", m, n, &c_b60, &c__[c_offset], ldc, &v[1], & - c__1, &c_b59, &work[1], &c__1); - -/* C := C - tau * w * v' */ - - z__1.r = -tau->r, z__1.i = -tau->i; - zgerc_(m, n, &z__1, &work[1], &c__1, &v[1], &c__1, &c__[c_offset], - ldc); - goto L410; -L210: - -/* Special code for 1 x 1 Householder */ - - z__3.r = tau->r * v[1].r - tau->i * v[1].i, z__3.i = tau->r * v[1].i - + tau->i * v[1].r; - d_cnjg(&z__4, &v[1]); - z__2.r = z__3.r * z__4.r - z__3.i * z__4.i, z__2.i = z__3.r * z__4.i - + z__3.i * z__4.r; - z__1.r = 1. - z__2.r, z__1.i = 0. - z__2.i; - t1.r = z__1.r, t1.i = z__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - i__3 = j + c_dim1; - z__1.r = t1.r * c__[i__3].r - t1.i * c__[i__3].i, z__1.i = t1.r * - c__[i__3].i + t1.i * c__[i__3].r; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L220: */ - } - goto L410; -L230: - -/* Special code for 2 x 2 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - v2.r = v[2].r, v2.i = v[2].i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - z__2.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__2.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - z__3.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__3.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L240: */ - } - goto L410; -L250: - -/* Special code for 3 x 3 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - v2.r = v[2].r, v2.i = v[2].i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - v3.r = v[3].r, v3.i = v[3].i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - z__3.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__3.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - z__4.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__4.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + z__4.i; - i__4 = j + c_dim1 * 3; - z__5.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__5.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L260: */ - } - goto L410; -L270: - -/* Special code for 4 x 4 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - v2.r = v[2].r, v2.i = v[2].i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - v3.r = v[3].r, v3.i = v[3].i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - v4.r = v[4].r, v4.i = v[4].i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - z__4.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__4.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - z__5.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__5.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - z__3.r = z__4.r + z__5.r, z__3.i = z__4.i + z__5.i; - i__4 = j + c_dim1 * 3; - z__6.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__6.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - z__2.r = z__3.r + z__6.r, z__2.i = z__3.i + z__6.i; - i__5 = j + (c_dim1 << 2); - z__7.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__7.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - z__1.r = z__2.r + z__7.r, z__1.i = z__2.i + z__7.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L280: */ - } - goto L410; -L290: - -/* Special code for 5 x 5 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - v2.r = v[2].r, v2.i = v[2].i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - v3.r = v[3].r, v3.i = v[3].i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - v4.r = v[4].r, v4.i = v[4].i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - v5.r = v[5].r, v5.i = v[5].i; - d_cnjg(&z__2, &v5); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t5.r = z__1.r, t5.i = z__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - z__5.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__5.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - z__6.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__6.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - z__4.r = z__5.r + z__6.r, z__4.i = z__5.i + z__6.i; - i__4 = j + c_dim1 * 3; - z__7.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__7.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - z__3.r = z__4.r + z__7.r, z__3.i = z__4.i + z__7.i; - i__5 = j + (c_dim1 << 2); - z__8.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__8.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - z__2.r = z__3.r + z__8.r, z__2.i = z__3.i + z__8.i; - i__6 = j + c_dim1 * 5; - z__9.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__9.i = v5.r * - c__[i__6].i + v5.i * c__[i__6].r; - z__1.r = z__2.r + z__9.r, z__1.i = z__2.i + z__9.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + - sum.i * t5.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L300: */ - } - goto L410; -L310: - -/* Special code for 6 x 6 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - v2.r = v[2].r, v2.i = v[2].i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - v3.r = v[3].r, v3.i = v[3].i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - v4.r = v[4].r, v4.i = v[4].i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - v5.r = v[5].r, v5.i = v[5].i; - d_cnjg(&z__2, &v5); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t5.r = z__1.r, t5.i = z__1.i; - v6.r = v[6].r, v6.i = v[6].i; - d_cnjg(&z__2, &v6); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t6.r = z__1.r, t6.i = z__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - z__6.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__6.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - z__7.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__7.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - z__5.r = z__6.r + z__7.r, z__5.i = z__6.i + z__7.i; - i__4 = j + c_dim1 * 3; - z__8.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__8.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - z__4.r = z__5.r + z__8.r, z__4.i = z__5.i + z__8.i; - i__5 = j + (c_dim1 << 2); - z__9.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__9.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - z__3.r = z__4.r + z__9.r, z__3.i = z__4.i + z__9.i; - i__6 = j + c_dim1 * 5; - z__10.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__10.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - z__2.r = z__3.r + z__10.r, z__2.i = z__3.i + z__10.i; - i__7 = j + c_dim1 * 6; - z__11.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__11.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - z__1.r = z__2.r + z__11.r, z__1.i = z__2.i + z__11.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + - sum.i * t5.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 6; - i__3 = j + c_dim1 * 6; - z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + - sum.i * t6.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L320: */ - } - goto L410; -L330: - -/* Special code for 7 x 7 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - v2.r = v[2].r, v2.i = v[2].i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - v3.r = v[3].r, v3.i = v[3].i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - v4.r = v[4].r, v4.i = v[4].i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - v5.r = v[5].r, v5.i = v[5].i; - d_cnjg(&z__2, &v5); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t5.r = z__1.r, t5.i = z__1.i; - v6.r = v[6].r, v6.i = v[6].i; - d_cnjg(&z__2, &v6); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t6.r = z__1.r, t6.i = z__1.i; - v7.r = v[7].r, v7.i = v[7].i; - d_cnjg(&z__2, &v7); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t7.r = z__1.r, t7.i = z__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - z__7.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__7.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - z__8.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__8.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - z__6.r = z__7.r + z__8.r, z__6.i = z__7.i + z__8.i; - i__4 = j + c_dim1 * 3; - z__9.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__9.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - z__5.r = z__6.r + z__9.r, z__5.i = z__6.i + z__9.i; - i__5 = j + (c_dim1 << 2); - z__10.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__10.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - z__4.r = z__5.r + z__10.r, z__4.i = z__5.i + z__10.i; - i__6 = j + c_dim1 * 5; - z__11.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__11.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - z__3.r = z__4.r + z__11.r, z__3.i = z__4.i + z__11.i; - i__7 = j + c_dim1 * 6; - z__12.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__12.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - z__2.r = z__3.r + z__12.r, z__2.i = z__3.i + z__12.i; - i__8 = j + c_dim1 * 7; - z__13.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__13.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - z__1.r = z__2.r + z__13.r, z__1.i = z__2.i + z__13.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + - sum.i * t5.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 6; - i__3 = j + c_dim1 * 6; - z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + - sum.i * t6.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 7; - i__3 = j + c_dim1 * 7; - z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i + - sum.i * t7.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L340: */ - } - goto L410; -L350: - -/* Special code for 8 x 8 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - v2.r = v[2].r, v2.i = v[2].i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - v3.r = v[3].r, v3.i = v[3].i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - v4.r = v[4].r, v4.i = v[4].i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - v5.r = v[5].r, v5.i = v[5].i; - d_cnjg(&z__2, &v5); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t5.r = z__1.r, t5.i = z__1.i; - v6.r = v[6].r, v6.i = v[6].i; - d_cnjg(&z__2, &v6); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t6.r = z__1.r, t6.i = z__1.i; - v7.r = v[7].r, v7.i = v[7].i; - d_cnjg(&z__2, &v7); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t7.r = z__1.r, t7.i = z__1.i; - v8.r = v[8].r, v8.i = v[8].i; - d_cnjg(&z__2, &v8); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t8.r = z__1.r, t8.i = z__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - z__8.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__8.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - z__9.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__9.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - z__7.r = z__8.r + z__9.r, z__7.i = z__8.i + z__9.i; - i__4 = j + c_dim1 * 3; - z__10.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__10.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - z__6.r = z__7.r + z__10.r, z__6.i = z__7.i + z__10.i; - i__5 = j + (c_dim1 << 2); - z__11.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__11.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - z__5.r = z__6.r + z__11.r, z__5.i = z__6.i + z__11.i; - i__6 = j + c_dim1 * 5; - z__12.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__12.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - z__4.r = z__5.r + z__12.r, z__4.i = z__5.i + z__12.i; - i__7 = j + c_dim1 * 6; - z__13.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__13.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - z__3.r = z__4.r + z__13.r, z__3.i = z__4.i + z__13.i; - i__8 = j + c_dim1 * 7; - z__14.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__14.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - z__2.r = z__3.r + z__14.r, z__2.i = z__3.i + z__14.i; - i__9 = j + (c_dim1 << 3); - z__15.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, z__15.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - z__1.r = z__2.r + z__15.r, z__1.i = z__2.i + z__15.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + - sum.i * t5.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 6; - i__3 = j + c_dim1 * 6; - z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + - sum.i * t6.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 7; - i__3 = j + c_dim1 * 7; - z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i + - sum.i * t7.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 3); - i__3 = j + (c_dim1 << 3); - z__2.r = sum.r * t8.r - sum.i * t8.i, z__2.i = sum.r * t8.i + - sum.i * t8.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L360: */ - } - goto L410; -L370: - -/* Special code for 9 x 9 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - v2.r = v[2].r, v2.i = v[2].i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - v3.r = v[3].r, v3.i = v[3].i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - v4.r = v[4].r, v4.i = v[4].i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - v5.r = v[5].r, v5.i = v[5].i; - d_cnjg(&z__2, &v5); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t5.r = z__1.r, t5.i = z__1.i; - v6.r = v[6].r, v6.i = v[6].i; - d_cnjg(&z__2, &v6); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t6.r = z__1.r, t6.i = z__1.i; - v7.r = v[7].r, v7.i = v[7].i; - d_cnjg(&z__2, &v7); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t7.r = z__1.r, t7.i = z__1.i; - v8.r = v[8].r, v8.i = v[8].i; - d_cnjg(&z__2, &v8); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t8.r = z__1.r, t8.i = z__1.i; - v9.r = v[9].r, v9.i = v[9].i; - d_cnjg(&z__2, &v9); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t9.r = z__1.r, t9.i = z__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - z__9.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__9.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - z__10.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__10.i = v2.r - * c__[i__3].i + v2.i * c__[i__3].r; - z__8.r = z__9.r + z__10.r, z__8.i = z__9.i + z__10.i; - i__4 = j + c_dim1 * 3; - z__11.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__11.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - z__7.r = z__8.r + z__11.r, z__7.i = z__8.i + z__11.i; - i__5 = j + (c_dim1 << 2); - z__12.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__12.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - z__6.r = z__7.r + z__12.r, z__6.i = z__7.i + z__12.i; - i__6 = j + c_dim1 * 5; - z__13.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__13.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - z__5.r = z__6.r + z__13.r, z__5.i = z__6.i + z__13.i; - i__7 = j + c_dim1 * 6; - z__14.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__14.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - z__4.r = z__5.r + z__14.r, z__4.i = z__5.i + z__14.i; - i__8 = j + c_dim1 * 7; - z__15.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__15.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - z__3.r = z__4.r + z__15.r, z__3.i = z__4.i + z__15.i; - i__9 = j + (c_dim1 << 3); - z__16.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, z__16.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - z__2.r = z__3.r + z__16.r, z__2.i = z__3.i + z__16.i; - i__10 = j + c_dim1 * 9; - z__17.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, z__17.i = - v9.r * c__[i__10].i + v9.i * c__[i__10].r; - z__1.r = z__2.r + z__17.r, z__1.i = z__2.i + z__17.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + - sum.i * t5.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 6; - i__3 = j + c_dim1 * 6; - z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + - sum.i * t6.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 7; - i__3 = j + c_dim1 * 7; - z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i + - sum.i * t7.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 3); - i__3 = j + (c_dim1 << 3); - z__2.r = sum.r * t8.r - sum.i * t8.i, z__2.i = sum.r * t8.i + - sum.i * t8.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 9; - i__3 = j + c_dim1 * 9; - z__2.r = sum.r * t9.r - sum.i * t9.i, z__2.i = sum.r * t9.i + - sum.i * t9.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L380: */ - } - goto L410; -L390: - -/* Special code for 10 x 10 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - v2.r = v[2].r, v2.i = v[2].i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - v3.r = v[3].r, v3.i = v[3].i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - v4.r = v[4].r, v4.i = v[4].i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - v5.r = v[5].r, v5.i = v[5].i; - d_cnjg(&z__2, &v5); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t5.r = z__1.r, t5.i = z__1.i; - v6.r = v[6].r, v6.i = v[6].i; - d_cnjg(&z__2, &v6); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t6.r = z__1.r, t6.i = z__1.i; - v7.r = v[7].r, v7.i = v[7].i; - d_cnjg(&z__2, &v7); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t7.r = z__1.r, t7.i = z__1.i; - v8.r = v[8].r, v8.i = v[8].i; - d_cnjg(&z__2, &v8); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t8.r = z__1.r, t8.i = z__1.i; - v9.r = v[9].r, v9.i = v[9].i; - d_cnjg(&z__2, &v9); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t9.r = z__1.r, t9.i = z__1.i; - v10.r = v[10].r, v10.i = v[10].i; - d_cnjg(&z__2, &v10); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t10.r = z__1.r, t10.i = z__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - z__10.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__10.i = v1.r - * c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - z__11.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__11.i = v2.r - * c__[i__3].i + v2.i * c__[i__3].r; - z__9.r = z__10.r + z__11.r, z__9.i = z__10.i + z__11.i; - i__4 = j + c_dim1 * 3; - z__12.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__12.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - z__8.r = z__9.r + z__12.r, z__8.i = z__9.i + z__12.i; - i__5 = j + (c_dim1 << 2); - z__13.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__13.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - z__7.r = z__8.r + z__13.r, z__7.i = z__8.i + z__13.i; - i__6 = j + c_dim1 * 5; - z__14.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__14.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - z__6.r = z__7.r + z__14.r, z__6.i = z__7.i + z__14.i; - i__7 = j + c_dim1 * 6; - z__15.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__15.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - z__5.r = z__6.r + z__15.r, z__5.i = z__6.i + z__15.i; - i__8 = j + c_dim1 * 7; - z__16.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__16.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - z__4.r = z__5.r + z__16.r, z__4.i = z__5.i + z__16.i; - i__9 = j + (c_dim1 << 3); - z__17.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, z__17.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - z__3.r = z__4.r + z__17.r, z__3.i = z__4.i + z__17.i; - i__10 = j + c_dim1 * 9; - z__18.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, z__18.i = - v9.r * c__[i__10].i + v9.i * c__[i__10].r; - z__2.r = z__3.r + z__18.r, z__2.i = z__3.i + z__18.i; - i__11 = j + c_dim1 * 10; - z__19.r = v10.r * c__[i__11].r - v10.i * c__[i__11].i, z__19.i = - v10.r * c__[i__11].i + v10.i * c__[i__11].r; - z__1.r = z__2.r + z__19.r, z__1.i = z__2.i + z__19.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + - sum.i * t5.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 6; - i__3 = j + c_dim1 * 6; - z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + - sum.i * t6.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 7; - i__3 = j + c_dim1 * 7; - z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i + - sum.i * t7.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 3); - i__3 = j + (c_dim1 << 3); - z__2.r = sum.r * t8.r - sum.i * t8.i, z__2.i = sum.r * t8.i + - sum.i * t8.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 9; - i__3 = j + c_dim1 * 9; - z__2.r = sum.r * t9.r - sum.i * t9.i, z__2.i = sum.r * t9.i + - sum.i * t9.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 10; - i__3 = j + c_dim1 * 10; - z__2.r = sum.r * t10.r - sum.i * t10.i, z__2.i = sum.r * t10.i + - sum.i * t10.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L400: */ - } - goto L410; - } -L410: - return 0; - -/* End of ZLARFX */ - -} /* zlarfx_ */ - -/* Subroutine */ int zlascl_(char *type__, integer *kl, integer *ku, - doublereal *cfrom, doublereal *cto, integer *m, integer *n, - doublecomplex *a, integer *lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - doublecomplex z__1; - - /* Local variables */ - static integer i__, j, k1, k2, k3, k4; - static doublereal mul, cto1; - static logical done; - static doublereal ctoc; - extern logical lsame_(char *, char *); - static integer itype; - static doublereal cfrom1; - - static doublereal cfromc; - extern /* Subroutine */ int xerbla_(char *, integer *); - static doublereal bignum, smlnum; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - ZLASCL multiplies the M by N complex matrix A by the real scalar - CTO/CFROM. This is done without over/underflow as long as the final - result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that - A may be full, upper triangular, lower triangular, upper Hessenberg, - or banded. - - Arguments - ========= - - TYPE (input) CHARACTER*1 - TYPE indices the storage type of the input matrix. - = 'G': A is a full matrix. - = 'L': A is a lower triangular matrix. - = 'U': A is an upper triangular matrix. - = 'H': A is an upper Hessenberg matrix. - = 'B': A is a symmetric band matrix with lower bandwidth KL - and upper bandwidth KU and with the only the lower - half stored. - = 'Q': A is a symmetric band matrix with lower bandwidth KL - and upper bandwidth KU and with the only the upper - half stored. - = 'Z': A is a band matrix with lower bandwidth KL and upper - bandwidth KU. - - KL (input) INTEGER - The lower bandwidth of A. Referenced only if TYPE = 'B', - 'Q' or 'Z'. - - KU (input) INTEGER - The upper bandwidth of A. Referenced only if TYPE = 'B', - 'Q' or 'Z'. - - CFROM (input) DOUBLE PRECISION - CTO (input) DOUBLE PRECISION - The matrix A is multiplied by CTO/CFROM. A(I,J) is computed - without over/underflow if the final result CTO*A(I,J)/CFROM - can be represented without over/underflow. CFROM must be - nonzero. - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA,M) - The matrix to be multiplied by CTO/CFROM. See TYPE for the - storage type. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - INFO (output) INTEGER - 0 - successful exit - <0 - if INFO = -i, the i-th argument had an illegal value. - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - - if (lsame_(type__, "G")) { - itype = 0; - } else if (lsame_(type__, "L")) { - itype = 1; - } else if (lsame_(type__, "U")) { - itype = 2; - } else if (lsame_(type__, "H")) { - itype = 3; - } else if (lsame_(type__, "B")) { - itype = 4; - } else if (lsame_(type__, "Q")) { - itype = 5; - } else if (lsame_(type__, "Z")) { - itype = 6; - } else { - itype = -1; - } - - if (itype == -1) { - *info = -1; - } else if (*cfrom == 0.) { - *info = -4; - } else if (*m < 0) { - *info = -6; - } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { - *info = -7; - } else if (itype <= 3 && *lda < max(1,*m)) { - *info = -9; - } else if (itype >= 4) { -/* Computing MAX */ - i__1 = *m - 1; - if (*kl < 0 || *kl > max(i__1,0)) { - *info = -2; - } else /* if(complicated condition) */ { -/* Computing MAX */ - i__1 = *n - 1; - if (*ku < 0 || *ku > max(i__1,0) || (itype == 4 || itype == 5) && - *kl != *ku) { - *info = -3; - } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < * - ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) { - *info = -9; - } - } - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZLASCL", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0 || *m == 0) { - return 0; - } - -/* Get machine parameters */ - - smlnum = SAFEMINIMUM; - bignum = 1. / smlnum; - - cfromc = *cfrom; - ctoc = *cto; - -L10: - cfrom1 = cfromc * smlnum; - cto1 = ctoc / bignum; - if (abs(cfrom1) > abs(ctoc) && ctoc != 0.) { - mul = smlnum; - done = FALSE_; - cfromc = cfrom1; - } else if (abs(cto1) > abs(cfromc)) { - mul = bignum; - done = FALSE_; - ctoc = cto1; - } else { - mul = ctoc / cfromc; - done = TRUE_; - } - - if (itype == 0) { - -/* Full matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; - a[i__3].r = z__1.r, a[i__3].i = z__1.i; -/* L20: */ - } -/* L30: */ - } - - } else if (itype == 1) { - -/* Lower triangular matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = j; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; - a[i__3].r = z__1.r, a[i__3].i = z__1.i; -/* L40: */ - } -/* L50: */ - } - - } else if (itype == 2) { - -/* Upper triangular matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = min(j,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; - a[i__3].r = z__1.r, a[i__3].i = z__1.i; -/* L60: */ - } -/* L70: */ - } - - } else if (itype == 3) { - -/* Upper Hessenberg matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = j + 1; - i__2 = min(i__3,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; - a[i__3].r = z__1.r, a[i__3].i = z__1.i; -/* L80: */ - } -/* L90: */ - } - - } else if (itype == 4) { - -/* Lower half of a symmetric band matrix */ - - k3 = *kl + 1; - k4 = *n + 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = k3, i__4 = k4 - j; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; - a[i__3].r = z__1.r, a[i__3].i = z__1.i; -/* L100: */ - } -/* L110: */ - } - - } else if (itype == 5) { - -/* Upper half of a symmetric band matrix */ - - k1 = *ku + 2; - k3 = *ku + 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MAX */ - i__2 = k1 - j; - i__3 = k3; - for (i__ = max(i__2,1); i__ <= i__3; ++i__) { - i__2 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; - a[i__2].r = z__1.r, a[i__2].i = z__1.i; -/* L120: */ - } -/* L130: */ - } - - } else if (itype == 6) { - -/* Band matrix */ - - k1 = *kl + *ku + 2; - k2 = *kl + 1; - k3 = (*kl << 1) + *ku + 1; - k4 = *kl + *ku + 1 + *m; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MAX */ - i__3 = k1 - j; -/* Computing MIN */ - i__4 = k3, i__5 = k4 - j; - i__2 = min(i__4,i__5); - for (i__ = max(i__3,k2); i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; - a[i__3].r = z__1.r, a[i__3].i = z__1.i; -/* L140: */ - } -/* L150: */ - } - - } - - if (! done) { - goto L10; - } - - return 0; - -/* End of ZLASCL */ - -} /* zlascl_ */ - -/* Subroutine */ int zlaset_(char *uplo, integer *m, integer *n, - doublecomplex *alpha, doublecomplex *beta, doublecomplex *a, integer * - lda) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j; - extern logical lsame_(char *, char *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - ZLASET initializes a 2-D array A to BETA on the diagonal and - ALPHA on the offdiagonals. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies the part of the matrix A to be set. - = 'U': Upper triangular part is set. The lower triangle - is unchanged. - = 'L': Lower triangular part is set. The upper triangle - is unchanged. - Otherwise: All of the matrix A is set. - - M (input) INTEGER - On entry, M specifies the number of rows of A. - - N (input) INTEGER - On entry, N specifies the number of columns of A. - - ALPHA (input) COMPLEX*16 - All the offdiagonal array elements are set to ALPHA. - - BETA (input) COMPLEX*16 - All the diagonal array elements are set to BETA. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the m by n matrix A. - On exit, A(i,j) = ALPHA, 1 <= i <= m, 1 <= j <= n, i.ne.j; - A(i,i) = BETA , 1 <= i <= min(m,n) - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - if (lsame_(uplo, "U")) { - -/* - Set the diagonal to BETA and the strictly upper triangular - part of the array to ALPHA. -*/ - - i__1 = *n; - for (j = 2; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = j - 1; - i__2 = min(i__3,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - a[i__3].r = alpha->r, a[i__3].i = alpha->i; -/* L10: */ - } -/* L20: */ - } - i__1 = min(*n,*m); - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + i__ * a_dim1; - a[i__2].r = beta->r, a[i__2].i = beta->i; -/* L30: */ - } - - } else if (lsame_(uplo, "L")) { - -/* - Set the diagonal to BETA and the strictly lower triangular - part of the array to ALPHA. -*/ - - i__1 = min(*m,*n); - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - a[i__3].r = alpha->r, a[i__3].i = alpha->i; -/* L40: */ - } -/* L50: */ - } - i__1 = min(*n,*m); - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + i__ * a_dim1; - a[i__2].r = beta->r, a[i__2].i = beta->i; -/* L60: */ - } - - } else { - -/* - Set the array to BETA on the diagonal and ALPHA on the - offdiagonal. -*/ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - a[i__3].r = alpha->r, a[i__3].i = alpha->i; -/* L70: */ - } -/* L80: */ - } - i__1 = min(*m,*n); - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + i__ * a_dim1; - a[i__2].r = beta->r, a[i__2].i = beta->i; -/* L90: */ - } - } - - return 0; - -/* End of ZLASET */ - -} /* zlaset_ */ - -/* Subroutine */ int zlasr_(char *side, char *pivot, char *direct, integer *m, - integer *n, doublereal *c__, doublereal *s, doublecomplex *a, - integer *lda) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - doublecomplex z__1, z__2, z__3; - - /* Local variables */ - static integer i__, j, info; - static doublecomplex temp; - extern logical lsame_(char *, char *); - static doublereal ctemp, stemp; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - ZLASR performs the transformation - - A := P*A, when SIDE = 'L' or 'l' ( Left-hand side ) - - A := A*P', when SIDE = 'R' or 'r' ( Right-hand side ) - - where A is an m by n complex matrix and P is an orthogonal matrix, - consisting of a sequence of plane rotations determined by the - parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l' - and z = n when SIDE = 'R' or 'r' ): - - When DIRECT = 'F' or 'f' ( Forward sequence ) then - - P = P( z - 1 )*...*P( 2 )*P( 1 ), - - and when DIRECT = 'B' or 'b' ( Backward sequence ) then - - P = P( 1 )*P( 2 )*...*P( z - 1 ), - - where P( k ) is a plane rotation matrix for the following planes: - - when PIVOT = 'V' or 'v' ( Variable pivot ), - the plane ( k, k + 1 ) - - when PIVOT = 'T' or 't' ( Top pivot ), - the plane ( 1, k + 1 ) - - when PIVOT = 'B' or 'b' ( Bottom pivot ), - the plane ( k, z ) - - c( k ) and s( k ) must contain the cosine and sine that define the - matrix P( k ). The two by two plane rotation part of the matrix - P( k ), R( k ), is assumed to be of the form - - R( k ) = ( c( k ) s( k ) ). - ( -s( k ) c( k ) ) - - Arguments - ========= - - SIDE (input) CHARACTER*1 - Specifies whether the plane rotation matrix P is applied to - A on the left or the right. - = 'L': Left, compute A := P*A - = 'R': Right, compute A:= A*P' - - DIRECT (input) CHARACTER*1 - Specifies whether P is a forward or backward sequence of - plane rotations. - = 'F': Forward, P = P( z - 1 )*...*P( 2 )*P( 1 ) - = 'B': Backward, P = P( 1 )*P( 2 )*...*P( z - 1 ) - - PIVOT (input) CHARACTER*1 - Specifies the plane for which P(k) is a plane rotation - matrix. - = 'V': Variable pivot, the plane (k,k+1) - = 'T': Top pivot, the plane (1,k+1) - = 'B': Bottom pivot, the plane (k,z) - - M (input) INTEGER - The number of rows of the matrix A. If m <= 1, an immediate - return is effected. - - N (input) INTEGER - The number of columns of the matrix A. If n <= 1, an - immediate return is effected. - - C, S (input) DOUBLE PRECISION arrays, dimension - (M-1) if SIDE = 'L' - (N-1) if SIDE = 'R' - c(k) and s(k) contain the cosine and sine that define the - matrix P(k). The two by two plane rotation part of the - matrix P(k), R(k), is assumed to be of the form - R( k ) = ( c( k ) s( k ) ). - ( -s( k ) c( k ) ) - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - The m by n matrix A. On exit, A is overwritten by P*A if - SIDE = 'R' or by A*P' if SIDE = 'L'. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - --c__; - --s; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - info = 0; - if (! (lsame_(side, "L") || lsame_(side, "R"))) { - info = 1; - } else if (! (lsame_(pivot, "V") || lsame_(pivot, - "T") || lsame_(pivot, "B"))) { - info = 2; - } else if (! (lsame_(direct, "F") || lsame_(direct, - "B"))) { - info = 3; - } else if (*m < 0) { - info = 4; - } else if (*n < 0) { - info = 5; - } else if (*lda < max(1,*m)) { - info = 9; - } - if (info != 0) { - xerbla_("ZLASR ", &info); - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0) { - return 0; - } - if (lsame_(side, "L")) { - -/* Form P * A */ - - if (lsame_(pivot, "V")) { - if (lsame_(direct, "F")) { - i__1 = *m - 1; - for (j = 1; j <= i__1; ++j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1. || stemp != 0.) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = j + 1 + i__ * a_dim1; - temp.r = a[i__3].r, temp.i = a[i__3].i; - i__3 = j + 1 + i__ * a_dim1; - z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; - i__4 = j + i__ * a_dim1; - z__3.r = stemp * a[i__4].r, z__3.i = stemp * a[ - i__4].i; - z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - - z__3.i; - a[i__3].r = z__1.r, a[i__3].i = z__1.i; - i__3 = j + i__ * a_dim1; - z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; - i__4 = j + i__ * a_dim1; - z__3.r = ctemp * a[i__4].r, z__3.i = ctemp * a[ - i__4].i; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + - z__3.i; - a[i__3].r = z__1.r, a[i__3].i = z__1.i; -/* L10: */ - } - } -/* L20: */ - } - } else if (lsame_(direct, "B")) { - for (j = *m - 1; j >= 1; --j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1. || stemp != 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = j + 1 + i__ * a_dim1; - temp.r = a[i__2].r, temp.i = a[i__2].i; - i__2 = j + 1 + i__ * a_dim1; - z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; - i__3 = j + i__ * a_dim1; - z__3.r = stemp * a[i__3].r, z__3.i = stemp * a[ - i__3].i; - z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - - z__3.i; - a[i__2].r = z__1.r, a[i__2].i = z__1.i; - i__2 = j + i__ * a_dim1; - z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; - i__3 = j + i__ * a_dim1; - z__3.r = ctemp * a[i__3].r, z__3.i = ctemp * a[ - i__3].i; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + - z__3.i; - a[i__2].r = z__1.r, a[i__2].i = z__1.i; -/* L30: */ - } - } -/* L40: */ - } - } - } else if (lsame_(pivot, "T")) { - if (lsame_(direct, "F")) { - i__1 = *m; - for (j = 2; j <= i__1; ++j) { - ctemp = c__[j - 1]; - stemp = s[j - 1]; - if (ctemp != 1. || stemp != 0.) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = j + i__ * a_dim1; - temp.r = a[i__3].r, temp.i = a[i__3].i; - i__3 = j + i__ * a_dim1; - z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; - i__4 = i__ * a_dim1 + 1; - z__3.r = stemp * a[i__4].r, z__3.i = stemp * a[ - i__4].i; - z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - - z__3.i; - a[i__3].r = z__1.r, a[i__3].i = z__1.i; - i__3 = i__ * a_dim1 + 1; - z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; - i__4 = i__ * a_dim1 + 1; - z__3.r = ctemp * a[i__4].r, z__3.i = ctemp * a[ - i__4].i; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + - z__3.i; - a[i__3].r = z__1.r, a[i__3].i = z__1.i; -/* L50: */ - } - } -/* L60: */ - } - } else if (lsame_(direct, "B")) { - for (j = *m; j >= 2; --j) { - ctemp = c__[j - 1]; - stemp = s[j - 1]; - if (ctemp != 1. || stemp != 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = j + i__ * a_dim1; - temp.r = a[i__2].r, temp.i = a[i__2].i; - i__2 = j + i__ * a_dim1; - z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; - i__3 = i__ * a_dim1 + 1; - z__3.r = stemp * a[i__3].r, z__3.i = stemp * a[ - i__3].i; - z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - - z__3.i; - a[i__2].r = z__1.r, a[i__2].i = z__1.i; - i__2 = i__ * a_dim1 + 1; - z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; - i__3 = i__ * a_dim1 + 1; - z__3.r = ctemp * a[i__3].r, z__3.i = ctemp * a[ - i__3].i; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + - z__3.i; - a[i__2].r = z__1.r, a[i__2].i = z__1.i; -/* L70: */ - } - } -/* L80: */ - } - } - } else if (lsame_(pivot, "B")) { - if (lsame_(direct, "F")) { - i__1 = *m - 1; - for (j = 1; j <= i__1; ++j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1. || stemp != 0.) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = j + i__ * a_dim1; - temp.r = a[i__3].r, temp.i = a[i__3].i; - i__3 = j + i__ * a_dim1; - i__4 = *m + i__ * a_dim1; - z__2.r = stemp * a[i__4].r, z__2.i = stemp * a[ - i__4].i; - z__3.r = ctemp * temp.r, z__3.i = ctemp * temp.i; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + - z__3.i; - a[i__3].r = z__1.r, a[i__3].i = z__1.i; - i__3 = *m + i__ * a_dim1; - i__4 = *m + i__ * a_dim1; - z__2.r = ctemp * a[i__4].r, z__2.i = ctemp * a[ - i__4].i; - z__3.r = stemp * temp.r, z__3.i = stemp * temp.i; - z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - - z__3.i; - a[i__3].r = z__1.r, a[i__3].i = z__1.i; -/* L90: */ - } - } -/* L100: */ - } - } else if (lsame_(direct, "B")) { - for (j = *m - 1; j >= 1; --j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1. || stemp != 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = j + i__ * a_dim1; - temp.r = a[i__2].r, temp.i = a[i__2].i; - i__2 = j + i__ * a_dim1; - i__3 = *m + i__ * a_dim1; - z__2.r = stemp * a[i__3].r, z__2.i = stemp * a[ - i__3].i; - z__3.r = ctemp * temp.r, z__3.i = ctemp * temp.i; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + - z__3.i; - a[i__2].r = z__1.r, a[i__2].i = z__1.i; - i__2 = *m + i__ * a_dim1; - i__3 = *m + i__ * a_dim1; - z__2.r = ctemp * a[i__3].r, z__2.i = ctemp * a[ - i__3].i; - z__3.r = stemp * temp.r, z__3.i = stemp * temp.i; - z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - - z__3.i; - a[i__2].r = z__1.r, a[i__2].i = z__1.i; -/* L110: */ - } - } -/* L120: */ - } - } - } - } else if (lsame_(side, "R")) { - -/* Form A * P' */ - - if (lsame_(pivot, "V")) { - if (lsame_(direct, "F")) { - i__1 = *n - 1; - for (j = 1; j <= i__1; ++j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1. || stemp != 0.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + (j + 1) * a_dim1; - temp.r = a[i__3].r, temp.i = a[i__3].i; - i__3 = i__ + (j + 1) * a_dim1; - z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; - i__4 = i__ + j * a_dim1; - z__3.r = stemp * a[i__4].r, z__3.i = stemp * a[ - i__4].i; - z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - - z__3.i; - a[i__3].r = z__1.r, a[i__3].i = z__1.i; - i__3 = i__ + j * a_dim1; - z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; - i__4 = i__ + j * a_dim1; - z__3.r = ctemp * a[i__4].r, z__3.i = ctemp * a[ - i__4].i; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + - z__3.i; - a[i__3].r = z__1.r, a[i__3].i = z__1.i; -/* L130: */ - } - } -/* L140: */ - } - } else if (lsame_(direct, "B")) { - for (j = *n - 1; j >= 1; --j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1. || stemp != 0.) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + (j + 1) * a_dim1; - temp.r = a[i__2].r, temp.i = a[i__2].i; - i__2 = i__ + (j + 1) * a_dim1; - z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; - i__3 = i__ + j * a_dim1; - z__3.r = stemp * a[i__3].r, z__3.i = stemp * a[ - i__3].i; - z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - - z__3.i; - a[i__2].r = z__1.r, a[i__2].i = z__1.i; - i__2 = i__ + j * a_dim1; - z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; - i__3 = i__ + j * a_dim1; - z__3.r = ctemp * a[i__3].r, z__3.i = ctemp * a[ - i__3].i; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + - z__3.i; - a[i__2].r = z__1.r, a[i__2].i = z__1.i; -/* L150: */ - } - } -/* L160: */ - } - } - } else if (lsame_(pivot, "T")) { - if (lsame_(direct, "F")) { - i__1 = *n; - for (j = 2; j <= i__1; ++j) { - ctemp = c__[j - 1]; - stemp = s[j - 1]; - if (ctemp != 1. || stemp != 0.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - temp.r = a[i__3].r, temp.i = a[i__3].i; - i__3 = i__ + j * a_dim1; - z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; - i__4 = i__ + a_dim1; - z__3.r = stemp * a[i__4].r, z__3.i = stemp * a[ - i__4].i; - z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - - z__3.i; - a[i__3].r = z__1.r, a[i__3].i = z__1.i; - i__3 = i__ + a_dim1; - z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; - i__4 = i__ + a_dim1; - z__3.r = ctemp * a[i__4].r, z__3.i = ctemp * a[ - i__4].i; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + - z__3.i; - a[i__3].r = z__1.r, a[i__3].i = z__1.i; -/* L170: */ - } - } -/* L180: */ - } - } else if (lsame_(direct, "B")) { - for (j = *n; j >= 2; --j) { - ctemp = c__[j - 1]; - stemp = s[j - 1]; - if (ctemp != 1. || stemp != 0.) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + j * a_dim1; - temp.r = a[i__2].r, temp.i = a[i__2].i; - i__2 = i__ + j * a_dim1; - z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; - i__3 = i__ + a_dim1; - z__3.r = stemp * a[i__3].r, z__3.i = stemp * a[ - i__3].i; - z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - - z__3.i; - a[i__2].r = z__1.r, a[i__2].i = z__1.i; - i__2 = i__ + a_dim1; - z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; - i__3 = i__ + a_dim1; - z__3.r = ctemp * a[i__3].r, z__3.i = ctemp * a[ - i__3].i; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + - z__3.i; - a[i__2].r = z__1.r, a[i__2].i = z__1.i; -/* L190: */ - } - } -/* L200: */ - } - } - } else if (lsame_(pivot, "B")) { - if (lsame_(direct, "F")) { - i__1 = *n - 1; - for (j = 1; j <= i__1; ++j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1. || stemp != 0.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - temp.r = a[i__3].r, temp.i = a[i__3].i; - i__3 = i__ + j * a_dim1; - i__4 = i__ + *n * a_dim1; - z__2.r = stemp * a[i__4].r, z__2.i = stemp * a[ - i__4].i; - z__3.r = ctemp * temp.r, z__3.i = ctemp * temp.i; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + - z__3.i; - a[i__3].r = z__1.r, a[i__3].i = z__1.i; - i__3 = i__ + *n * a_dim1; - i__4 = i__ + *n * a_dim1; - z__2.r = ctemp * a[i__4].r, z__2.i = ctemp * a[ - i__4].i; - z__3.r = stemp * temp.r, z__3.i = stemp * temp.i; - z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - - z__3.i; - a[i__3].r = z__1.r, a[i__3].i = z__1.i; -/* L210: */ - } - } -/* L220: */ - } - } else if (lsame_(direct, "B")) { - for (j = *n - 1; j >= 1; --j) { - ctemp = c__[j]; - stemp = s[j]; - if (ctemp != 1. || stemp != 0.) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + j * a_dim1; - temp.r = a[i__2].r, temp.i = a[i__2].i; - i__2 = i__ + j * a_dim1; - i__3 = i__ + *n * a_dim1; - z__2.r = stemp * a[i__3].r, z__2.i = stemp * a[ - i__3].i; - z__3.r = ctemp * temp.r, z__3.i = ctemp * temp.i; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + - z__3.i; - a[i__2].r = z__1.r, a[i__2].i = z__1.i; - i__2 = i__ + *n * a_dim1; - i__3 = i__ + *n * a_dim1; - z__2.r = ctemp * a[i__3].r, z__2.i = ctemp * a[ - i__3].i; - z__3.r = stemp * temp.r, z__3.i = stemp * temp.i; - z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - - z__3.i; - a[i__2].r = z__1.r, a[i__2].i = z__1.i; -/* L230: */ - } - } -/* L240: */ - } - } - } - } - - return 0; - -/* End of ZLASR */ - -} /* zlasr_ */ - -/* Subroutine */ int zlassq_(integer *n, doublecomplex *x, integer *incx, - doublereal *scale, doublereal *sumsq) -{ - /* System generated locals */ - integer i__1, i__2, i__3; - doublereal d__1; - - /* Builtin functions */ - double d_imag(doublecomplex *); - - /* Local variables */ - static integer ix; - static doublereal temp1; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZLASSQ returns the values scl and ssq such that - - ( scl**2 )*ssq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, - - where x( i ) = abs( X( 1 + ( i - 1 )*INCX ) ). The value of sumsq is - assumed to be at least unity and the value of ssq will then satisfy - - 1.0 .le. ssq .le. ( sumsq + 2*n ). - - scale is assumed to be non-negative and scl returns the value - - scl = max( scale, abs( real( x( i ) ) ), abs( aimag( x( i ) ) ) ), - i - - scale and sumsq must be supplied in SCALE and SUMSQ respectively. - SCALE and SUMSQ are overwritten by scl and ssq respectively. - - The routine makes only one pass through the vector X. - - Arguments - ========= - - N (input) INTEGER - The number of elements to be used from the vector X. - - X (input) COMPLEX*16 array, dimension (N) - The vector x as described above. - x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. - - INCX (input) INTEGER - The increment between successive values of the vector X. - INCX > 0. - - SCALE (input/output) DOUBLE PRECISION - On entry, the value scale in the equation above. - On exit, SCALE is overwritten with the value scl . - - SUMSQ (input/output) DOUBLE PRECISION - On entry, the value sumsq in the equation above. - On exit, SUMSQ is overwritten with the value ssq . - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --x; - - /* Function Body */ - if (*n > 0) { - i__1 = (*n - 1) * *incx + 1; - i__2 = *incx; - for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { - i__3 = ix; - if (x[i__3].r != 0.) { - i__3 = ix; - temp1 = (d__1 = x[i__3].r, abs(d__1)); - if (*scale < temp1) { -/* Computing 2nd power */ - d__1 = *scale / temp1; - *sumsq = *sumsq * (d__1 * d__1) + 1; - *scale = temp1; - } else { -/* Computing 2nd power */ - d__1 = temp1 / *scale; - *sumsq += d__1 * d__1; - } - } - if (d_imag(&x[ix]) != 0.) { - temp1 = (d__1 = d_imag(&x[ix]), abs(d__1)); - if (*scale < temp1) { -/* Computing 2nd power */ - d__1 = *scale / temp1; - *sumsq = *sumsq * (d__1 * d__1) + 1; - *scale = temp1; - } else { -/* Computing 2nd power */ - d__1 = temp1 / *scale; - *sumsq += d__1 * d__1; - } - } -/* L10: */ - } - } - - return 0; - -/* End of ZLASSQ */ - -} /* zlassq_ */ - -/* Subroutine */ int zlaswp_(integer *n, doublecomplex *a, integer *lda, - integer *k1, integer *k2, integer *ipiv, integer *incx) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; - - /* Local variables */ - static integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc; - static doublecomplex temp; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZLASWP performs a series of row interchanges on the matrix A. - One row interchange is initiated for each of rows K1 through K2 of A. - - Arguments - ========= - - N (input) INTEGER - The number of columns of the matrix A. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the matrix of column dimension N to which the row - interchanges will be applied. - On exit, the permuted matrix. - - LDA (input) INTEGER - The leading dimension of the array A. - - K1 (input) INTEGER - The first element of IPIV for which a row interchange will - be done. - - K2 (input) INTEGER - The last element of IPIV for which a row interchange will - be done. - - IPIV (input) INTEGER array, dimension (M*abs(INCX)) - The vector of pivot indices. Only the elements in positions - K1 through K2 of IPIV are accessed. - IPIV(K) = L implies rows K and L are to be interchanged. - - INCX (input) INTEGER - The increment between successive values of IPIV. If IPIV - is negative, the pivots are applied in reverse order. - - Further Details - =============== - - Modified by - R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA - - ===================================================================== - - - Interchange row I with row IPIV(I) for each of rows K1 through K2. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - - /* Function Body */ - if (*incx > 0) { - ix0 = *k1; - i1 = *k1; - i2 = *k2; - inc = 1; - } else if (*incx < 0) { - ix0 = (1 - *k2) * *incx + 1; - i1 = *k2; - i2 = *k1; - inc = -1; - } else { - return 0; - } - - n32 = *n / 32 << 5; - if (n32 != 0) { - i__1 = n32; - for (j = 1; j <= i__1; j += 32) { - ix = ix0; - i__2 = i2; - i__3 = inc; - for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) - { - ip = ipiv[ix]; - if (ip != i__) { - i__4 = j + 31; - for (k = j; k <= i__4; ++k) { - i__5 = i__ + k * a_dim1; - temp.r = a[i__5].r, temp.i = a[i__5].i; - i__5 = i__ + k * a_dim1; - i__6 = ip + k * a_dim1; - a[i__5].r = a[i__6].r, a[i__5].i = a[i__6].i; - i__5 = ip + k * a_dim1; - a[i__5].r = temp.r, a[i__5].i = temp.i; -/* L10: */ - } - } - ix += *incx; -/* L20: */ - } -/* L30: */ - } - } - if (n32 != *n) { - ++n32; - ix = ix0; - i__1 = i2; - i__3 = inc; - for (i__ = i1; i__3 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__3) { - ip = ipiv[ix]; - if (ip != i__) { - i__2 = *n; - for (k = n32; k <= i__2; ++k) { - i__4 = i__ + k * a_dim1; - temp.r = a[i__4].r, temp.i = a[i__4].i; - i__4 = i__ + k * a_dim1; - i__5 = ip + k * a_dim1; - a[i__4].r = a[i__5].r, a[i__4].i = a[i__5].i; - i__4 = ip + k * a_dim1; - a[i__4].r = temp.r, a[i__4].i = temp.i; -/* L40: */ - } - } - ix += *incx; -/* L50: */ - } - } - - return 0; - -/* End of ZLASWP */ - -} /* zlaswp_ */ - -/* Subroutine */ int zlatrd_(char *uplo, integer *n, integer *nb, - doublecomplex *a, integer *lda, doublereal *e, doublecomplex *tau, - doublecomplex *w, integer *ldw) -{ - /* System generated locals */ - integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3; - doublereal d__1; - doublecomplex z__1, z__2, z__3, z__4; - - /* Local variables */ - static integer i__, iw; - static doublecomplex alpha; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int zscal_(integer *, doublecomplex *, - doublecomplex *, integer *); - extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, integer *); - extern /* Subroutine */ int zgemv_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *), - zhemv_(char *, integer *, doublecomplex *, doublecomplex *, - integer *, doublecomplex *, integer *, doublecomplex *, - doublecomplex *, integer *), zaxpy_(integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *), zlarfg_(integer *, doublecomplex *, doublecomplex *, - integer *, doublecomplex *), zlacgv_(integer *, doublecomplex *, - integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZLATRD reduces NB rows and columns of a complex Hermitian matrix A to - Hermitian tridiagonal form by a unitary similarity - transformation Q' * A * Q, and returns the matrices V and W which are - needed to apply the transformation to the unreduced part of A. - - If UPLO = 'U', ZLATRD reduces the last NB rows and columns of a - matrix, of which the upper triangle is supplied; - if UPLO = 'L', ZLATRD reduces the first NB rows and columns of a - matrix, of which the lower triangle is supplied. - - This is an auxiliary routine called by ZHETRD. - - Arguments - ========= - - UPLO (input) CHARACTER - Specifies whether the upper or lower triangular part of the - Hermitian matrix A is stored: - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the matrix A. - - NB (input) INTEGER - The number of rows and columns to be reduced. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the Hermitian matrix A. If UPLO = 'U', the leading - n-by-n upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading n-by-n lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - On exit: - if UPLO = 'U', the last NB columns have been reduced to - tridiagonal form, with the diagonal elements overwriting - the diagonal elements of A; the elements above the diagonal - with the array TAU, represent the unitary matrix Q as a - product of elementary reflectors; - if UPLO = 'L', the first NB columns have been reduced to - tridiagonal form, with the diagonal elements overwriting - the diagonal elements of A; the elements below the diagonal - with the array TAU, represent the unitary matrix Q as a - product of elementary reflectors. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - E (output) DOUBLE PRECISION array, dimension (N-1) - If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal - elements of the last NB columns of the reduced matrix; - if UPLO = 'L', E(1:nb) contains the subdiagonal elements of - the first NB columns of the reduced matrix. - - TAU (output) COMPLEX*16 array, dimension (N-1) - The scalar factors of the elementary reflectors, stored in - TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. - See Further Details. - - W (output) COMPLEX*16 array, dimension (LDW,NB) - The n-by-nb matrix W required to update the unreduced part - of A. - - LDW (input) INTEGER - The leading dimension of the array W. LDW >= max(1,N). - - Further Details - =============== - - If UPLO = 'U', the matrix Q is represented as a product of elementary - reflectors - - Q = H(n) H(n-1) . . . H(n-nb+1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), - and tau in TAU(i-1). - - If UPLO = 'L', the matrix Q is represented as a product of elementary - reflectors - - Q = H(1) H(2) . . . H(nb). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), - and tau in TAU(i). - - The elements of the vectors v together form the n-by-nb matrix V - which is needed, with W, to apply the transformation to the unreduced - part of the matrix, using a Hermitian rank-2k update of the form: - A := A - V*W' - W*V'. - - The contents of A on exit are illustrated by the following examples - with n = 5 and nb = 2: - - if UPLO = 'U': if UPLO = 'L': - - ( a a a v4 v5 ) ( d ) - ( a a v4 v5 ) ( 1 d ) - ( a 1 v5 ) ( v1 1 a ) - ( d 1 ) ( v1 v2 a a ) - ( d ) ( v1 v2 a a a ) - - where d denotes a diagonal element of the reduced matrix, a denotes - an element of the original matrix that is unchanged, and vi denotes - an element of the vector defining H(i). - - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --e; - --tau; - w_dim1 = *ldw; - w_offset = 1 + w_dim1; - w -= w_offset; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - - if (lsame_(uplo, "U")) { - -/* Reduce last NB columns of upper triangle */ - - i__1 = *n - *nb + 1; - for (i__ = *n; i__ >= i__1; --i__) { - iw = i__ - *n + *nb; - if (i__ < *n) { - -/* Update A(1:i,i) */ - - i__2 = i__ + i__ * a_dim1; - i__3 = i__ + i__ * a_dim1; - d__1 = a[i__3].r; - a[i__2].r = d__1, a[i__2].i = 0.; - i__2 = *n - i__; - zlacgv_(&i__2, &w[i__ + (iw + 1) * w_dim1], ldw); - i__2 = *n - i__; - z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", &i__, &i__2, &z__1, &a[(i__ + 1) * - a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, & - c_b60, &a[i__ * a_dim1 + 1], &c__1); - i__2 = *n - i__; - zlacgv_(&i__2, &w[i__ + (iw + 1) * w_dim1], ldw); - i__2 = *n - i__; - zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); - i__2 = *n - i__; - z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", &i__, &i__2, &z__1, &w[(iw + 1) * - w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b60, &a[i__ * a_dim1 + 1], &c__1); - i__2 = *n - i__; - zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); - i__2 = i__ + i__ * a_dim1; - i__3 = i__ + i__ * a_dim1; - d__1 = a[i__3].r; - a[i__2].r = d__1, a[i__2].i = 0.; - } - if (i__ > 1) { - -/* - Generate elementary reflector H(i) to annihilate - A(1:i-2,i) -*/ - - i__2 = i__ - 1 + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = i__ - 1; - zlarfg_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &tau[i__ - - 1]); - i__2 = i__ - 1; - e[i__2] = alpha.r; - i__2 = i__ - 1 + i__ * a_dim1; - a[i__2].r = 1., a[i__2].i = 0.; - -/* Compute W(1:i-1,i) */ - - i__2 = i__ - 1; - zhemv_("Upper", &i__2, &c_b60, &a[a_offset], lda, &a[i__ * - a_dim1 + 1], &c__1, &c_b59, &w[iw * w_dim1 + 1], & - c__1); - if (i__ < *n) { - i__2 = i__ - 1; - i__3 = *n - i__; - zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &w[( - iw + 1) * w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], - &c__1, &c_b59, &w[i__ + 1 + iw * w_dim1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", &i__2, &i__3, &z__1, &a[(i__ + 1) * - a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], & - c__1, &c_b60, &w[iw * w_dim1 + 1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[( - i__ + 1) * a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], - &c__1, &c_b59, &w[i__ + 1 + iw * w_dim1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", &i__2, &i__3, &z__1, &w[(iw + 1) * - w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], & - c__1, &c_b60, &w[iw * w_dim1 + 1], &c__1); - } - i__2 = i__ - 1; - zscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1); - z__3.r = -.5, z__3.i = -0.; - i__2 = i__ - 1; - z__2.r = z__3.r * tau[i__2].r - z__3.i * tau[i__2].i, z__2.i = - z__3.r * tau[i__2].i + z__3.i * tau[i__2].r; - i__3 = i__ - 1; - zdotc_(&z__4, &i__3, &w[iw * w_dim1 + 1], &c__1, &a[i__ * - a_dim1 + 1], &c__1); - z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r * - z__4.i + z__2.i * z__4.r; - alpha.r = z__1.r, alpha.i = z__1.i; - i__2 = i__ - 1; - zaxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw * - w_dim1 + 1], &c__1); - } - -/* L10: */ - } - } else { - -/* Reduce first NB columns of lower triangle */ - - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Update A(i:n,i) */ - - i__2 = i__ + i__ * a_dim1; - i__3 = i__ + i__ * a_dim1; - d__1 = a[i__3].r; - a[i__2].r = d__1, a[i__2].i = 0.; - i__2 = i__ - 1; - zlacgv_(&i__2, &w[i__ + w_dim1], ldw); - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + a_dim1], lda, - &w[i__ + w_dim1], ldw, &c_b60, &a[i__ + i__ * a_dim1], & - c__1); - i__2 = i__ - 1; - zlacgv_(&i__2, &w[i__ + w_dim1], ldw); - i__2 = i__ - 1; - zlacgv_(&i__2, &a[i__ + a_dim1], lda); - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", &i__2, &i__3, &z__1, &w[i__ + w_dim1], ldw, - &a[i__ + a_dim1], lda, &c_b60, &a[i__ + i__ * a_dim1], & - c__1); - i__2 = i__ - 1; - zlacgv_(&i__2, &a[i__ + a_dim1], lda); - i__2 = i__ + i__ * a_dim1; - i__3 = i__ + i__ * a_dim1; - d__1 = a[i__3].r; - a[i__2].r = d__1, a[i__2].i = 0.; - if (i__ < *n) { - -/* - Generate elementary reflector H(i) to annihilate - A(i+2:n,i) -*/ - - i__2 = i__ + 1 + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *n - i__; -/* Computing MIN */ - i__3 = i__ + 2; - zlarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, - &tau[i__]); - i__2 = i__; - e[i__2] = alpha.r; - i__2 = i__ + 1 + i__ * a_dim1; - a[i__2].r = 1., a[i__2].i = 0.; - -/* Compute W(i+1:n,i) */ - - i__2 = *n - i__; - zhemv_("Lower", &i__2, &c_b60, &a[i__ + 1 + (i__ + 1) * - a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b59, &w[i__ + 1 + i__ * w_dim1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &w[i__ + - 1 + w_dim1], ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b59, &w[i__ * w_dim1 + 1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + 1 + - a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b60, &w[ - i__ + 1 + i__ * w_dim1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[i__ + - 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b59, &w[i__ * w_dim1 + 1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", &i__2, &i__3, &z__1, &w[i__ + 1 + - w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b60, &w[ - i__ + 1 + i__ * w_dim1], &c__1); - i__2 = *n - i__; - zscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1); - z__3.r = -.5, z__3.i = -0.; - i__2 = i__; - z__2.r = z__3.r * tau[i__2].r - z__3.i * tau[i__2].i, z__2.i = - z__3.r * tau[i__2].i + z__3.i * tau[i__2].r; - i__3 = *n - i__; - zdotc_(&z__4, &i__3, &w[i__ + 1 + i__ * w_dim1], &c__1, &a[ - i__ + 1 + i__ * a_dim1], &c__1); - z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r * - z__4.i + z__2.i * z__4.r; - alpha.r = z__1.r, alpha.i = z__1.i; - i__2 = *n - i__; - zaxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[ - i__ + 1 + i__ * w_dim1], &c__1); - } - -/* L20: */ - } - } - - return 0; - -/* End of ZLATRD */ - -} /* zlatrd_ */ - -/* Subroutine */ int zlatrs_(char *uplo, char *trans, char *diag, char * - normin, integer *n, doublecomplex *a, integer *lda, doublecomplex *x, - doublereal *scale, doublereal *cnorm, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - doublereal d__1, d__2, d__3, d__4; - doublecomplex z__1, z__2, z__3, z__4; - - /* Builtin functions */ - double d_imag(doublecomplex *); - void d_cnjg(doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer i__, j; - static doublereal xj, rec, tjj; - static integer jinc; - static doublereal xbnd; - static integer imax; - static doublereal tmax; - static doublecomplex tjjs; - static doublereal xmax, grow; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - static doublereal tscal; - static doublecomplex uscal; - static integer jlast; - static doublecomplex csumj; - extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, integer *); - static logical upper; - extern /* Double Complex */ VOID zdotu_(doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, integer *); - extern /* Subroutine */ int zaxpy_(integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *), ztrsv_( - char *, char *, char *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *), dlabad_( - doublereal *, doublereal *); - - extern integer idamax_(integer *, doublereal *, integer *); - extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_( - integer *, doublereal *, doublecomplex *, integer *); - static doublereal bignum; - extern integer izamax_(integer *, doublecomplex *, integer *); - extern /* Double Complex */ VOID zladiv_(doublecomplex *, doublecomplex *, - doublecomplex *); - static logical notran; - static integer jfirst; - extern doublereal dzasum_(integer *, doublecomplex *, integer *); - static doublereal smlnum; - static logical nounit; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1992 - - - Purpose - ======= - - ZLATRS solves one of the triangular systems - - A * x = s*b, A**T * x = s*b, or A**H * x = s*b, - - with scaling to prevent overflow. Here A is an upper or lower - triangular matrix, A**T denotes the transpose of A, A**H denotes the - conjugate transpose of A, x and b are n-element vectors, and s is a - scaling factor, usually less than or equal to 1, chosen so that the - components of x will be less than the overflow threshold. If the - unscaled problem will not cause overflow, the Level 2 BLAS routine - ZTRSV is called. If the matrix A is singular (A(j,j) = 0 for some j), - then s is set to 0 and a non-trivial solution to A*x = 0 is returned. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the matrix A is upper or lower triangular. - = 'U': Upper triangular - = 'L': Lower triangular - - TRANS (input) CHARACTER*1 - Specifies the operation applied to A. - = 'N': Solve A * x = s*b (No transpose) - = 'T': Solve A**T * x = s*b (Transpose) - = 'C': Solve A**H * x = s*b (Conjugate transpose) - - DIAG (input) CHARACTER*1 - Specifies whether or not the matrix A is unit triangular. - = 'N': Non-unit triangular - = 'U': Unit triangular - - NORMIN (input) CHARACTER*1 - Specifies whether CNORM has been set or not. - = 'Y': CNORM contains the column norms on entry - = 'N': CNORM is not set on entry. On exit, the norms will - be computed and stored in CNORM. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input) COMPLEX*16 array, dimension (LDA,N) - The triangular matrix A. If UPLO = 'U', the leading n by n - upper triangular part of the array A contains the upper - triangular matrix, and the strictly lower triangular part of - A is not referenced. If UPLO = 'L', the leading n by n lower - triangular part of the array A contains the lower triangular - matrix, and the strictly upper triangular part of A is not - referenced. If DIAG = 'U', the diagonal elements of A are - also not referenced and are assumed to be 1. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max (1,N). - - X (input/output) COMPLEX*16 array, dimension (N) - On entry, the right hand side b of the triangular system. - On exit, X is overwritten by the solution vector x. - - SCALE (output) DOUBLE PRECISION - The scaling factor s for the triangular system - A * x = s*b, A**T * x = s*b, or A**H * x = s*b. - If SCALE = 0, the matrix A is singular or badly scaled, and - the vector x is an exact or approximate solution to A*x = 0. - - CNORM (input or output) DOUBLE PRECISION array, dimension (N) - - If NORMIN = 'Y', CNORM is an input argument and CNORM(j) - contains the norm of the off-diagonal part of the j-th column - of A. If TRANS = 'N', CNORM(j) must be greater than or equal - to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) - must be greater than or equal to the 1-norm. - - If NORMIN = 'N', CNORM is an output argument and CNORM(j) - returns the 1-norm of the offdiagonal part of the j-th column - of A. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - - Further Details - ======= ======= - - A rough bound on x is computed; if that is less than overflow, ZTRSV - is called, otherwise, specific code is used which checks for possible - overflow or divide-by-zero at every operation. - - A columnwise scheme is used for solving A*x = b. The basic algorithm - if A is lower triangular is - - x[1:n] := b[1:n] - for j = 1, ..., n - x(j) := x(j) / A(j,j) - x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] - end - - Define bounds on the components of x after j iterations of the loop: - M(j) = bound on x[1:j] - G(j) = bound on x[j+1:n] - Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. - - Then for iteration j+1 we have - M(j+1) <= G(j) / | A(j+1,j+1) | - G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | - <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) - - where CNORM(j+1) is greater than or equal to the infinity-norm of - column j+1 of A, not counting the diagonal. Hence - - G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) - 1<=i<=j - and - - |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) - 1<=i< j - - Since |x(j)| <= M(j), we use the Level 2 BLAS routine ZTRSV if the - reciprocal of the largest M(j), j=1,..,n, is larger than - max(underflow, 1/overflow). - - The bound on x(j) is also used to determine when a step in the - columnwise method can be performed without fear of overflow. If - the computed bound is greater than a large constant, x is scaled to - prevent overflow, but if the bound overflows, x is set to 0, x(j) to - 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. - - Similarly, a row-wise scheme is used to solve A**T *x = b or - A**H *x = b. The basic algorithm for A upper triangular is - - for j = 1, ..., n - x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) - end - - We simultaneously compute two bounds - G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j - M(j) = bound on x(i), 1<=i<=j - - The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we - add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. - Then the bound on x(j) is - - M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | - - <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) - 1<=i<=j - - and we can safely call ZTRSV if 1/M(n) and 1/G(n) are both greater - than max(underflow, 1/overflow). - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - --cnorm; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - notran = lsame_(trans, "N"); - nounit = lsame_(diag, "N"); - -/* Test the input parameters. */ - - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "T") && ! - lsame_(trans, "C")) { - *info = -2; - } else if (! nounit && ! lsame_(diag, "U")) { - *info = -3; - } else if (! lsame_(normin, "Y") && ! lsame_(normin, - "N")) { - *info = -4; - } else if (*n < 0) { - *info = -5; - } else if (*lda < max(1,*n)) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZLATRS", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Determine machine dependent parameters to control overflow. */ - - smlnum = SAFEMINIMUM; - bignum = 1. / smlnum; - dlabad_(&smlnum, &bignum); - smlnum /= PRECISION; - bignum = 1. / smlnum; - *scale = 1.; - - if (lsame_(normin, "N")) { - -/* Compute the 1-norm of each column, not including the diagonal. */ - - if (upper) { - -/* A is upper triangular. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j - 1; - cnorm[j] = dzasum_(&i__2, &a[j * a_dim1 + 1], &c__1); -/* L10: */ - } - } else { - -/* A is lower triangular. */ - - i__1 = *n - 1; - for (j = 1; j <= i__1; ++j) { - i__2 = *n - j; - cnorm[j] = dzasum_(&i__2, &a[j + 1 + j * a_dim1], &c__1); -/* L20: */ - } - cnorm[*n] = 0.; - } - } - -/* - Scale the column norms by TSCAL if the maximum element in CNORM is - greater than BIGNUM/2. -*/ - - imax = idamax_(n, &cnorm[1], &c__1); - tmax = cnorm[imax]; - if (tmax <= bignum * .5) { - tscal = 1.; - } else { - tscal = .5 / (smlnum * tmax); - dscal_(n, &tscal, &cnorm[1], &c__1); - } - -/* - Compute a bound on the computed solution vector to see if the - Level 2 BLAS routine ZTRSV can be used. -*/ - - xmax = 0.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MAX */ - i__2 = j; - d__3 = xmax, d__4 = (d__1 = x[i__2].r / 2., abs(d__1)) + (d__2 = - d_imag(&x[j]) / 2., abs(d__2)); - xmax = max(d__3,d__4); -/* L30: */ - } - xbnd = xmax; - - if (notran) { - -/* Compute the growth in A * x = b. */ - - if (upper) { - jfirst = *n; - jlast = 1; - jinc = -1; - } else { - jfirst = 1; - jlast = *n; - jinc = 1; - } - - if (tscal != 1.) { - grow = 0.; - goto L60; - } - - if (nounit) { - -/* - A is non-unit triangular. - - Compute GROW = 1/G(j) and XBND = 1/M(j). - Initially, G(0) = max{x(i), i=1,...,n}. -*/ - - grow = .5 / max(xbnd,smlnum); - xbnd = grow; - i__1 = jlast; - i__2 = jinc; - for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { - -/* Exit the loop if the growth factor is too small. */ - - if (grow <= smlnum) { - goto L60; - } - - i__3 = j + j * a_dim1; - tjjs.r = a[i__3].r, tjjs.i = a[i__3].i; - tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), abs( - d__2)); - - if (tjj >= smlnum) { - -/* - M(j) = G(j-1) / abs(A(j,j)) - - Computing MIN -*/ - d__1 = xbnd, d__2 = min(1.,tjj) * grow; - xbnd = min(d__1,d__2); - } else { - -/* M(j) could overflow, set XBND to 0. */ - - xbnd = 0.; - } - - if (tjj + cnorm[j] >= smlnum) { - -/* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */ - - grow *= tjj / (tjj + cnorm[j]); - } else { - -/* G(j) could overflow, set GROW to 0. */ - - grow = 0.; - } -/* L40: */ - } - grow = xbnd; - } else { - -/* - A is unit triangular. - - Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. - - Computing MIN -*/ - d__1 = 1., d__2 = .5 / max(xbnd,smlnum); - grow = min(d__1,d__2); - i__2 = jlast; - i__1 = jinc; - for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { - -/* Exit the loop if the growth factor is too small. */ - - if (grow <= smlnum) { - goto L60; - } - -/* G(j) = G(j-1)*( 1 + CNORM(j) ) */ - - grow *= 1. / (cnorm[j] + 1.); -/* L50: */ - } - } -L60: - - ; - } else { - -/* Compute the growth in A**T * x = b or A**H * x = b. */ - - if (upper) { - jfirst = 1; - jlast = *n; - jinc = 1; - } else { - jfirst = *n; - jlast = 1; - jinc = -1; - } - - if (tscal != 1.) { - grow = 0.; - goto L90; - } - - if (nounit) { - -/* - A is non-unit triangular. - - Compute GROW = 1/G(j) and XBND = 1/M(j). - Initially, M(0) = max{x(i), i=1,...,n}. -*/ - - grow = .5 / max(xbnd,smlnum); - xbnd = grow; - i__1 = jlast; - i__2 = jinc; - for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { - -/* Exit the loop if the growth factor is too small. */ - - if (grow <= smlnum) { - goto L90; - } - -/* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */ - - xj = cnorm[j] + 1.; -/* Computing MIN */ - d__1 = grow, d__2 = xbnd / xj; - grow = min(d__1,d__2); - - i__3 = j + j * a_dim1; - tjjs.r = a[i__3].r, tjjs.i = a[i__3].i; - tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), abs( - d__2)); - - if (tjj >= smlnum) { - -/* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */ - - if (xj > tjj) { - xbnd *= tjj / xj; - } - } else { - -/* M(j) could overflow, set XBND to 0. */ - - xbnd = 0.; - } -/* L70: */ - } - grow = min(grow,xbnd); - } else { - -/* - A is unit triangular. - - Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. - - Computing MIN -*/ - d__1 = 1., d__2 = .5 / max(xbnd,smlnum); - grow = min(d__1,d__2); - i__2 = jlast; - i__1 = jinc; - for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { - -/* Exit the loop if the growth factor is too small. */ - - if (grow <= smlnum) { - goto L90; - } - -/* G(j) = ( 1 + CNORM(j) )*G(j-1) */ - - xj = cnorm[j] + 1.; - grow /= xj; -/* L80: */ - } - } -L90: - ; - } - - if (grow * tscal > smlnum) { - -/* - Use the Level 2 BLAS solve if the reciprocal of the bound on - elements of X is not too small. -*/ - - ztrsv_(uplo, trans, diag, n, &a[a_offset], lda, &x[1], &c__1); - } else { - -/* Use a Level 1 BLAS solve, scaling intermediate results. */ - - if (xmax > bignum * .5) { - -/* - Scale X so that its components are less than or equal to - BIGNUM in absolute value. -*/ - - *scale = bignum * .5 / xmax; - zdscal_(n, scale, &x[1], &c__1); - xmax = bignum; - } else { - xmax *= 2.; - } - - if (notran) { - -/* Solve A * x = b */ - - i__1 = jlast; - i__2 = jinc; - for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { - -/* Compute x(j) = b(j) / A(j,j), scaling x if necessary. */ - - i__3 = j; - xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j]), - abs(d__2)); - if (nounit) { - i__3 = j + j * a_dim1; - z__1.r = tscal * a[i__3].r, z__1.i = tscal * a[i__3].i; - tjjs.r = z__1.r, tjjs.i = z__1.i; - } else { - tjjs.r = tscal, tjjs.i = 0.; - if (tscal == 1.) { - goto L110; - } - } - tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), abs( - d__2)); - if (tjj > smlnum) { - -/* abs(A(j,j)) > SMLNUM: */ - - if (tjj < 1.) { - if (xj > tjj * bignum) { - -/* Scale x by 1/b(j). */ - - rec = 1. / xj; - zdscal_(n, &rec, &x[1], &c__1); - *scale *= rec; - xmax *= rec; - } - } - i__3 = j; - zladiv_(&z__1, &x[j], &tjjs); - x[i__3].r = z__1.r, x[i__3].i = z__1.i; - i__3 = j; - xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j]) - , abs(d__2)); - } else if (tjj > 0.) { - -/* 0 < abs(A(j,j)) <= SMLNUM: */ - - if (xj > tjj * bignum) { - -/* - Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM - to avoid overflow when dividing by A(j,j). -*/ - - rec = tjj * bignum / xj; - if (cnorm[j] > 1.) { - -/* - Scale by 1/CNORM(j) to avoid overflow when - multiplying x(j) times column j. -*/ - - rec /= cnorm[j]; - } - zdscal_(n, &rec, &x[1], &c__1); - *scale *= rec; - xmax *= rec; - } - i__3 = j; - zladiv_(&z__1, &x[j], &tjjs); - x[i__3].r = z__1.r, x[i__3].i = z__1.i; - i__3 = j; - xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j]) - , abs(d__2)); - } else { - -/* - A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and - scale = 0, and compute a solution to A*x = 0. -*/ - - i__3 = *n; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__; - x[i__4].r = 0., x[i__4].i = 0.; -/* L100: */ - } - i__3 = j; - x[i__3].r = 1., x[i__3].i = 0.; - xj = 1.; - *scale = 0.; - xmax = 0.; - } -L110: - -/* - Scale x if necessary to avoid overflow when adding a - multiple of column j of A. -*/ - - if (xj > 1.) { - rec = 1. / xj; - if (cnorm[j] > (bignum - xmax) * rec) { - -/* Scale x by 1/(2*abs(x(j))). */ - - rec *= .5; - zdscal_(n, &rec, &x[1], &c__1); - *scale *= rec; - } - } else if (xj * cnorm[j] > bignum - xmax) { - -/* Scale x by 1/2. */ - - zdscal_(n, &c_b2210, &x[1], &c__1); - *scale *= .5; - } - - if (upper) { - if (j > 1) { - -/* - Compute the update - x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) -*/ - - i__3 = j - 1; - i__4 = j; - z__2.r = -x[i__4].r, z__2.i = -x[i__4].i; - z__1.r = tscal * z__2.r, z__1.i = tscal * z__2.i; - zaxpy_(&i__3, &z__1, &a[j * a_dim1 + 1], &c__1, &x[1], - &c__1); - i__3 = j - 1; - i__ = izamax_(&i__3, &x[1], &c__1); - i__3 = i__; - xmax = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag( - &x[i__]), abs(d__2)); - } - } else { - if (j < *n) { - -/* - Compute the update - x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) -*/ - - i__3 = *n - j; - i__4 = j; - z__2.r = -x[i__4].r, z__2.i = -x[i__4].i; - z__1.r = tscal * z__2.r, z__1.i = tscal * z__2.i; - zaxpy_(&i__3, &z__1, &a[j + 1 + j * a_dim1], &c__1, & - x[j + 1], &c__1); - i__3 = *n - j; - i__ = j + izamax_(&i__3, &x[j + 1], &c__1); - i__3 = i__; - xmax = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag( - &x[i__]), abs(d__2)); - } - } -/* L120: */ - } - - } else if (lsame_(trans, "T")) { - -/* Solve A**T * x = b */ - - i__2 = jlast; - i__1 = jinc; - for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { - -/* - Compute x(j) = b(j) - sum A(k,j)*x(k). - k<>j -*/ - - i__3 = j; - xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j]), - abs(d__2)); - uscal.r = tscal, uscal.i = 0.; - rec = 1. / max(xmax,1.); - if (cnorm[j] > (bignum - xj) * rec) { - -/* If x(j) could overflow, scale x by 1/(2*XMAX). */ - - rec *= .5; - if (nounit) { - i__3 = j + j * a_dim1; - z__1.r = tscal * a[i__3].r, z__1.i = tscal * a[i__3] - .i; - tjjs.r = z__1.r, tjjs.i = z__1.i; - } else { - tjjs.r = tscal, tjjs.i = 0.; - } - tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), - abs(d__2)); - if (tjj > 1.) { - -/* - Divide by A(j,j) when scaling x if A(j,j) > 1. - - Computing MIN -*/ - d__1 = 1., d__2 = rec * tjj; - rec = min(d__1,d__2); - zladiv_(&z__1, &uscal, &tjjs); - uscal.r = z__1.r, uscal.i = z__1.i; - } - if (rec < 1.) { - zdscal_(n, &rec, &x[1], &c__1); - *scale *= rec; - xmax *= rec; - } - } - - csumj.r = 0., csumj.i = 0.; - if (uscal.r == 1. && uscal.i == 0.) { - -/* - If the scaling needed for A in the dot product is 1, - call ZDOTU to perform the dot product. -*/ - - if (upper) { - i__3 = j - 1; - zdotu_(&z__1, &i__3, &a[j * a_dim1 + 1], &c__1, &x[1], - &c__1); - csumj.r = z__1.r, csumj.i = z__1.i; - } else if (j < *n) { - i__3 = *n - j; - zdotu_(&z__1, &i__3, &a[j + 1 + j * a_dim1], &c__1, & - x[j + 1], &c__1); - csumj.r = z__1.r, csumj.i = z__1.i; - } - } else { - -/* Otherwise, use in-line code for the dot product. */ - - if (upper) { - i__3 = j - 1; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * a_dim1; - z__3.r = a[i__4].r * uscal.r - a[i__4].i * - uscal.i, z__3.i = a[i__4].r * uscal.i + a[ - i__4].i * uscal.r; - i__5 = i__; - z__2.r = z__3.r * x[i__5].r - z__3.i * x[i__5].i, - z__2.i = z__3.r * x[i__5].i + z__3.i * x[ - i__5].r; - z__1.r = csumj.r + z__2.r, z__1.i = csumj.i + - z__2.i; - csumj.r = z__1.r, csumj.i = z__1.i; -/* L130: */ - } - } else if (j < *n) { - i__3 = *n; - for (i__ = j + 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * a_dim1; - z__3.r = a[i__4].r * uscal.r - a[i__4].i * - uscal.i, z__3.i = a[i__4].r * uscal.i + a[ - i__4].i * uscal.r; - i__5 = i__; - z__2.r = z__3.r * x[i__5].r - z__3.i * x[i__5].i, - z__2.i = z__3.r * x[i__5].i + z__3.i * x[ - i__5].r; - z__1.r = csumj.r + z__2.r, z__1.i = csumj.i + - z__2.i; - csumj.r = z__1.r, csumj.i = z__1.i; -/* L140: */ - } - } - } - - z__1.r = tscal, z__1.i = 0.; - if (uscal.r == z__1.r && uscal.i == z__1.i) { - -/* - Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) - was not used to scale the dotproduct. -*/ - - i__3 = j; - i__4 = j; - z__1.r = x[i__4].r - csumj.r, z__1.i = x[i__4].i - - csumj.i; - x[i__3].r = z__1.r, x[i__3].i = z__1.i; - i__3 = j; - xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j]) - , abs(d__2)); - if (nounit) { - i__3 = j + j * a_dim1; - z__1.r = tscal * a[i__3].r, z__1.i = tscal * a[i__3] - .i; - tjjs.r = z__1.r, tjjs.i = z__1.i; - } else { - tjjs.r = tscal, tjjs.i = 0.; - if (tscal == 1.) { - goto L160; - } - } - -/* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ - - tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), - abs(d__2)); - if (tjj > smlnum) { - -/* abs(A(j,j)) > SMLNUM: */ - - if (tjj < 1.) { - if (xj > tjj * bignum) { - -/* Scale X by 1/abs(x(j)). */ - - rec = 1. / xj; - zdscal_(n, &rec, &x[1], &c__1); - *scale *= rec; - xmax *= rec; - } - } - i__3 = j; - zladiv_(&z__1, &x[j], &tjjs); - x[i__3].r = z__1.r, x[i__3].i = z__1.i; - } else if (tjj > 0.) { - -/* 0 < abs(A(j,j)) <= SMLNUM: */ - - if (xj > tjj * bignum) { - -/* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ - - rec = tjj * bignum / xj; - zdscal_(n, &rec, &x[1], &c__1); - *scale *= rec; - xmax *= rec; - } - i__3 = j; - zladiv_(&z__1, &x[j], &tjjs); - x[i__3].r = z__1.r, x[i__3].i = z__1.i; - } else { - -/* - A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and - scale = 0 and compute a solution to A**T *x = 0. -*/ - - i__3 = *n; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__; - x[i__4].r = 0., x[i__4].i = 0.; -/* L150: */ - } - i__3 = j; - x[i__3].r = 1., x[i__3].i = 0.; - *scale = 0.; - xmax = 0.; - } -L160: - ; - } else { - -/* - Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot - product has already been divided by 1/A(j,j). -*/ - - i__3 = j; - zladiv_(&z__2, &x[j], &tjjs); - z__1.r = z__2.r - csumj.r, z__1.i = z__2.i - csumj.i; - x[i__3].r = z__1.r, x[i__3].i = z__1.i; - } -/* Computing MAX */ - i__3 = j; - d__3 = xmax, d__4 = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = - d_imag(&x[j]), abs(d__2)); - xmax = max(d__3,d__4); -/* L170: */ - } - - } else { - -/* Solve A**H * x = b */ - - i__1 = jlast; - i__2 = jinc; - for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { - -/* - Compute x(j) = b(j) - sum A(k,j)*x(k). - k<>j -*/ - - i__3 = j; - xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j]), - abs(d__2)); - uscal.r = tscal, uscal.i = 0.; - rec = 1. / max(xmax,1.); - if (cnorm[j] > (bignum - xj) * rec) { - -/* If x(j) could overflow, scale x by 1/(2*XMAX). */ - - rec *= .5; - if (nounit) { - d_cnjg(&z__2, &a[j + j * a_dim1]); - z__1.r = tscal * z__2.r, z__1.i = tscal * z__2.i; - tjjs.r = z__1.r, tjjs.i = z__1.i; - } else { - tjjs.r = tscal, tjjs.i = 0.; - } - tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), - abs(d__2)); - if (tjj > 1.) { - -/* - Divide by A(j,j) when scaling x if A(j,j) > 1. - - Computing MIN -*/ - d__1 = 1., d__2 = rec * tjj; - rec = min(d__1,d__2); - zladiv_(&z__1, &uscal, &tjjs); - uscal.r = z__1.r, uscal.i = z__1.i; - } - if (rec < 1.) { - zdscal_(n, &rec, &x[1], &c__1); - *scale *= rec; - xmax *= rec; - } - } - - csumj.r = 0., csumj.i = 0.; - if (uscal.r == 1. && uscal.i == 0.) { - -/* - If the scaling needed for A in the dot product is 1, - call ZDOTC to perform the dot product. -*/ - - if (upper) { - i__3 = j - 1; - zdotc_(&z__1, &i__3, &a[j * a_dim1 + 1], &c__1, &x[1], - &c__1); - csumj.r = z__1.r, csumj.i = z__1.i; - } else if (j < *n) { - i__3 = *n - j; - zdotc_(&z__1, &i__3, &a[j + 1 + j * a_dim1], &c__1, & - x[j + 1], &c__1); - csumj.r = z__1.r, csumj.i = z__1.i; - } - } else { - -/* Otherwise, use in-line code for the dot product. */ - - if (upper) { - i__3 = j - 1; - for (i__ = 1; i__ <= i__3; ++i__) { - d_cnjg(&z__4, &a[i__ + j * a_dim1]); - z__3.r = z__4.r * uscal.r - z__4.i * uscal.i, - z__3.i = z__4.r * uscal.i + z__4.i * - uscal.r; - i__4 = i__; - z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, - z__2.i = z__3.r * x[i__4].i + z__3.i * x[ - i__4].r; - z__1.r = csumj.r + z__2.r, z__1.i = csumj.i + - z__2.i; - csumj.r = z__1.r, csumj.i = z__1.i; -/* L180: */ - } - } else if (j < *n) { - i__3 = *n; - for (i__ = j + 1; i__ <= i__3; ++i__) { - d_cnjg(&z__4, &a[i__ + j * a_dim1]); - z__3.r = z__4.r * uscal.r - z__4.i * uscal.i, - z__3.i = z__4.r * uscal.i + z__4.i * - uscal.r; - i__4 = i__; - z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, - z__2.i = z__3.r * x[i__4].i + z__3.i * x[ - i__4].r; - z__1.r = csumj.r + z__2.r, z__1.i = csumj.i + - z__2.i; - csumj.r = z__1.r, csumj.i = z__1.i; -/* L190: */ - } - } - } - - z__1.r = tscal, z__1.i = 0.; - if (uscal.r == z__1.r && uscal.i == z__1.i) { - -/* - Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) - was not used to scale the dotproduct. -*/ - - i__3 = j; - i__4 = j; - z__1.r = x[i__4].r - csumj.r, z__1.i = x[i__4].i - - csumj.i; - x[i__3].r = z__1.r, x[i__3].i = z__1.i; - i__3 = j; - xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j]) - , abs(d__2)); - if (nounit) { - d_cnjg(&z__2, &a[j + j * a_dim1]); - z__1.r = tscal * z__2.r, z__1.i = tscal * z__2.i; - tjjs.r = z__1.r, tjjs.i = z__1.i; - } else { - tjjs.r = tscal, tjjs.i = 0.; - if (tscal == 1.) { - goto L210; - } - } - -/* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ - - tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), - abs(d__2)); - if (tjj > smlnum) { - -/* abs(A(j,j)) > SMLNUM: */ - - if (tjj < 1.) { - if (xj > tjj * bignum) { - -/* Scale X by 1/abs(x(j)). */ - - rec = 1. / xj; - zdscal_(n, &rec, &x[1], &c__1); - *scale *= rec; - xmax *= rec; - } - } - i__3 = j; - zladiv_(&z__1, &x[j], &tjjs); - x[i__3].r = z__1.r, x[i__3].i = z__1.i; - } else if (tjj > 0.) { - -/* 0 < abs(A(j,j)) <= SMLNUM: */ - - if (xj > tjj * bignum) { - -/* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ - - rec = tjj * bignum / xj; - zdscal_(n, &rec, &x[1], &c__1); - *scale *= rec; - xmax *= rec; - } - i__3 = j; - zladiv_(&z__1, &x[j], &tjjs); - x[i__3].r = z__1.r, x[i__3].i = z__1.i; - } else { - -/* - A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and - scale = 0 and compute a solution to A**H *x = 0. -*/ - - i__3 = *n; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__; - x[i__4].r = 0., x[i__4].i = 0.; -/* L200: */ - } - i__3 = j; - x[i__3].r = 1., x[i__3].i = 0.; - *scale = 0.; - xmax = 0.; - } -L210: - ; - } else { - -/* - Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot - product has already been divided by 1/A(j,j). -*/ - - i__3 = j; - zladiv_(&z__2, &x[j], &tjjs); - z__1.r = z__2.r - csumj.r, z__1.i = z__2.i - csumj.i; - x[i__3].r = z__1.r, x[i__3].i = z__1.i; - } -/* Computing MAX */ - i__3 = j; - d__3 = xmax, d__4 = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = - d_imag(&x[j]), abs(d__2)); - xmax = max(d__3,d__4); -/* L220: */ - } - } - *scale /= tscal; - } - -/* Scale the column norms by 1/TSCAL for return. */ - - if (tscal != 1.) { - d__1 = 1. / tscal; - dscal_(n, &d__1, &cnorm[1], &c__1); - } - - return 0; - -/* End of ZLATRS */ - -} /* zlatrs_ */ - -/* Subroutine */ int zlauu2_(char *uplo, integer *n, doublecomplex *a, - integer *lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - doublereal d__1; - doublecomplex z__1; - - /* Local variables */ - static integer i__; - static doublereal aii; - extern logical lsame_(char *, char *); - extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, integer *); - extern /* Subroutine */ int zgemv_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *); - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_( - integer *, doublereal *, doublecomplex *, integer *), zlacgv_( - integer *, doublecomplex *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZLAUU2 computes the product U * U' or L' * L, where the triangular - factor U or L is stored in the upper or lower triangular part of - the array A. - - If UPLO = 'U' or 'u' then the upper triangle of the result is stored, - overwriting the factor U in A. - If UPLO = 'L' or 'l' then the lower triangle of the result is stored, - overwriting the factor L in A. - - This is the unblocked form of the algorithm, calling Level 2 BLAS. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the triangular factor stored in the array A - is upper or lower triangular: - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the triangular factor U or L. N >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the triangular factor U or L. - On exit, if UPLO = 'U', the upper triangle of A is - overwritten with the upper triangle of the product U * U'; - if UPLO = 'L', the lower triangle of A is overwritten with - the lower triangle of the product L' * L. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZLAUU2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - if (upper) { - -/* Compute the product U * U'. */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + i__ * a_dim1; - aii = a[i__2].r; - if (i__ < *n) { - i__2 = i__ + i__ * a_dim1; - i__3 = *n - i__; - zdotc_(&z__1, &i__3, &a[i__ + (i__ + 1) * a_dim1], lda, &a[ - i__ + (i__ + 1) * a_dim1], lda); - d__1 = aii * aii + z__1.r; - a[i__2].r = d__1, a[i__2].i = 0.; - i__2 = *n - i__; - zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); - i__2 = i__ - 1; - i__3 = *n - i__; - z__1.r = aii, z__1.i = 0.; - zgemv_("No transpose", &i__2, &i__3, &c_b60, &a[(i__ + 1) * - a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & - z__1, &a[i__ * a_dim1 + 1], &c__1); - i__2 = *n - i__; - zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); - } else { - zdscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1); - } -/* L10: */ - } - - } else { - -/* Compute the product L' * L. */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + i__ * a_dim1; - aii = a[i__2].r; - if (i__ < *n) { - i__2 = i__ + i__ * a_dim1; - i__3 = *n - i__; - zdotc_(&z__1, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &a[ - i__ + 1 + i__ * a_dim1], &c__1); - d__1 = aii * aii + z__1.r; - a[i__2].r = d__1, a[i__2].i = 0.; - i__2 = i__ - 1; - zlacgv_(&i__2, &a[i__ + a_dim1], lda); - i__2 = *n - i__; - i__3 = i__ - 1; - z__1.r = aii, z__1.i = 0.; - zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[i__ + - 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - z__1, &a[i__ + a_dim1], lda); - i__2 = i__ - 1; - zlacgv_(&i__2, &a[i__ + a_dim1], lda); - } else { - zdscal_(&i__, &aii, &a[i__ + a_dim1], lda); - } -/* L20: */ - } - } - - return 0; - -/* End of ZLAUU2 */ - -} /* zlauu2_ */ - -/* Subroutine */ int zlauum_(char *uplo, integer *n, doublecomplex *a, - integer *lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, ib, nb; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, - integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *), zherk_(char *, char *, integer *, - integer *, doublereal *, doublecomplex *, integer *, doublereal *, - doublecomplex *, integer *); - static logical upper; - extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *, - integer *, integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *), - zlauu2_(char *, integer *, doublecomplex *, integer *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZLAUUM computes the product U * U' or L' * L, where the triangular - factor U or L is stored in the upper or lower triangular part of - the array A. - - If UPLO = 'U' or 'u' then the upper triangle of the result is stored, - overwriting the factor U in A. - If UPLO = 'L' or 'l' then the lower triangle of the result is stored, - overwriting the factor L in A. - - This is the blocked form of the algorithm, calling Level 3 BLAS. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the triangular factor stored in the array A - is upper or lower triangular: - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the triangular factor U or L. N >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the triangular factor U or L. - On exit, if UPLO = 'U', the upper triangle of A is - overwritten with the upper triangle of the product U * U'; - if UPLO = 'L', the lower triangle of A is overwritten with - the lower triangle of the product L' * L. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZLAUUM", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Determine the block size for this environment. */ - - nb = ilaenv_(&c__1, "ZLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - - if (nb <= 1 || nb >= *n) { - -/* Use unblocked code */ - - zlauu2_(uplo, n, &a[a_offset], lda, info); - } else { - -/* Use blocked code */ - - if (upper) { - -/* Compute the product U * U'. */ - - i__1 = *n; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = nb, i__4 = *n - i__ + 1; - ib = min(i__3,i__4); - i__3 = i__ - 1; - ztrmm_("Right", "Upper", "Conjugate transpose", "Non-unit", & - i__3, &ib, &c_b60, &a[i__ + i__ * a_dim1], lda, &a[ - i__ * a_dim1 + 1], lda); - zlauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info); - if (i__ + ib <= *n) { - i__3 = i__ - 1; - i__4 = *n - i__ - ib + 1; - zgemm_("No transpose", "Conjugate transpose", &i__3, &ib, - &i__4, &c_b60, &a[(i__ + ib) * a_dim1 + 1], lda, & - a[i__ + (i__ + ib) * a_dim1], lda, &c_b60, &a[i__ - * a_dim1 + 1], lda); - i__3 = *n - i__ - ib + 1; - zherk_("Upper", "No transpose", &ib, &i__3, &c_b1015, &a[ - i__ + (i__ + ib) * a_dim1], lda, &c_b1015, &a[i__ - + i__ * a_dim1], lda); - } -/* L10: */ - } - } else { - -/* Compute the product L' * L. */ - - i__2 = *n; - i__1 = nb; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { -/* Computing MIN */ - i__3 = nb, i__4 = *n - i__ + 1; - ib = min(i__3,i__4); - i__3 = i__ - 1; - ztrmm_("Left", "Lower", "Conjugate transpose", "Non-unit", & - ib, &i__3, &c_b60, &a[i__ + i__ * a_dim1], lda, &a[ - i__ + a_dim1], lda); - zlauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info); - if (i__ + ib <= *n) { - i__3 = i__ - 1; - i__4 = *n - i__ - ib + 1; - zgemm_("Conjugate transpose", "No transpose", &ib, &i__3, - &i__4, &c_b60, &a[i__ + ib + i__ * a_dim1], lda, & - a[i__ + ib + a_dim1], lda, &c_b60, &a[i__ + - a_dim1], lda); - i__3 = *n - i__ - ib + 1; - zherk_("Lower", "Conjugate transpose", &ib, &i__3, & - c_b1015, &a[i__ + ib + i__ * a_dim1], lda, & - c_b1015, &a[i__ + i__ * a_dim1], lda); - } -/* L20: */ - } - } - } - - return 0; - -/* End of ZLAUUM */ - -} /* zlauum_ */ - -/* Subroutine */ int zpotf2_(char *uplo, integer *n, doublecomplex *a, - integer *lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - doublereal d__1; - doublecomplex z__1, z__2; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer j; - static doublereal ajj; - extern logical lsame_(char *, char *); - extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, integer *); - extern /* Subroutine */ int zgemv_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *); - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_( - integer *, doublereal *, doublecomplex *, integer *), zlacgv_( - integer *, doublecomplex *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZPOTF2 computes the Cholesky factorization of a complex Hermitian - positive definite matrix A. - - The factorization has the form - A = U' * U , if UPLO = 'U', or - A = L * L', if UPLO = 'L', - where U is an upper triangular matrix and L is lower triangular. - - This is the unblocked version of the algorithm, calling Level 2 BLAS. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the upper or lower triangular part of the - Hermitian matrix A is stored. - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the Hermitian matrix A. If UPLO = 'U', the leading - n by n upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading n by n lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - - On exit, if INFO = 0, the factor U or L from the Cholesky - factorization A = U'*U or A = L*L'. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - > 0: if INFO = k, the leading minor of order k is not - positive definite, and the factorization could not be - completed. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZPOTF2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - if (upper) { - -/* Compute the Cholesky factorization A = U'*U. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - -/* Compute U(J,J) and test for non-positive-definiteness. */ - - i__2 = j + j * a_dim1; - d__1 = a[i__2].r; - i__3 = j - 1; - zdotc_(&z__2, &i__3, &a[j * a_dim1 + 1], &c__1, &a[j * a_dim1 + 1] - , &c__1); - z__1.r = d__1 - z__2.r, z__1.i = -z__2.i; - ajj = z__1.r; - if (ajj <= 0.) { - i__2 = j + j * a_dim1; - a[i__2].r = ajj, a[i__2].i = 0.; - goto L30; - } - ajj = sqrt(ajj); - i__2 = j + j * a_dim1; - a[i__2].r = ajj, a[i__2].i = 0.; - -/* Compute elements J+1:N of row J. */ - - if (j < *n) { - i__2 = j - 1; - zlacgv_(&i__2, &a[j * a_dim1 + 1], &c__1); - i__2 = j - 1; - i__3 = *n - j; - z__1.r = -1., z__1.i = -0.; - zgemv_("Transpose", &i__2, &i__3, &z__1, &a[(j + 1) * a_dim1 - + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b60, &a[j + ( - j + 1) * a_dim1], lda); - i__2 = j - 1; - zlacgv_(&i__2, &a[j * a_dim1 + 1], &c__1); - i__2 = *n - j; - d__1 = 1. / ajj; - zdscal_(&i__2, &d__1, &a[j + (j + 1) * a_dim1], lda); - } -/* L10: */ - } - } else { - -/* Compute the Cholesky factorization A = L*L'. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - -/* Compute L(J,J) and test for non-positive-definiteness. */ - - i__2 = j + j * a_dim1; - d__1 = a[i__2].r; - i__3 = j - 1; - zdotc_(&z__2, &i__3, &a[j + a_dim1], lda, &a[j + a_dim1], lda); - z__1.r = d__1 - z__2.r, z__1.i = -z__2.i; - ajj = z__1.r; - if (ajj <= 0.) { - i__2 = j + j * a_dim1; - a[i__2].r = ajj, a[i__2].i = 0.; - goto L30; - } - ajj = sqrt(ajj); - i__2 = j + j * a_dim1; - a[i__2].r = ajj, a[i__2].i = 0.; - -/* Compute elements J+1:N of column J. */ - - if (j < *n) { - i__2 = j - 1; - zlacgv_(&i__2, &a[j + a_dim1], lda); - i__2 = *n - j; - i__3 = j - 1; - z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", &i__2, &i__3, &z__1, &a[j + 1 + a_dim1] - , lda, &a[j + a_dim1], lda, &c_b60, &a[j + 1 + j * - a_dim1], &c__1); - i__2 = j - 1; - zlacgv_(&i__2, &a[j + a_dim1], lda); - i__2 = *n - j; - d__1 = 1. / ajj; - zdscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); - } -/* L20: */ - } - } - goto L40; - -L30: - *info = j; - -L40: - return 0; - -/* End of ZPOTF2 */ - -} /* zpotf2_ */ - -/* Subroutine */ int zpotrf_(char *uplo, integer *n, doublecomplex *a, - integer *lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - doublecomplex z__1; - - /* Local variables */ - static integer j, jb, nb; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, - integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *), zherk_(char *, char *, integer *, - integer *, doublereal *, doublecomplex *, integer *, doublereal *, - doublecomplex *, integer *); - static logical upper; - extern /* Subroutine */ int ztrsm_(char *, char *, char *, char *, - integer *, integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *), - zpotf2_(char *, integer *, doublecomplex *, integer *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZPOTRF computes the Cholesky factorization of a complex Hermitian - positive definite matrix A. - - The factorization has the form - A = U**H * U, if UPLO = 'U', or - A = L * L**H, if UPLO = 'L', - where U is an upper triangular matrix and L is lower triangular. - - This is the block version of the algorithm, calling Level 3 BLAS. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the Hermitian matrix A. If UPLO = 'U', the leading - N-by-N upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading N-by-N lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - - On exit, if INFO = 0, the factor U or L from the Cholesky - factorization A = U**H*U or A = L*L**H. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the leading minor of order i is not - positive definite, and the factorization could not be - completed. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZPOTRF", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Determine the block size for this environment. */ - - nb = ilaenv_(&c__1, "ZPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - if (nb <= 1 || nb >= *n) { - -/* Use unblocked code. */ - - zpotf2_(uplo, n, &a[a_offset], lda, info); - } else { - -/* Use blocked code. */ - - if (upper) { - -/* Compute the Cholesky factorization A = U'*U. */ - - i__1 = *n; - i__2 = nb; - for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { - -/* - Update and factorize the current diagonal block and test - for non-positive-definiteness. - - Computing MIN -*/ - i__3 = nb, i__4 = *n - j + 1; - jb = min(i__3,i__4); - i__3 = j - 1; - zherk_("Upper", "Conjugate transpose", &jb, &i__3, &c_b1294, & - a[j * a_dim1 + 1], lda, &c_b1015, &a[j + j * a_dim1], - lda); - zpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); - if (*info != 0) { - goto L30; - } - if (j + jb <= *n) { - -/* Compute the current block row. */ - - i__3 = *n - j - jb + 1; - i__4 = j - 1; - z__1.r = -1., z__1.i = -0.; - zgemm_("Conjugate transpose", "No transpose", &jb, &i__3, - &i__4, &z__1, &a[j * a_dim1 + 1], lda, &a[(j + jb) - * a_dim1 + 1], lda, &c_b60, &a[j + (j + jb) * - a_dim1], lda); - i__3 = *n - j - jb + 1; - ztrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", - &jb, &i__3, &c_b60, &a[j + j * a_dim1], lda, &a[ - j + (j + jb) * a_dim1], lda); - } -/* L10: */ - } - - } else { - -/* Compute the Cholesky factorization A = L*L'. */ - - i__2 = *n; - i__1 = nb; - for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { - -/* - Update and factorize the current diagonal block and test - for non-positive-definiteness. - - Computing MIN -*/ - i__3 = nb, i__4 = *n - j + 1; - jb = min(i__3,i__4); - i__3 = j - 1; - zherk_("Lower", "No transpose", &jb, &i__3, &c_b1294, &a[j + - a_dim1], lda, &c_b1015, &a[j + j * a_dim1], lda); - zpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); - if (*info != 0) { - goto L30; - } - if (j + jb <= *n) { - -/* Compute the current block column. */ - - i__3 = *n - j - jb + 1; - i__4 = j - 1; - z__1.r = -1., z__1.i = -0.; - zgemm_("No transpose", "Conjugate transpose", &i__3, &jb, - &i__4, &z__1, &a[j + jb + a_dim1], lda, &a[j + - a_dim1], lda, &c_b60, &a[j + jb + j * a_dim1], - lda); - i__3 = *n - j - jb + 1; - ztrsm_("Right", "Lower", "Conjugate transpose", "Non-unit" - , &i__3, &jb, &c_b60, &a[j + j * a_dim1], lda, &a[ - j + jb + j * a_dim1], lda); - } -/* L20: */ - } - } - } - goto L40; - -L30: - *info = *info + j - 1; - -L40: - return 0; - -/* End of ZPOTRF */ - -} /* zpotrf_ */ - -/* Subroutine */ int zpotri_(char *uplo, integer *n, doublecomplex *a, - integer *lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1; - - /* Local variables */ - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *), zlauum_( - char *, integer *, doublecomplex *, integer *, integer *), - ztrtri_(char *, char *, integer *, doublecomplex *, integer *, - integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - ZPOTRI computes the inverse of a complex Hermitian positive definite - matrix A using the Cholesky factorization A = U**H*U or A = L*L**H - computed by ZPOTRF. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the triangular factor U or L from the Cholesky - factorization A = U**H*U or A = L*L**H, as computed by - ZPOTRF. - On exit, the upper or lower triangle of the (Hermitian) - inverse of A, overwriting the input factor U or L. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the (i,i) element of the factor U or L is - zero, and the inverse could not be computed. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZPOTRI", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Invert the triangular Cholesky factor U or L. */ - - ztrtri_(uplo, "Non-unit", n, &a[a_offset], lda, info); - if (*info > 0) { - return 0; - } - -/* Form inv(U)*inv(U)' or inv(L)'*inv(L). */ - - zlauum_(uplo, n, &a[a_offset], lda, info); - - return 0; - -/* End of ZPOTRI */ - -} /* zpotri_ */ - -/* Subroutine */ int zpotrs_(char *uplo, integer *n, integer *nrhs, - doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1; - - /* Local variables */ - extern logical lsame_(char *, char *); - static logical upper; - extern /* Subroutine */ int ztrsm_(char *, char *, char *, char *, - integer *, integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *), - xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZPOTRS solves a system of linear equations A*X = B with a Hermitian - positive definite matrix A using the Cholesky factorization - A = U**H*U or A = L*L**H computed by ZPOTRF. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - NRHS (input) INTEGER - The number of right hand sides, i.e., the number of columns - of the matrix B. NRHS >= 0. - - A (input) COMPLEX*16 array, dimension (LDA,N) - The triangular factor U or L from the Cholesky factorization - A = U**H*U or A = L*L**H, as computed by ZPOTRF. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) - On entry, the right hand side matrix B. - On exit, the solution matrix X. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*nrhs < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*ldb < max(1,*n)) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZPOTRS", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0 || *nrhs == 0) { - return 0; - } - - if (upper) { - -/* - Solve A*X = B where A = U'*U. - - Solve U'*X = B, overwriting B with X. -*/ - - ztrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", n, nrhs, & - c_b60, &a[a_offset], lda, &b[b_offset], ldb); - -/* Solve U*X = B, overwriting B with X. */ - - ztrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b60, & - a[a_offset], lda, &b[b_offset], ldb); - } else { - -/* - Solve A*X = B where A = L*L'. - - Solve L*X = B, overwriting B with X. -*/ - - ztrsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b60, & - a[a_offset], lda, &b[b_offset], ldb); - -/* Solve L'*X = B, overwriting B with X. */ - - ztrsm_("Left", "Lower", "Conjugate transpose", "Non-unit", n, nrhs, & - c_b60, &a[a_offset], lda, &b[b_offset], ldb); - } - - return 0; - -/* End of ZPOTRS */ - -} /* zpotrs_ */ - -/* Subroutine */ int zstedc_(char *compz, integer *n, doublereal *d__, - doublereal *e, doublecomplex *z__, integer *ldz, doublecomplex *work, - integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork, - integer *liwork, integer *info) -{ - /* System generated locals */ - integer z_dim1, z_offset, i__1, i__2, i__3, i__4; - doublereal d__1, d__2; - - /* Builtin functions */ - double log(doublereal); - integer pow_ii(integer *, integer *); - double sqrt(doublereal); - - /* Local variables */ - static integer i__, j, k, m; - static doublereal p; - static integer ii, ll, end, lgn; - static doublereal eps, tiny; - extern logical lsame_(char *, char *); - static integer lwmin, start; - extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, - doublecomplex *, integer *), zlaed0_(integer *, integer *, - doublereal *, doublereal *, doublecomplex *, integer *, - doublecomplex *, integer *, doublereal *, integer *, integer *); - - extern /* Subroutine */ int dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *), dstedc_(char *, integer *, - doublereal *, doublereal *, doublereal *, integer *, doublereal *, - integer *, integer *, integer *, integer *), dlaset_( - char *, integer *, integer *, doublereal *, doublereal *, - doublereal *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); - extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, - integer *), zlacrm_(integer *, integer *, doublecomplex *, - integer *, doublereal *, integer *, doublecomplex *, integer *, - doublereal *); - static integer liwmin, icompz; - extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, - doublereal *, doublereal *, integer *, doublereal *, integer *), zlacpy_(char *, integer *, integer *, doublecomplex *, - integer *, doublecomplex *, integer *); - static doublereal orgnrm; - static integer lrwmin; - static logical lquery; - static integer smlsiz; - extern /* Subroutine */ int zsteqr_(char *, integer *, doublereal *, - doublereal *, doublecomplex *, integer *, doublereal *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZSTEDC computes all eigenvalues and, optionally, eigenvectors of a - symmetric tridiagonal matrix using the divide and conquer method. - The eigenvectors of a full or band complex Hermitian matrix can also - be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this - matrix to tridiagonal form. - - This code makes very mild assumptions about floating point - arithmetic. It will work on machines with a guard digit in - add/subtract, or on those binary machines without guard digits - which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. - It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. See DLAED3 for details. - - Arguments - ========= - - COMPZ (input) CHARACTER*1 - = 'N': Compute eigenvalues only. - = 'I': Compute eigenvectors of tridiagonal matrix also. - = 'V': Compute eigenvectors of original Hermitian matrix - also. On entry, Z contains the unitary matrix used - to reduce the original matrix to tridiagonal form. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the diagonal elements of the tridiagonal matrix. - On exit, if INFO = 0, the eigenvalues in ascending order. - - E (input/output) DOUBLE PRECISION array, dimension (N-1) - On entry, the subdiagonal elements of the tridiagonal matrix. - On exit, E has been destroyed. - - Z (input/output) COMPLEX*16 array, dimension (LDZ,N) - On entry, if COMPZ = 'V', then Z contains the unitary - matrix used in the reduction to tridiagonal form. - On exit, if INFO = 0, then if COMPZ = 'V', Z contains the - orthonormal eigenvectors of the original Hermitian matrix, - and if COMPZ = 'I', Z contains the orthonormal eigenvectors - of the symmetric tridiagonal matrix. - If COMPZ = 'N', then Z is not referenced. - - LDZ (input) INTEGER - The leading dimension of the array Z. LDZ >= 1. - If eigenvectors are desired, then LDZ >= max(1,N). - - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If COMPZ = 'N' or 'I', or N <= 1, LWORK must be at least 1. - If COMPZ = 'V' and N > 1, LWORK must be at least N*N. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - RWORK (workspace/output) DOUBLE PRECISION array, - dimension (LRWORK) - On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. - - LRWORK (input) INTEGER - The dimension of the array RWORK. - If COMPZ = 'N' or N <= 1, LRWORK must be at least 1. - If COMPZ = 'V' and N > 1, LRWORK must be at least - 1 + 3*N + 2*N*lg N + 3*N**2 , - where lg( N ) = smallest integer k such - that 2**k >= N. - If COMPZ = 'I' and N > 1, LRWORK must be at least - 1 + 4*N + 2*N**2 . - - If LRWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the RWORK array, - returns this value as the first entry of the RWORK array, and - no error message related to LRWORK is issued by XERBLA. - - IWORK (workspace/output) INTEGER array, dimension (LIWORK) - On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. - - LIWORK (input) INTEGER - The dimension of the array IWORK. - If COMPZ = 'N' or N <= 1, LIWORK must be at least 1. - If COMPZ = 'V' or N > 1, LIWORK must be at least - 6 + 6*N + 5*N*lg N. - If COMPZ = 'I' or N > 1, LIWORK must be at least - 3 + 5*N . - - If LIWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the IWORK array, - returns this value as the first entry of the IWORK array, and - no error message related to LIWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an eigenvalue while - working on the submatrix lying in rows and columns - INFO/(N+1) through mod(INFO,N+1). - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - --work; - --rwork; - --iwork; - - /* Function Body */ - *info = 0; - lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; - - if (lsame_(compz, "N")) { - icompz = 0; - } else if (lsame_(compz, "V")) { - icompz = 1; - } else if (lsame_(compz, "I")) { - icompz = 2; - } else { - icompz = -1; - } - if (*n <= 1 || icompz <= 0) { - lwmin = 1; - liwmin = 1; - lrwmin = 1; - } else { - lgn = (integer) (log((doublereal) (*n)) / log(2.)); - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (icompz == 1) { - lwmin = *n * *n; -/* Computing 2nd power */ - i__1 = *n; - lrwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3; - liwmin = *n * 6 + 6 + *n * 5 * lgn; - } else if (icompz == 2) { - lwmin = 1; -/* Computing 2nd power */ - i__1 = *n; - lrwmin = (*n << 2) + 1 + (i__1 * i__1 << 1); - liwmin = *n * 5 + 3; - } - } - if (icompz < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { - *info = -6; - } else if (*lwork < lwmin && ! lquery) { - *info = -8; - } else if (*lrwork < lrwmin && ! lquery) { - *info = -10; - } else if (*liwork < liwmin && ! lquery) { - *info = -12; - } - - if (*info == 0) { - work[1].r = (doublereal) lwmin, work[1].i = 0.; - rwork[1] = (doublereal) lrwmin; - iwork[1] = liwmin; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZSTEDC", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - if (*n == 1) { - if (icompz != 0) { - i__1 = z_dim1 + 1; - z__[i__1].r = 1., z__[i__1].i = 0.; - } - return 0; - } - - smlsiz = ilaenv_(&c__9, "ZSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); - -/* - If the following conditional clause is removed, then the routine - will use the Divide and Conquer routine to compute only the - eigenvalues, which requires (3N + 3N**2) real workspace and - (2 + 5N + 2N lg(N)) integer workspace. - Since on many architectures DSTERF is much faster than any other - algorithm for finding eigenvalues only, it is used here - as the default. - - If COMPZ = 'N', use DSTERF to compute the eigenvalues. -*/ - - if (icompz == 0) { - dsterf_(n, &d__[1], &e[1], info); - return 0; - } - -/* - If N is smaller than the minimum divide size (SMLSIZ+1), then - solve the problem with another solver. -*/ - - if (*n <= smlsiz) { - if (icompz == 0) { - dsterf_(n, &d__[1], &e[1], info); - return 0; - } else if (icompz == 2) { - zsteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], - info); - return 0; - } else { - zsteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], - info); - return 0; - } - } - -/* If COMPZ = 'I', we simply call DSTEDC instead. */ - - if (icompz == 2) { - dlaset_("Full", n, n, &c_b324, &c_b1015, &rwork[1], n); - ll = *n * *n + 1; - i__1 = *lrwork - ll + 1; - dstedc_("I", n, &d__[1], &e[1], &rwork[1], n, &rwork[ll], &i__1, & - iwork[1], liwork, info); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * z_dim1; - i__4 = (j - 1) * *n + i__; - z__[i__3].r = rwork[i__4], z__[i__3].i = 0.; -/* L10: */ - } -/* L20: */ - } - return 0; - } - -/* - From now on, only option left to be handled is COMPZ = 'V', - i.e. ICOMPZ = 1. - - Scale. -*/ - - orgnrm = dlanst_("M", n, &d__[1], &e[1]); - if (orgnrm == 0.) { - return 0; - } - - eps = EPSILON; - - start = 1; - -/* while ( START <= N ) */ - -L30: - if (start <= *n) { - -/* - Let END be the position of the next subdiagonal entry such that - E( END ) <= TINY or END = N if no such subdiagonal exists. The - matrix identified by the elements between START and END - constitutes an independent sub-problem. -*/ - - end = start; -L40: - if (end < *n) { - tiny = eps * sqrt((d__1 = d__[end], abs(d__1))) * sqrt((d__2 = - d__[end + 1], abs(d__2))); - if ((d__1 = e[end], abs(d__1)) > tiny) { - ++end; - goto L40; - } - } - -/* (Sub) Problem determined. Compute its size and solve it. */ - - m = end - start + 1; - if (m > smlsiz) { - *info = smlsiz; - -/* Scale. */ - - orgnrm = dlanst_("M", &m, &d__[start], &e[start]); - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, &m, &c__1, &d__[ - start], &m, info); - i__1 = m - 1; - i__2 = m - 1; - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, &i__1, &c__1, &e[ - start], &i__2, info); - - zlaed0_(n, &m, &d__[start], &e[start], &z__[start * z_dim1 + 1], - ldz, &work[1], n, &rwork[1], &iwork[1], info); - if (*info > 0) { - *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m - + 1) + start - 1; - return 0; - } - -/* Scale back. */ - - dlascl_("G", &c__0, &c__0, &c_b1015, &orgnrm, &m, &c__1, &d__[ - start], &m, info); - - } else { - dsteqr_("I", &m, &d__[start], &e[start], &rwork[1], &m, &rwork[m * - m + 1], info); - zlacrm_(n, &m, &z__[start * z_dim1 + 1], ldz, &rwork[1], &m, & - work[1], n, &rwork[m * m + 1]); - zlacpy_("A", n, &m, &work[1], n, &z__[start * z_dim1 + 1], ldz); - if (*info > 0) { - *info = start * (*n + 1) + end; - return 0; - } - } - - start = end + 1; - goto L30; - } - -/* - endwhile - - If the problem split any number of times, then the eigenvalues - will not be properly ordered. Here we permute the eigenvalues - (and the associated eigenvectors) into ascending order. -*/ - - if (m != *n) { - -/* Use Selection Sort to minimize swaps of eigenvectors */ - - i__1 = *n; - for (ii = 2; ii <= i__1; ++ii) { - i__ = ii - 1; - k = i__; - p = d__[i__]; - i__2 = *n; - for (j = ii; j <= i__2; ++j) { - if (d__[j] < p) { - k = j; - p = d__[j]; - } -/* L50: */ - } - if (k != i__) { - d__[k] = d__[i__]; - d__[i__] = p; - zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], - &c__1); - } -/* L60: */ - } - } - - work[1].r = (doublereal) lwmin, work[1].i = 0.; - rwork[1] = (doublereal) lrwmin; - iwork[1] = liwmin; - - return 0; - -/* End of ZSTEDC */ - -} /* zstedc_ */ - -/* Subroutine */ int zsteqr_(char *compz, integer *n, doublereal *d__, - doublereal *e, doublecomplex *z__, integer *ldz, doublereal *work, - integer *info) -{ - /* System generated locals */ - integer z_dim1, z_offset, i__1, i__2; - doublereal d__1, d__2; - - /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); - - /* Local variables */ - static doublereal b, c__, f, g; - static integer i__, j, k, l, m; - static doublereal p, r__, s; - static integer l1, ii, mm, lm1, mm1, nm1; - static doublereal rt1, rt2, eps; - static integer lsv; - static doublereal tst, eps2; - static integer lend, jtot; - extern /* Subroutine */ int dlae2_(doublereal *, doublereal *, doublereal - *, doublereal *, doublereal *); - extern logical lsame_(char *, char *); - static doublereal anorm; - extern /* Subroutine */ int zlasr_(char *, char *, char *, integer *, - integer *, doublereal *, doublereal *, doublecomplex *, integer *), zswap_(integer *, doublecomplex *, - integer *, doublecomplex *, integer *), dlaev2_(doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *); - static integer lendm1, lendp1; - - static integer iscale; - extern /* Subroutine */ int dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *); - static doublereal safmin; - extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *); - static doublereal safmax; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); - extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, - integer *); - static integer lendsv; - static doublereal ssfmin; - static integer nmaxit, icompz; - static doublereal ssfmax; - extern /* Subroutine */ int zlaset_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, doublecomplex *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZSTEQR computes all eigenvalues and, optionally, eigenvectors of a - symmetric tridiagonal matrix using the implicit QL or QR method. - The eigenvectors of a full or band complex Hermitian matrix can also - be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this - matrix to tridiagonal form. - - Arguments - ========= - - COMPZ (input) CHARACTER*1 - = 'N': Compute eigenvalues only. - = 'V': Compute eigenvalues and eigenvectors of the original - Hermitian matrix. On entry, Z must contain the - unitary matrix used to reduce the original matrix - to tridiagonal form. - = 'I': Compute eigenvalues and eigenvectors of the - tridiagonal matrix. Z is initialized to the identity - matrix. - - N (input) INTEGER - The order of the matrix. N >= 0. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the diagonal elements of the tridiagonal matrix. - On exit, if INFO = 0, the eigenvalues in ascending order. - - E (input/output) DOUBLE PRECISION array, dimension (N-1) - On entry, the (n-1) subdiagonal elements of the tridiagonal - matrix. - On exit, E has been destroyed. - - Z (input/output) COMPLEX*16 array, dimension (LDZ, N) - On entry, if COMPZ = 'V', then Z contains the unitary - matrix used in the reduction to tridiagonal form. - On exit, if INFO = 0, then if COMPZ = 'V', Z contains the - orthonormal eigenvectors of the original Hermitian matrix, - and if COMPZ = 'I', Z contains the orthonormal eigenvectors - of the symmetric tridiagonal matrix. - If COMPZ = 'N', then Z is not referenced. - - LDZ (input) INTEGER - The leading dimension of the array Z. LDZ >= 1, and if - eigenvectors are desired, then LDZ >= max(1,N). - - WORK (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2)) - If COMPZ = 'N', then WORK is not referenced. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: the algorithm has failed to find all the eigenvalues in - a total of 30*N iterations; if INFO = i, then i - elements of E have not converged to zero; on exit, D - and E contain the elements of a symmetric tridiagonal - matrix which is unitarily similar to the original - matrix. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - --work; - - /* Function Body */ - *info = 0; - - if (lsame_(compz, "N")) { - icompz = 0; - } else if (lsame_(compz, "V")) { - icompz = 1; - } else if (lsame_(compz, "I")) { - icompz = 2; - } else { - icompz = -1; - } - if (icompz < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { - *info = -6; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZSTEQR", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - if (*n == 1) { - if (icompz == 2) { - i__1 = z_dim1 + 1; - z__[i__1].r = 1., z__[i__1].i = 0.; - } - return 0; - } - -/* Determine the unit roundoff and over/underflow thresholds. */ - - eps = EPSILON; -/* Computing 2nd power */ - d__1 = eps; - eps2 = d__1 * d__1; - safmin = SAFEMINIMUM; - safmax = 1. / safmin; - ssfmax = sqrt(safmax) / 3.; - ssfmin = sqrt(safmin) / eps2; - -/* - Compute the eigenvalues and eigenvectors of the tridiagonal - matrix. -*/ - - if (icompz == 2) { - zlaset_("Full", n, n, &c_b59, &c_b60, &z__[z_offset], ldz); - } - - nmaxit = *n * 30; - jtot = 0; - -/* - Determine where the matrix splits and choose QL or QR iteration - for each block, according to whether top or bottom diagonal - element is smaller. -*/ - - l1 = 1; - nm1 = *n - 1; - -L10: - if (l1 > *n) { - goto L160; - } - if (l1 > 1) { - e[l1 - 1] = 0.; - } - if (l1 <= nm1) { - i__1 = nm1; - for (m = l1; m <= i__1; ++m) { - tst = (d__1 = e[m], abs(d__1)); - if (tst == 0.) { - goto L30; - } - if (tst <= sqrt((d__1 = d__[m], abs(d__1))) * sqrt((d__2 = d__[m - + 1], abs(d__2))) * eps) { - e[m] = 0.; - goto L30; - } -/* L20: */ - } - } - m = *n; - -L30: - l = l1; - lsv = l; - lend = m; - lendsv = lend; - l1 = m + 1; - if (lend == l) { - goto L10; - } - -/* Scale submatrix in rows and columns L to LEND */ - - i__1 = lend - l + 1; - anorm = dlanst_("I", &i__1, &d__[l], &e[l]); - iscale = 0; - if (anorm == 0.) { - goto L10; - } - if (anorm > ssfmax) { - iscale = 1; - i__1 = lend - l + 1; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, - info); - i__1 = lend - l; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, - info); - } else if (anorm < ssfmin) { - iscale = 2; - i__1 = lend - l + 1; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, - info); - i__1 = lend - l; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, - info); - } - -/* Choose between QL and QR iteration */ - - if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { - lend = lsv; - l = lendsv; - } - - if (lend > l) { - -/* - QL Iteration - - Look for small subdiagonal element. -*/ - -L40: - if (l != lend) { - lendm1 = lend - 1; - i__1 = lendm1; - for (m = l; m <= i__1; ++m) { -/* Computing 2nd power */ - d__2 = (d__1 = e[m], abs(d__1)); - tst = d__2 * d__2; - if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m - + 1], abs(d__2)) + safmin) { - goto L60; - } -/* L50: */ - } - } - - m = lend; - -L60: - if (m < lend) { - e[m] = 0.; - } - p = d__[l]; - if (m == l) { - goto L80; - } - -/* - If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 - to compute its eigensystem. -*/ - - if (m == l + 1) { - if (icompz > 0) { - dlaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); - work[l] = c__; - work[*n - 1 + l] = s; - zlasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & - z__[l * z_dim1 + 1], ldz); - } else { - dlae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); - } - d__[l] = rt1; - d__[l + 1] = rt2; - e[l] = 0.; - l += 2; - if (l <= lend) { - goto L40; - } - goto L140; - } - - if (jtot == nmaxit) { - goto L140; - } - ++jtot; - -/* Form shift. */ - - g = (d__[l + 1] - p) / (e[l] * 2.); - r__ = dlapy2_(&g, &c_b1015); - g = d__[m] - p + e[l] / (g + d_sign(&r__, &g)); - - s = 1.; - c__ = 1.; - p = 0.; - -/* Inner loop */ - - mm1 = m - 1; - i__1 = l; - for (i__ = mm1; i__ >= i__1; --i__) { - f = s * e[i__]; - b = c__ * e[i__]; - dlartg_(&g, &f, &c__, &s, &r__); - if (i__ != m - 1) { - e[i__ + 1] = r__; - } - g = d__[i__ + 1] - p; - r__ = (d__[i__] - g) * s + c__ * 2. * b; - p = s * r__; - d__[i__ + 1] = g + p; - g = c__ * r__ - b; - -/* If eigenvectors are desired, then save rotations. */ - - if (icompz > 0) { - work[i__] = c__; - work[*n - 1 + i__] = -s; - } - -/* L70: */ - } - -/* If eigenvectors are desired, then apply saved rotations. */ - - if (icompz > 0) { - mm = m - l + 1; - zlasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l - * z_dim1 + 1], ldz); - } - - d__[l] -= p; - e[l] = g; - goto L40; - -/* Eigenvalue found. */ - -L80: - d__[l] = p; - - ++l; - if (l <= lend) { - goto L40; - } - goto L140; - - } else { - -/* - QR Iteration - - Look for small superdiagonal element. -*/ - -L90: - if (l != lend) { - lendp1 = lend + 1; - i__1 = lendp1; - for (m = l; m >= i__1; --m) { -/* Computing 2nd power */ - d__2 = (d__1 = e[m - 1], abs(d__1)); - tst = d__2 * d__2; - if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m - - 1], abs(d__2)) + safmin) { - goto L110; - } -/* L100: */ - } - } - - m = lend; - -L110: - if (m > lend) { - e[m - 1] = 0.; - } - p = d__[l]; - if (m == l) { - goto L130; - } - -/* - If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 - to compute its eigensystem. -*/ - - if (m == l - 1) { - if (icompz > 0) { - dlaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) - ; - work[m] = c__; - work[*n - 1 + m] = s; - zlasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & - z__[(l - 1) * z_dim1 + 1], ldz); - } else { - dlae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); - } - d__[l - 1] = rt1; - d__[l] = rt2; - e[l - 1] = 0.; - l += -2; - if (l >= lend) { - goto L90; - } - goto L140; - } - - if (jtot == nmaxit) { - goto L140; - } - ++jtot; - -/* Form shift. */ - - g = (d__[l - 1] - p) / (e[l - 1] * 2.); - r__ = dlapy2_(&g, &c_b1015); - g = d__[m] - p + e[l - 1] / (g + d_sign(&r__, &g)); - - s = 1.; - c__ = 1.; - p = 0.; - -/* Inner loop */ - - lm1 = l - 1; - i__1 = lm1; - for (i__ = m; i__ <= i__1; ++i__) { - f = s * e[i__]; - b = c__ * e[i__]; - dlartg_(&g, &f, &c__, &s, &r__); - if (i__ != m) { - e[i__ - 1] = r__; - } - g = d__[i__] - p; - r__ = (d__[i__ + 1] - g) * s + c__ * 2. * b; - p = s * r__; - d__[i__] = g + p; - g = c__ * r__ - b; - -/* If eigenvectors are desired, then save rotations. */ - - if (icompz > 0) { - work[i__] = c__; - work[*n - 1 + i__] = s; - } - -/* L120: */ - } - -/* If eigenvectors are desired, then apply saved rotations. */ - - if (icompz > 0) { - mm = l - m + 1; - zlasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m - * z_dim1 + 1], ldz); - } - - d__[l] -= p; - e[lm1] = g; - goto L90; - -/* Eigenvalue found. */ - -L130: - d__[l] = p; - - --l; - if (l >= lend) { - goto L90; - } - goto L140; - - } - -/* Undo scaling if necessary */ - -L140: - if (iscale == 1) { - i__1 = lendsv - lsv + 1; - dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], - n, info); - i__1 = lendsv - lsv; - dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, - info); - } else if (iscale == 2) { - i__1 = lendsv - lsv + 1; - dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], - n, info); - i__1 = lendsv - lsv; - dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, - info); - } - -/* - Check for no convergence to an eigenvalue after a total - of N*MAXIT iterations. -*/ - - if (jtot == nmaxit) { - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - if (e[i__] != 0.) { - ++(*info); - } -/* L150: */ - } - return 0; - } - goto L10; - -/* Order eigenvalues and eigenvectors. */ - -L160: - if (icompz == 0) { - -/* Use Quick Sort */ - - dlasrt_("I", n, &d__[1], info); - - } else { - -/* Use Selection Sort to minimize swaps of eigenvectors */ - - i__1 = *n; - for (ii = 2; ii <= i__1; ++ii) { - i__ = ii - 1; - k = i__; - p = d__[i__]; - i__2 = *n; - for (j = ii; j <= i__2; ++j) { - if (d__[j] < p) { - k = j; - p = d__[j]; - } -/* L170: */ - } - if (k != i__) { - d__[k] = d__[i__]; - d__[i__] = p; - zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], - &c__1); - } -/* L180: */ - } - } - return 0; - -/* End of ZSTEQR */ - -} /* zsteqr_ */ - -/* Subroutine */ int ztrevc_(char *side, char *howmny, logical *select, - integer *n, doublecomplex *t, integer *ldt, doublecomplex *vl, - integer *ldvl, doublecomplex *vr, integer *ldvr, integer *mm, integer - *m, doublecomplex *work, doublereal *rwork, integer *info) -{ - /* System generated locals */ - integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, - i__2, i__3, i__4, i__5; - doublereal d__1, d__2, d__3; - doublecomplex z__1, z__2; - - /* Builtin functions */ - double d_imag(doublecomplex *); - void d_cnjg(doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer i__, j, k, ii, ki, is; - static doublereal ulp; - static logical allv; - static doublereal unfl, ovfl, smin; - static logical over; - static doublereal scale; - extern logical lsame_(char *, char *); - static doublereal remax; - static logical leftv, bothv; - extern /* Subroutine */ int zgemv_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *); - static logical somev; - extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, - doublecomplex *, integer *), dlabad_(doublereal *, doublereal *); - - extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_( - integer *, doublereal *, doublecomplex *, integer *); - extern integer izamax_(integer *, doublecomplex *, integer *); - static logical rightv; - extern doublereal dzasum_(integer *, doublecomplex *, integer *); - static doublereal smlnum; - extern /* Subroutine */ int zlatrs_(char *, char *, char *, char *, - integer *, doublecomplex *, integer *, doublecomplex *, - doublereal *, doublereal *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZTREVC computes some or all of the right and/or left eigenvectors of - a complex upper triangular matrix T. - - The right eigenvector x and the left eigenvector y of T corresponding - to an eigenvalue w are defined by: - - T*x = w*x, y'*T = w*y' - - where y' denotes the conjugate transpose of the vector y. - - If all eigenvectors are requested, the routine may either return the - matrices X and/or Y of right or left eigenvectors of T, or the - products Q*X and/or Q*Y, where Q is an input unitary - matrix. If T was obtained from the Schur factorization of an - original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of - right or left eigenvectors of A. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'R': compute right eigenvectors only; - = 'L': compute left eigenvectors only; - = 'B': compute both right and left eigenvectors. - - HOWMNY (input) CHARACTER*1 - = 'A': compute all right and/or left eigenvectors; - = 'B': compute all right and/or left eigenvectors, - and backtransform them using the input matrices - supplied in VR and/or VL; - = 'S': compute selected right and/or left eigenvectors, - specified by the logical array SELECT. - - SELECT (input) LOGICAL array, dimension (N) - If HOWMNY = 'S', SELECT specifies the eigenvectors to be - computed. - If HOWMNY = 'A' or 'B', SELECT is not referenced. - To select the eigenvector corresponding to the j-th - eigenvalue, SELECT(j) must be set to .TRUE.. - - N (input) INTEGER - The order of the matrix T. N >= 0. - - T (input/output) COMPLEX*16 array, dimension (LDT,N) - The upper triangular matrix T. T is modified, but restored - on exit. - - LDT (input) INTEGER - The leading dimension of the array T. LDT >= max(1,N). - - VL (input/output) COMPLEX*16 array, dimension (LDVL,MM) - On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must - contain an N-by-N matrix Q (usually the unitary matrix Q of - Schur vectors returned by ZHSEQR). - On exit, if SIDE = 'L' or 'B', VL contains: - if HOWMNY = 'A', the matrix Y of left eigenvectors of T; - VL is lower triangular. The i-th column - VL(i) of VL is the eigenvector corresponding - to T(i,i). - if HOWMNY = 'B', the matrix Q*Y; - if HOWMNY = 'S', the left eigenvectors of T specified by - SELECT, stored consecutively in the columns - of VL, in the same order as their - eigenvalues. - If SIDE = 'R', VL is not referenced. - - LDVL (input) INTEGER - The leading dimension of the array VL. LDVL >= max(1,N) if - SIDE = 'L' or 'B'; LDVL >= 1 otherwise. - - VR (input/output) COMPLEX*16 array, dimension (LDVR,MM) - On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must - contain an N-by-N matrix Q (usually the unitary matrix Q of - Schur vectors returned by ZHSEQR). - On exit, if SIDE = 'R' or 'B', VR contains: - if HOWMNY = 'A', the matrix X of right eigenvectors of T; - VR is upper triangular. The i-th column - VR(i) of VR is the eigenvector corresponding - to T(i,i). - if HOWMNY = 'B', the matrix Q*X; - if HOWMNY = 'S', the right eigenvectors of T specified by - SELECT, stored consecutively in the columns - of VR, in the same order as their - eigenvalues. - If SIDE = 'L', VR is not referenced. - - LDVR (input) INTEGER - The leading dimension of the array VR. LDVR >= max(1,N) if - SIDE = 'R' or 'B'; LDVR >= 1 otherwise. - - MM (input) INTEGER - The number of columns in the arrays VL and/or VR. MM >= M. - - M (output) INTEGER - The number of columns in the arrays VL and/or VR actually - used to store the eigenvectors. If HOWMNY = 'A' or 'B', M - is set to N. Each selected eigenvector occupies one - column. - - WORK (workspace) COMPLEX*16 array, dimension (2*N) - - RWORK (workspace) DOUBLE PRECISION array, dimension (N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The algorithm used in this program is basically backward (forward) - substitution, with scaling to make the the code robust against - possible overflow. - - Each eigenvector is normalized so that the element of largest - magnitude has magnitude 1; here the magnitude of a complex number - (x,y) is taken to be |x| + |y|. - - ===================================================================== - - - Decode and test the input parameters -*/ - - /* Parameter adjustments */ - --select; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - vl_dim1 = *ldvl; - vl_offset = 1 + vl_dim1; - vl -= vl_offset; - vr_dim1 = *ldvr; - vr_offset = 1 + vr_dim1; - vr -= vr_offset; - --work; - --rwork; - - /* Function Body */ - bothv = lsame_(side, "B"); - rightv = lsame_(side, "R") || bothv; - leftv = lsame_(side, "L") || bothv; - - allv = lsame_(howmny, "A"); - over = lsame_(howmny, "B"); - somev = lsame_(howmny, "S"); - -/* - Set M to the number of columns required to store the selected - eigenvectors. -*/ - - if (somev) { - *m = 0; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (select[j]) { - ++(*m); - } -/* L10: */ - } - } else { - *m = *n; - } - - *info = 0; - if (! rightv && ! leftv) { - *info = -1; - } else if (! allv && ! over && ! somev) { - *info = -2; - } else if (*n < 0) { - *info = -4; - } else if (*ldt < max(1,*n)) { - *info = -6; - } else if (*ldvl < 1 || leftv && *ldvl < *n) { - *info = -8; - } else if (*ldvr < 1 || rightv && *ldvr < *n) { - *info = -10; - } else if (*mm < *m) { - *info = -11; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZTREVC", &i__1); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - -/* Set the constants to control overflow. */ - - unfl = SAFEMINIMUM; - ovfl = 1. / unfl; - dlabad_(&unfl, &ovfl); - ulp = PRECISION; - smlnum = unfl * (*n / ulp); - -/* Store the diagonal elements of T in working array WORK. */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + *n; - i__3 = i__ + i__ * t_dim1; - work[i__2].r = t[i__3].r, work[i__2].i = t[i__3].i; -/* L20: */ - } - -/* - Compute 1-norm of each column of strictly upper triangular - part of T to control overflow in triangular solver. -*/ - - rwork[1] = 0.; - i__1 = *n; - for (j = 2; j <= i__1; ++j) { - i__2 = j - 1; - rwork[j] = dzasum_(&i__2, &t[j * t_dim1 + 1], &c__1); -/* L30: */ - } - - if (rightv) { - -/* Compute right eigenvectors. */ - - is = *m; - for (ki = *n; ki >= 1; --ki) { - - if (somev) { - if (! select[ki]) { - goto L80; - } - } -/* Computing MAX */ - i__1 = ki + ki * t_dim1; - d__3 = ulp * ((d__1 = t[i__1].r, abs(d__1)) + (d__2 = d_imag(&t[ - ki + ki * t_dim1]), abs(d__2))); - smin = max(d__3,smlnum); - - work[1].r = 1., work[1].i = 0.; - -/* Form right-hand side. */ - - i__1 = ki - 1; - for (k = 1; k <= i__1; ++k) { - i__2 = k; - i__3 = k + ki * t_dim1; - z__1.r = -t[i__3].r, z__1.i = -t[i__3].i; - work[i__2].r = z__1.r, work[i__2].i = z__1.i; -/* L40: */ - } - -/* - Solve the triangular system: - (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK. -*/ - - i__1 = ki - 1; - for (k = 1; k <= i__1; ++k) { - i__2 = k + k * t_dim1; - i__3 = k + k * t_dim1; - i__4 = ki + ki * t_dim1; - z__1.r = t[i__3].r - t[i__4].r, z__1.i = t[i__3].i - t[i__4] - .i; - t[i__2].r = z__1.r, t[i__2].i = z__1.i; - i__2 = k + k * t_dim1; - if ((d__1 = t[i__2].r, abs(d__1)) + (d__2 = d_imag(&t[k + k * - t_dim1]), abs(d__2)) < smin) { - i__3 = k + k * t_dim1; - t[i__3].r = smin, t[i__3].i = 0.; - } -/* L50: */ - } - - if (ki > 1) { - i__1 = ki - 1; - zlatrs_("Upper", "No transpose", "Non-unit", "Y", &i__1, &t[ - t_offset], ldt, &work[1], &scale, &rwork[1], info); - i__1 = ki; - work[i__1].r = scale, work[i__1].i = 0.; - } - -/* Copy the vector x or Q*x to VR and normalize. */ - - if (! over) { - zcopy_(&ki, &work[1], &c__1, &vr[is * vr_dim1 + 1], &c__1); - - ii = izamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); - i__1 = ii + is * vr_dim1; - remax = 1. / ((d__1 = vr[i__1].r, abs(d__1)) + (d__2 = d_imag( - &vr[ii + is * vr_dim1]), abs(d__2))); - zdscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); - - i__1 = *n; - for (k = ki + 1; k <= i__1; ++k) { - i__2 = k + is * vr_dim1; - vr[i__2].r = 0., vr[i__2].i = 0.; -/* L60: */ - } - } else { - if (ki > 1) { - i__1 = ki - 1; - z__1.r = scale, z__1.i = 0.; - zgemv_("N", n, &i__1, &c_b60, &vr[vr_offset], ldvr, &work[ - 1], &c__1, &z__1, &vr[ki * vr_dim1 + 1], &c__1); - } - - ii = izamax_(n, &vr[ki * vr_dim1 + 1], &c__1); - i__1 = ii + ki * vr_dim1; - remax = 1. / ((d__1 = vr[i__1].r, abs(d__1)) + (d__2 = d_imag( - &vr[ii + ki * vr_dim1]), abs(d__2))); - zdscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); - } - -/* Set back the original diagonal elements of T. */ - - i__1 = ki - 1; - for (k = 1; k <= i__1; ++k) { - i__2 = k + k * t_dim1; - i__3 = k + *n; - t[i__2].r = work[i__3].r, t[i__2].i = work[i__3].i; -/* L70: */ - } - - --is; -L80: - ; - } - } - - if (leftv) { - -/* Compute left eigenvectors. */ - - is = 1; - i__1 = *n; - for (ki = 1; ki <= i__1; ++ki) { - - if (somev) { - if (! select[ki]) { - goto L130; - } - } -/* Computing MAX */ - i__2 = ki + ki * t_dim1; - d__3 = ulp * ((d__1 = t[i__2].r, abs(d__1)) + (d__2 = d_imag(&t[ - ki + ki * t_dim1]), abs(d__2))); - smin = max(d__3,smlnum); - - i__2 = *n; - work[i__2].r = 1., work[i__2].i = 0.; - -/* Form right-hand side. */ - - i__2 = *n; - for (k = ki + 1; k <= i__2; ++k) { - i__3 = k; - d_cnjg(&z__2, &t[ki + k * t_dim1]); - z__1.r = -z__2.r, z__1.i = -z__2.i; - work[i__3].r = z__1.r, work[i__3].i = z__1.i; -/* L90: */ - } - -/* - Solve the triangular system: - (T(KI+1:N,KI+1:N) - T(KI,KI))'*X = SCALE*WORK. -*/ - - i__2 = *n; - for (k = ki + 1; k <= i__2; ++k) { - i__3 = k + k * t_dim1; - i__4 = k + k * t_dim1; - i__5 = ki + ki * t_dim1; - z__1.r = t[i__4].r - t[i__5].r, z__1.i = t[i__4].i - t[i__5] - .i; - t[i__3].r = z__1.r, t[i__3].i = z__1.i; - i__3 = k + k * t_dim1; - if ((d__1 = t[i__3].r, abs(d__1)) + (d__2 = d_imag(&t[k + k * - t_dim1]), abs(d__2)) < smin) { - i__4 = k + k * t_dim1; - t[i__4].r = smin, t[i__4].i = 0.; - } -/* L100: */ - } - - if (ki < *n) { - i__2 = *n - ki; - zlatrs_("Upper", "Conjugate transpose", "Non-unit", "Y", & - i__2, &t[ki + 1 + (ki + 1) * t_dim1], ldt, &work[ki + - 1], &scale, &rwork[1], info); - i__2 = ki; - work[i__2].r = scale, work[i__2].i = 0.; - } - -/* Copy the vector x or Q*x to VL and normalize. */ - - if (! over) { - i__2 = *n - ki + 1; - zcopy_(&i__2, &work[ki], &c__1, &vl[ki + is * vl_dim1], &c__1) - ; - - i__2 = *n - ki + 1; - ii = izamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - 1; - i__2 = ii + is * vl_dim1; - remax = 1. / ((d__1 = vl[i__2].r, abs(d__1)) + (d__2 = d_imag( - &vl[ii + is * vl_dim1]), abs(d__2))); - i__2 = *n - ki + 1; - zdscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); - - i__2 = ki - 1; - for (k = 1; k <= i__2; ++k) { - i__3 = k + is * vl_dim1; - vl[i__3].r = 0., vl[i__3].i = 0.; -/* L110: */ - } - } else { - if (ki < *n) { - i__2 = *n - ki; - z__1.r = scale, z__1.i = 0.; - zgemv_("N", n, &i__2, &c_b60, &vl[(ki + 1) * vl_dim1 + 1], - ldvl, &work[ki + 1], &c__1, &z__1, &vl[ki * - vl_dim1 + 1], &c__1); - } - - ii = izamax_(n, &vl[ki * vl_dim1 + 1], &c__1); - i__2 = ii + ki * vl_dim1; - remax = 1. / ((d__1 = vl[i__2].r, abs(d__1)) + (d__2 = d_imag( - &vl[ii + ki * vl_dim1]), abs(d__2))); - zdscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); - } - -/* Set back the original diagonal elements of T. */ - - i__2 = *n; - for (k = ki + 1; k <= i__2; ++k) { - i__3 = k + k * t_dim1; - i__4 = k + *n; - t[i__3].r = work[i__4].r, t[i__3].i = work[i__4].i; -/* L120: */ - } - - ++is; -L130: - ; - } - } - - return 0; - -/* End of ZTREVC */ - -} /* ztrevc_ */ - -/* Subroutine */ int ztrti2_(char *uplo, char *diag, integer *n, - doublecomplex *a, integer *lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - doublecomplex z__1; - - /* Builtin functions */ - void z_div(doublecomplex *, doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer j; - static doublecomplex ajj; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int zscal_(integer *, doublecomplex *, - doublecomplex *, integer *); - static logical upper; - extern /* Subroutine */ int ztrmv_(char *, char *, char *, integer *, - doublecomplex *, integer *, doublecomplex *, integer *), xerbla_(char *, integer *); - static logical nounit; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZTRTI2 computes the inverse of a complex upper or lower triangular - matrix. - - This is the Level 2 BLAS version of the algorithm. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the matrix A is upper or lower triangular. - = 'U': Upper triangular - = 'L': Lower triangular - - DIAG (input) CHARACTER*1 - Specifies whether or not the matrix A is unit triangular. - = 'N': Non-unit triangular - = 'U': Unit triangular - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the triangular matrix A. If UPLO = 'U', the - leading n by n upper triangular part of the array A contains - the upper triangular matrix, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading n by n lower triangular part of the array A contains - the lower triangular matrix, and the strictly upper - triangular part of A is not referenced. If DIAG = 'U', the - diagonal elements of A are also not referenced and are - assumed to be 1. - - On exit, the (triangular) inverse of the original matrix, in - the same storage format. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - nounit = lsame_(diag, "N"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (! nounit && ! lsame_(diag, "U")) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZTRTI2", &i__1); - return 0; - } - - if (upper) { - -/* Compute inverse of upper triangular matrix. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (nounit) { - i__2 = j + j * a_dim1; - z_div(&z__1, &c_b60, &a[j + j * a_dim1]); - a[i__2].r = z__1.r, a[i__2].i = z__1.i; - i__2 = j + j * a_dim1; - z__1.r = -a[i__2].r, z__1.i = -a[i__2].i; - ajj.r = z__1.r, ajj.i = z__1.i; - } else { - z__1.r = -1., z__1.i = -0.; - ajj.r = z__1.r, ajj.i = z__1.i; - } - -/* Compute elements 1:j-1 of j-th column. */ - - i__2 = j - 1; - ztrmv_("Upper", "No transpose", diag, &i__2, &a[a_offset], lda, & - a[j * a_dim1 + 1], &c__1); - i__2 = j - 1; - zscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1); -/* L10: */ - } - } else { - -/* Compute inverse of lower triangular matrix. */ - - for (j = *n; j >= 1; --j) { - if (nounit) { - i__1 = j + j * a_dim1; - z_div(&z__1, &c_b60, &a[j + j * a_dim1]); - a[i__1].r = z__1.r, a[i__1].i = z__1.i; - i__1 = j + j * a_dim1; - z__1.r = -a[i__1].r, z__1.i = -a[i__1].i; - ajj.r = z__1.r, ajj.i = z__1.i; - } else { - z__1.r = -1., z__1.i = -0.; - ajj.r = z__1.r, ajj.i = z__1.i; - } - if (j < *n) { - -/* Compute elements j+1:n of j-th column. */ - - i__1 = *n - j; - ztrmv_("Lower", "No transpose", diag, &i__1, &a[j + 1 + (j + - 1) * a_dim1], lda, &a[j + 1 + j * a_dim1], &c__1); - i__1 = *n - j; - zscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1); - } -/* L20: */ - } - } - - return 0; - -/* End of ZTRTI2 */ - -} /* ztrti2_ */ - -/* Subroutine */ int ztrtri_(char *uplo, char *diag, integer *n, - doublecomplex *a, integer *lda, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, i__1, i__2, i__3[2], i__4, i__5; - doublecomplex z__1; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer j, jb, nb, nn; - extern logical lsame_(char *, char *); - static logical upper; - extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *, - integer *, integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *), - ztrsm_(char *, char *, char *, char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *), ztrti2_(char *, char * - , integer *, doublecomplex *, integer *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static logical nounit; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZTRTRI computes the inverse of a complex upper or lower triangular - matrix A. - - This is the Level 3 BLAS version of the algorithm. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': A is upper triangular; - = 'L': A is lower triangular. - - DIAG (input) CHARACTER*1 - = 'N': A is non-unit triangular; - = 'U': A is unit triangular. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the triangular matrix A. If UPLO = 'U', the - leading N-by-N upper triangular part of the array A contains - the upper triangular matrix, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading N-by-N lower triangular part of the array A contains - the lower triangular matrix, and the strictly upper - triangular part of A is not referenced. If DIAG = 'U', the - diagonal elements of A are also not referenced and are - assumed to be 1. - On exit, the (triangular) inverse of the original matrix, in - the same storage format. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, A(i,i) is exactly zero. The triangular - matrix is singular and its inverse can not be computed. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - nounit = lsame_(diag, "N"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (! nounit && ! lsame_(diag, "U")) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZTRTRI", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Check for singularity if non-unit. */ - - if (nounit) { - i__1 = *n; - for (*info = 1; *info <= i__1; ++(*info)) { - i__2 = *info + *info * a_dim1; - if (a[i__2].r == 0. && a[i__2].i == 0.) { - return 0; - } -/* L10: */ - } - *info = 0; - } - -/* - Determine the block size for this environment. - - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = uplo; - i__3[1] = 1, a__1[1] = diag; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - nb = ilaenv_(&c__1, "ZTRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)2); - if (nb <= 1 || nb >= *n) { - -/* Use unblocked code */ - - ztrti2_(uplo, diag, n, &a[a_offset], lda, info); - } else { - -/* Use blocked code */ - - if (upper) { - -/* Compute inverse of upper triangular matrix */ - - i__1 = *n; - i__2 = nb; - for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { -/* Computing MIN */ - i__4 = nb, i__5 = *n - j + 1; - jb = min(i__4,i__5); - -/* Compute rows 1:j-1 of current block column */ - - i__4 = j - 1; - ztrmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, & - c_b60, &a[a_offset], lda, &a[j * a_dim1 + 1], lda); - i__4 = j - 1; - z__1.r = -1., z__1.i = -0.; - ztrsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, & - z__1, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], - lda); - -/* Compute inverse of current diagonal block */ - - ztrti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info); -/* L20: */ - } - } else { - -/* Compute inverse of lower triangular matrix */ - - nn = (*n - 1) / nb * nb + 1; - i__2 = -nb; - for (j = nn; i__2 < 0 ? j >= 1 : j <= 1; j += i__2) { -/* Computing MIN */ - i__1 = nb, i__4 = *n - j + 1; - jb = min(i__1,i__4); - if (j + jb <= *n) { - -/* Compute rows j+jb:n of current block column */ - - i__1 = *n - j - jb + 1; - ztrmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, - &c_b60, &a[j + jb + (j + jb) * a_dim1], lda, &a[j - + jb + j * a_dim1], lda); - i__1 = *n - j - jb + 1; - z__1.r = -1., z__1.i = -0.; - ztrsm_("Right", "Lower", "No transpose", diag, &i__1, &jb, - &z__1, &a[j + j * a_dim1], lda, &a[j + jb + j * - a_dim1], lda); - } - -/* Compute inverse of current diagonal block */ - - ztrti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info); -/* L30: */ - } - } - } - - return 0; - -/* End of ZTRTRI */ - -} /* ztrtri_ */ - -/* Subroutine */ int zung2r_(integer *m, integer *n, integer *k, - doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * - work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - doublecomplex z__1; - - /* Local variables */ - static integer i__, j, l; - extern /* Subroutine */ int zscal_(integer *, doublecomplex *, - doublecomplex *, integer *), zlarf_(char *, integer *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *, doublecomplex *), xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZUNG2R generates an m by n complex matrix Q with orthonormal columns, - which is defined as the first n columns of a product of k elementary - reflectors of order m - - Q = H(1) H(2) . . . H(k) - - as returned by ZGEQRF. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix Q. M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q. M >= N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines the - matrix Q. N >= K >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the i-th column must contain the vector which - defines the elementary reflector H(i), for i = 1,2,...,k, as - returned by ZGEQRF in the first k columns of its array - argument A. - On exit, the m by n matrix Q. - - LDA (input) INTEGER - The first dimension of the array A. LDA >= max(1,M). - - TAU (input) COMPLEX*16 array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by ZGEQRF. - - WORK (workspace) COMPLEX*16 array, dimension (N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument has an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0 || *n > *m) { - *info = -2; - } else if (*k < 0 || *k > *n) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZUNG2R", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n <= 0) { - return 0; - } - -/* Initialise columns k+1:n to columns of the unit matrix */ - - i__1 = *n; - for (j = *k + 1; j <= i__1; ++j) { - i__2 = *m; - for (l = 1; l <= i__2; ++l) { - i__3 = l + j * a_dim1; - a[i__3].r = 0., a[i__3].i = 0.; -/* L10: */ - } - i__2 = j + j * a_dim1; - a[i__2].r = 1., a[i__2].i = 0.; -/* L20: */ - } - - for (i__ = *k; i__ >= 1; --i__) { - -/* Apply H(i) to A(i:m,i:n) from the left */ - - if (i__ < *n) { - i__1 = i__ + i__ * a_dim1; - a[i__1].r = 1., a[i__1].i = 0.; - i__1 = *m - i__ + 1; - i__2 = *n - i__; - zlarf_("Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[ - i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); - } - if (i__ < *m) { - i__1 = *m - i__; - i__2 = i__; - z__1.r = -tau[i__2].r, z__1.i = -tau[i__2].i; - zscal_(&i__1, &z__1, &a[i__ + 1 + i__ * a_dim1], &c__1); - } - i__1 = i__ + i__ * a_dim1; - i__2 = i__; - z__1.r = 1. - tau[i__2].r, z__1.i = 0. - tau[i__2].i; - a[i__1].r = z__1.r, a[i__1].i = z__1.i; - -/* Set A(1:i-1,i) to zero */ - - i__1 = i__ - 1; - for (l = 1; l <= i__1; ++l) { - i__2 = l + i__ * a_dim1; - a[i__2].r = 0., a[i__2].i = 0.; -/* L30: */ - } -/* L40: */ - } - return 0; - -/* End of ZUNG2R */ - -} /* zung2r_ */ - -/* Subroutine */ int zungbr_(char *vect, integer *m, integer *n, integer *k, - doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * - work, integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j, nb, mn; - extern logical lsame_(char *, char *); - static integer iinfo; - static logical wantq; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer lwkopt; - static logical lquery; - extern /* Subroutine */ int zunglq_(integer *, integer *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *, integer *), zungqr_(integer *, integer *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZUNGBR generates one of the complex unitary matrices Q or P**H - determined by ZGEBRD when reducing a complex matrix A to bidiagonal - form: A = Q * B * P**H. Q and P**H are defined as products of - elementary reflectors H(i) or G(i) respectively. - - If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q - is of order M: - if m >= k, Q = H(1) H(2) . . . H(k) and ZUNGBR returns the first n - columns of Q, where m >= n >= k; - if m < k, Q = H(1) H(2) . . . H(m-1) and ZUNGBR returns Q as an - M-by-M matrix. - - If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H - is of order N: - if k < n, P**H = G(k) . . . G(2) G(1) and ZUNGBR returns the first m - rows of P**H, where n >= m >= k; - if k >= n, P**H = G(n-1) . . . G(2) G(1) and ZUNGBR returns P**H as - an N-by-N matrix. - - Arguments - ========= - - VECT (input) CHARACTER*1 - Specifies whether the matrix Q or the matrix P**H is - required, as defined in the transformation applied by ZGEBRD: - = 'Q': generate Q; - = 'P': generate P**H. - - M (input) INTEGER - The number of rows of the matrix Q or P**H to be returned. - M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q or P**H to be returned. - N >= 0. - If VECT = 'Q', M >= N >= min(M,K); - if VECT = 'P', N >= M >= min(N,K). - - K (input) INTEGER - If VECT = 'Q', the number of columns in the original M-by-K - matrix reduced by ZGEBRD. - If VECT = 'P', the number of rows in the original K-by-N - matrix reduced by ZGEBRD. - K >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the vectors which define the elementary reflectors, - as returned by ZGEBRD. - On exit, the M-by-N matrix Q or P**H. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= M. - - TAU (input) COMPLEX*16 array, dimension - (min(M,K)) if VECT = 'Q' - (min(N,K)) if VECT = 'P' - TAU(i) must contain the scalar factor of the elementary - reflector H(i) or G(i), which determines Q or P**H, as - returned by ZGEBRD in its array argument TAUQ or TAUP. - - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,min(M,N)). - For optimum performance LWORK >= min(M,N)*NB, where NB - is the optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - wantq = lsame_(vect, "Q"); - mn = min(*m,*n); - lquery = *lwork == -1; - if (! wantq && ! lsame_(vect, "P")) { - *info = -1; - } else if (*m < 0) { - *info = -2; - } else if (*n < 0 || wantq && (*n > *m || *n < min(*m,*k)) || ! wantq && ( - *m > *n || *m < min(*n,*k))) { - *info = -3; - } else if (*k < 0) { - *info = -4; - } else if (*lda < max(1,*m)) { - *info = -6; - } else if (*lwork < max(1,mn) && ! lquery) { - *info = -9; - } - - if (*info == 0) { - if (wantq) { - nb = ilaenv_(&c__1, "ZUNGQR", " ", m, n, k, &c_n1, (ftnlen)6, ( - ftnlen)1); - } else { - nb = ilaenv_(&c__1, "ZUNGLQ", " ", m, n, k, &c_n1, (ftnlen)6, ( - ftnlen)1); - } - lwkopt = max(1,mn) * nb; - work[1].r = (doublereal) lwkopt, work[1].i = 0.; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZUNGBR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0) { - work[1].r = 1., work[1].i = 0.; - return 0; - } - - if (wantq) { - -/* - Form Q, determined by a call to ZGEBRD to reduce an m-by-k - matrix -*/ - - if (*m >= *k) { - -/* If m >= k, assume m >= n >= k */ - - zungqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & - iinfo); - - } else { - -/* - If m < k, assume m = n - - Shift the vectors which define the elementary reflectors one - column to the right, and set the first row and column of Q - to those of the unit matrix -*/ - - for (j = *m; j >= 2; --j) { - i__1 = j * a_dim1 + 1; - a[i__1].r = 0., a[i__1].i = 0.; - i__1 = *m; - for (i__ = j + 1; i__ <= i__1; ++i__) { - i__2 = i__ + j * a_dim1; - i__3 = i__ + (j - 1) * a_dim1; - a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i; -/* L10: */ - } -/* L20: */ - } - i__1 = a_dim1 + 1; - a[i__1].r = 1., a[i__1].i = 0.; - i__1 = *m; - for (i__ = 2; i__ <= i__1; ++i__) { - i__2 = i__ + a_dim1; - a[i__2].r = 0., a[i__2].i = 0.; -/* L30: */ - } - if (*m > 1) { - -/* Form Q(2:m,2:m) */ - - i__1 = *m - 1; - i__2 = *m - 1; - i__3 = *m - 1; - zungqr_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ - 1], &work[1], lwork, &iinfo); - } - } - } else { - -/* - Form P', determined by a call to ZGEBRD to reduce a k-by-n - matrix -*/ - - if (*k < *n) { - -/* If k < n, assume k <= m <= n */ - - zunglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & - iinfo); - - } else { - -/* - If k >= n, assume m = n - - Shift the vectors which define the elementary reflectors one - row downward, and set the first row and column of P' to - those of the unit matrix -*/ - - i__1 = a_dim1 + 1; - a[i__1].r = 1., a[i__1].i = 0.; - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - i__2 = i__ + a_dim1; - a[i__2].r = 0., a[i__2].i = 0.; -/* L40: */ - } - i__1 = *n; - for (j = 2; j <= i__1; ++j) { - for (i__ = j - 1; i__ >= 2; --i__) { - i__2 = i__ + j * a_dim1; - i__3 = i__ - 1 + j * a_dim1; - a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i; -/* L50: */ - } - i__2 = j * a_dim1 + 1; - a[i__2].r = 0., a[i__2].i = 0.; -/* L60: */ - } - if (*n > 1) { - -/* Form P'(2:n,2:n) */ - - i__1 = *n - 1; - i__2 = *n - 1; - i__3 = *n - 1; - zunglq_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ - 1], &work[1], lwork, &iinfo); - } - } - } - work[1].r = (doublereal) lwkopt, work[1].i = 0.; - return 0; - -/* End of ZUNGBR */ - -} /* zungbr_ */ - -/* Subroutine */ int zunghr_(integer *n, integer *ilo, integer *ihi, - doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * - work, integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, j, nb, nh, iinfo; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer lwkopt; - static logical lquery; - extern /* Subroutine */ int zungqr_(integer *, integer *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZUNGHR generates a complex unitary matrix Q which is defined as the - product of IHI-ILO elementary reflectors of order N, as returned by - ZGEHRD: - - Q = H(ilo) H(ilo+1) . . . H(ihi-1). - - Arguments - ========= - - N (input) INTEGER - The order of the matrix Q. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - ILO and IHI must have the same values as in the previous call - of ZGEHRD. Q is equal to the unit matrix except in the - submatrix Q(ilo+1:ihi,ilo+1:ihi). - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the vectors which define the elementary reflectors, - as returned by ZGEHRD. - On exit, the N-by-N unitary matrix Q. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - TAU (input) COMPLEX*16 array, dimension (N-1) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by ZGEHRD. - - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= IHI-ILO. - For optimum performance LWORK >= (IHI-ILO)*NB, where NB is - the optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nh = *ihi - *ilo; - lquery = *lwork == -1; - if (*n < 0) { - *info = -1; - } else if (*ilo < 1 || *ilo > max(1,*n)) { - *info = -2; - } else if (*ihi < min(*ilo,*n) || *ihi > *n) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*lwork < max(1,nh) && ! lquery) { - *info = -8; - } - - if (*info == 0) { - nb = ilaenv_(&c__1, "ZUNGQR", " ", &nh, &nh, &nh, &c_n1, (ftnlen)6, ( - ftnlen)1); - lwkopt = max(1,nh) * nb; - work[1].r = (doublereal) lwkopt, work[1].i = 0.; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZUNGHR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - work[1].r = 1., work[1].i = 0.; - return 0; - } - -/* - Shift the vectors which define the elementary reflectors one - column to the right, and set the first ilo and the last n-ihi - rows and columns to those of the unit matrix -*/ - - i__1 = *ilo + 1; - for (j = *ihi; j >= i__1; --j) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - a[i__3].r = 0., a[i__3].i = 0.; -/* L10: */ - } - i__2 = *ihi; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + (j - 1) * a_dim1; - a[i__3].r = a[i__4].r, a[i__3].i = a[i__4].i; -/* L20: */ - } - i__2 = *n; - for (i__ = *ihi + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - a[i__3].r = 0., a[i__3].i = 0.; -/* L30: */ - } -/* L40: */ - } - i__1 = *ilo; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - a[i__3].r = 0., a[i__3].i = 0.; -/* L50: */ - } - i__2 = j + j * a_dim1; - a[i__2].r = 1., a[i__2].i = 0.; -/* L60: */ - } - i__1 = *n; - for (j = *ihi + 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - a[i__3].r = 0., a[i__3].i = 0.; -/* L70: */ - } - i__2 = j + j * a_dim1; - a[i__2].r = 1., a[i__2].i = 0.; -/* L80: */ - } - - if (nh > 0) { - -/* Generate Q(ilo+1:ihi,ilo+1:ihi) */ - - zungqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[* - ilo], &work[1], lwork, &iinfo); - } - work[1].r = (doublereal) lwkopt, work[1].i = 0.; - return 0; - -/* End of ZUNGHR */ - -} /* zunghr_ */ - -/* Subroutine */ int zungl2_(integer *m, integer *n, integer *k, - doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * - work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - doublecomplex z__1, z__2; - - /* Builtin functions */ - void d_cnjg(doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer i__, j, l; - extern /* Subroutine */ int zscal_(integer *, doublecomplex *, - doublecomplex *, integer *), zlarf_(char *, integer *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *, doublecomplex *), xerbla_(char *, integer *), zlacgv_(integer *, doublecomplex *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZUNGL2 generates an m-by-n complex matrix Q with orthonormal rows, - which is defined as the first m rows of a product of k elementary - reflectors of order n - - Q = H(k)' . . . H(2)' H(1)' - - as returned by ZGELQF. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix Q. M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q. N >= M. - - K (input) INTEGER - The number of elementary reflectors whose product defines the - matrix Q. M >= K >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the i-th row must contain the vector which defines - the elementary reflector H(i), for i = 1,2,...,k, as returned - by ZGELQF in the first k rows of its array argument A. - On exit, the m by n matrix Q. - - LDA (input) INTEGER - The first dimension of the array A. LDA >= max(1,M). - - TAU (input) COMPLEX*16 array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by ZGELQF. - - WORK (workspace) COMPLEX*16 array, dimension (M) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument has an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < *m) { - *info = -2; - } else if (*k < 0 || *k > *m) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZUNGL2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m <= 0) { - return 0; - } - - if (*k < *m) { - -/* Initialise rows k+1:m to rows of the unit matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (l = *k + 1; l <= i__2; ++l) { - i__3 = l + j * a_dim1; - a[i__3].r = 0., a[i__3].i = 0.; -/* L10: */ - } - if (j > *k && j <= *m) { - i__2 = j + j * a_dim1; - a[i__2].r = 1., a[i__2].i = 0.; - } -/* L20: */ - } - } - - for (i__ = *k; i__ >= 1; --i__) { - -/* Apply H(i)' to A(i:m,i:n) from the right */ - - if (i__ < *n) { - i__1 = *n - i__; - zlacgv_(&i__1, &a[i__ + (i__ + 1) * a_dim1], lda); - if (i__ < *m) { - i__1 = i__ + i__ * a_dim1; - a[i__1].r = 1., a[i__1].i = 0.; - i__1 = *m - i__; - i__2 = *n - i__ + 1; - d_cnjg(&z__1, &tau[i__]); - zlarf_("Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, & - z__1, &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); - } - i__1 = *n - i__; - i__2 = i__; - z__1.r = -tau[i__2].r, z__1.i = -tau[i__2].i; - zscal_(&i__1, &z__1, &a[i__ + (i__ + 1) * a_dim1], lda); - i__1 = *n - i__; - zlacgv_(&i__1, &a[i__ + (i__ + 1) * a_dim1], lda); - } - i__1 = i__ + i__ * a_dim1; - d_cnjg(&z__2, &tau[i__]); - z__1.r = 1. - z__2.r, z__1.i = 0. - z__2.i; - a[i__1].r = z__1.r, a[i__1].i = z__1.i; - -/* Set A(i,1:i-1) to zero */ - - i__1 = i__ - 1; - for (l = 1; l <= i__1; ++l) { - i__2 = i__ + l * a_dim1; - a[i__2].r = 0., a[i__2].i = 0.; -/* L30: */ - } -/* L40: */ - } - return 0; - -/* End of ZUNGL2 */ - -} /* zungl2_ */ - -/* Subroutine */ int zunglq_(integer *m, integer *n, integer *k, - doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * - work, integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int zungl2_(integer *, integer *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, - integer *, integer *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *); - static integer ldwork; - extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *); - static logical lquery; - static integer lwkopt; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZUNGLQ generates an M-by-N complex matrix Q with orthonormal rows, - which is defined as the first M rows of a product of K elementary - reflectors of order N - - Q = H(k)' . . . H(2)' H(1)' - - as returned by ZGELQF. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix Q. M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q. N >= M. - - K (input) INTEGER - The number of elementary reflectors whose product defines the - matrix Q. M >= K >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the i-th row must contain the vector which defines - the elementary reflector H(i), for i = 1,2,...,k, as returned - by ZGELQF in the first k rows of its array argument A. - On exit, the M-by-N matrix Q. - - LDA (input) INTEGER - The first dimension of the array A. LDA >= max(1,M). - - TAU (input) COMPLEX*16 array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by ZGELQF. - - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,M). - For optimum performance LWORK >= M*NB, where NB is - the optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit; - < 0: if INFO = -i, the i-th argument has an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nb = ilaenv_(&c__1, "ZUNGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); - lwkopt = max(1,*m) * nb; - work[1].r = (doublereal) lwkopt, work[1].i = 0.; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < *m) { - *info = -2; - } else if (*k < 0 || *k > *m) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } else if (*lwork < max(1,*m) && ! lquery) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZUNGLQ", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m <= 0) { - work[1].r = 1., work[1].i = 0.; - return 0; - } - - nbmin = 2; - nx = 0; - iws = *m; - if (nb > 1 && nb < *k) { - -/* - Determine when to cross over from blocked to unblocked code. - - Computing MAX -*/ - i__1 = 0, i__2 = ilaenv_(&c__3, "ZUNGLQ", " ", m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < *k) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *m; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: reduce NB and - determine the minimum value of NB. -*/ - - nb = *lwork / ldwork; -/* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "ZUNGLQ", " ", m, n, k, &c_n1, - (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - } - } - } - - if (nb >= nbmin && nb < *k && nx < *k) { - -/* - Use blocked code after the last block. - The first kk rows are handled by the block method. -*/ - - ki = (*k - nx - 1) / nb * nb; -/* Computing MIN */ - i__1 = *k, i__2 = ki + nb; - kk = min(i__1,i__2); - -/* Set A(kk+1:m,1:kk) to zero. */ - - i__1 = kk; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = kk + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - a[i__3].r = 0., a[i__3].i = 0.; -/* L10: */ - } -/* L20: */ - } - } else { - kk = 0; - } - -/* Use unblocked code for the last or only block. */ - - if (kk < *m) { - i__1 = *m - kk; - i__2 = *n - kk; - i__3 = *k - kk; - zungl2_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & - tau[kk + 1], &work[1], &iinfo); - } - - if (kk > 0) { - -/* Use blocked code */ - - i__1 = -nb; - for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { -/* Computing MIN */ - i__2 = nb, i__3 = *k - i__ + 1; - ib = min(i__2,i__3); - if (i__ + ib <= *m) { - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__2 = *n - i__ + 1; - zlarft_("Forward", "Rowwise", &i__2, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], &work[1], &ldwork); - -/* Apply H' to A(i+ib:m,i:n) from the right */ - - i__2 = *m - i__ - ib + 1; - i__3 = *n - i__ + 1; - zlarfb_("Right", "Conjugate transpose", "Forward", "Rowwise", - &i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ - 1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ - ib + 1], &ldwork); - } - -/* Apply H' to columns i:n of current block */ - - i__2 = *n - i__ + 1; - zungl2_(&ib, &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & - work[1], &iinfo); - -/* Set columns 1:i-1 of current block to zero */ - - i__2 = i__ - 1; - for (j = 1; j <= i__2; ++j) { - i__3 = i__ + ib - 1; - for (l = i__; l <= i__3; ++l) { - i__4 = l + j * a_dim1; - a[i__4].r = 0., a[i__4].i = 0.; -/* L30: */ - } -/* L40: */ - } -/* L50: */ - } - } - - work[1].r = (doublereal) iws, work[1].i = 0.; - return 0; - -/* End of ZUNGLQ */ - -} /* zunglq_ */ - -/* Subroutine */ int zungqr_(integer *m, integer *n, integer *k, - doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * - work, integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int zung2r_(integer *, integer *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, - integer *, integer *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *); - static integer ldwork; - extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *); - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZUNGQR generates an M-by-N complex matrix Q with orthonormal columns, - which is defined as the first N columns of a product of K elementary - reflectors of order M - - Q = H(1) H(2) . . . H(k) - - as returned by ZGEQRF. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix Q. M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q. M >= N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines the - matrix Q. N >= K >= 0. - - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the i-th column must contain the vector which - defines the elementary reflector H(i), for i = 1,2,...,k, as - returned by ZGEQRF in the first k columns of its array - argument A. - On exit, the M-by-N matrix Q. - - LDA (input) INTEGER - The first dimension of the array A. LDA >= max(1,M). - - TAU (input) COMPLEX*16 array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by ZGEQRF. - - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,N). - For optimum performance LWORK >= N*NB, where NB is the - optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument has an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nb = ilaenv_(&c__1, "ZUNGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); - lwkopt = max(1,*n) * nb; - work[1].r = (doublereal) lwkopt, work[1].i = 0.; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0 || *n > *m) { - *info = -2; - } else if (*k < 0 || *k > *n) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } else if (*lwork < max(1,*n) && ! lquery) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZUNGQR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n <= 0) { - work[1].r = 1., work[1].i = 0.; - return 0; - } - - nbmin = 2; - nx = 0; - iws = *n; - if (nb > 1 && nb < *k) { - -/* - Determine when to cross over from blocked to unblocked code. - - Computing MAX -*/ - i__1 = 0, i__2 = ilaenv_(&c__3, "ZUNGQR", " ", m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < *k) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *n; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: reduce NB and - determine the minimum value of NB. -*/ - - nb = *lwork / ldwork; -/* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "ZUNGQR", " ", m, n, k, &c_n1, - (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - } - } - } - - if (nb >= nbmin && nb < *k && nx < *k) { - -/* - Use blocked code after the last block. - The first kk columns are handled by the block method. -*/ - - ki = (*k - nx - 1) / nb * nb; -/* Computing MIN */ - i__1 = *k, i__2 = ki + nb; - kk = min(i__1,i__2); - -/* Set A(1:kk,kk+1:n) to zero. */ - - i__1 = *n; - for (j = kk + 1; j <= i__1; ++j) { - i__2 = kk; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - a[i__3].r = 0., a[i__3].i = 0.; -/* L10: */ - } -/* L20: */ - } - } else { - kk = 0; - } - -/* Use unblocked code for the last or only block. */ - - if (kk < *n) { - i__1 = *m - kk; - i__2 = *n - kk; - i__3 = *k - kk; - zung2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & - tau[kk + 1], &work[1], &iinfo); - } - - if (kk > 0) { - -/* Use blocked code */ - - i__1 = -nb; - for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { -/* Computing MIN */ - i__2 = nb, i__3 = *k - i__ + 1; - ib = min(i__2,i__3); - if (i__ + ib <= *n) { - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__2 = *m - i__ + 1; - zlarft_("Forward", "Columnwise", &i__2, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], &work[1], &ldwork); - -/* Apply H to A(i:m,i+ib:n) from the left */ - - i__2 = *m - i__ + 1; - i__3 = *n - i__ - ib + 1; - zlarfb_("Left", "No transpose", "Forward", "Columnwise", & - i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ - 1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, & - work[ib + 1], &ldwork); - } - -/* Apply H to rows i:m of current block */ - - i__2 = *m - i__ + 1; - zung2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & - work[1], &iinfo); - -/* Set rows 1:i-1 of current block to zero */ - - i__2 = i__ + ib - 1; - for (j = i__; j <= i__2; ++j) { - i__3 = i__ - 1; - for (l = 1; l <= i__3; ++l) { - i__4 = l + j * a_dim1; - a[i__4].r = 0., a[i__4].i = 0.; -/* L30: */ - } -/* L40: */ - } -/* L50: */ - } - } - - work[1].r = (doublereal) iws, work[1].i = 0.; - return 0; - -/* End of ZUNGQR */ - -} /* zungqr_ */ - -/* Subroutine */ int zunm2l_(char *side, char *trans, integer *m, integer *n, - integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, - doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; - doublecomplex z__1; - - /* Builtin functions */ - void d_cnjg(doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer i__, i1, i2, i3, mi, ni, nq; - static doublecomplex aii; - static logical left; - static doublecomplex taui; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int zlarf_(char *, integer *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *, doublecomplex *), xerbla_(char *, integer *); - static logical notran; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZUNM2L overwrites the general complex m-by-n matrix C with - - Q * C if SIDE = 'L' and TRANS = 'N', or - - Q'* C if SIDE = 'L' and TRANS = 'C', or - - C * Q if SIDE = 'R' and TRANS = 'N', or - - C * Q' if SIDE = 'R' and TRANS = 'C', - - where Q is a complex unitary matrix defined as the product of k - elementary reflectors - - Q = H(k) . . . H(2) H(1) - - as returned by ZGEQLF. Q is of order m if SIDE = 'L' and of order n - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q' from the Left - = 'R': apply Q or Q' from the Right - - TRANS (input) CHARACTER*1 - = 'N': apply Q (No transpose) - = 'C': apply Q' (Conjugate transpose) - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) COMPLEX*16 array, dimension (LDA,K) - The i-th column must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - ZGEQLF in the last k columns of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. - If SIDE = 'L', LDA >= max(1,M); - if SIDE = 'R', LDA >= max(1,N). - - TAU (input) COMPLEX*16 array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by ZGEQLF. - - C (input/output) COMPLEX*16 array, dimension (LDC,N) - On entry, the m-by-n matrix C. - On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace) COMPLEX*16 array, dimension - (N) if SIDE = 'L', - (M) if SIDE = 'R' - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - -/* NQ is the order of Q */ - - if (left) { - nq = *m; - } else { - nq = *n; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "C")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if (*k < 0 || *k > nq) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZUNM2L", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || *k == 0) { - return 0; - } - - if (left && notran || ! left && ! notran) { - i1 = 1; - i2 = *k; - i3 = 1; - } else { - i1 = *k; - i2 = 1; - i3 = -1; - } - - if (left) { - ni = *n; - } else { - mi = *m; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - if (left) { - -/* H(i) or H(i)' is applied to C(1:m-k+i,1:n) */ - - mi = *m - *k + i__; - } else { - -/* H(i) or H(i)' is applied to C(1:m,1:n-k+i) */ - - ni = *n - *k + i__; - } - -/* Apply H(i) or H(i)' */ - - if (notran) { - i__3 = i__; - taui.r = tau[i__3].r, taui.i = tau[i__3].i; - } else { - d_cnjg(&z__1, &tau[i__]); - taui.r = z__1.r, taui.i = z__1.i; - } - i__3 = nq - *k + i__ + i__ * a_dim1; - aii.r = a[i__3].r, aii.i = a[i__3].i; - i__3 = nq - *k + i__ + i__ * a_dim1; - a[i__3].r = 1., a[i__3].i = 0.; - zlarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &taui, &c__[ - c_offset], ldc, &work[1]); - i__3 = nq - *k + i__ + i__ * a_dim1; - a[i__3].r = aii.r, a[i__3].i = aii.i; -/* L10: */ - } - return 0; - -/* End of ZUNM2L */ - -} /* zunm2l_ */ - -/* Subroutine */ int zunm2r_(char *side, char *trans, integer *m, integer *n, - integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, - doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; - doublecomplex z__1; - - /* Builtin functions */ - void d_cnjg(doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; - static doublecomplex aii; - static logical left; - static doublecomplex taui; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int zlarf_(char *, integer *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *, doublecomplex *), xerbla_(char *, integer *); - static logical notran; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZUNM2R overwrites the general complex m-by-n matrix C with - - Q * C if SIDE = 'L' and TRANS = 'N', or - - Q'* C if SIDE = 'L' and TRANS = 'C', or - - C * Q if SIDE = 'R' and TRANS = 'N', or - - C * Q' if SIDE = 'R' and TRANS = 'C', - - where Q is a complex unitary matrix defined as the product of k - elementary reflectors - - Q = H(1) H(2) . . . H(k) - - as returned by ZGEQRF. Q is of order m if SIDE = 'L' and of order n - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q' from the Left - = 'R': apply Q or Q' from the Right - - TRANS (input) CHARACTER*1 - = 'N': apply Q (No transpose) - = 'C': apply Q' (Conjugate transpose) - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) COMPLEX*16 array, dimension (LDA,K) - The i-th column must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - ZGEQRF in the first k columns of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. - If SIDE = 'L', LDA >= max(1,M); - if SIDE = 'R', LDA >= max(1,N). - - TAU (input) COMPLEX*16 array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by ZGEQRF. - - C (input/output) COMPLEX*16 array, dimension (LDC,N) - On entry, the m-by-n matrix C. - On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace) COMPLEX*16 array, dimension - (N) if SIDE = 'L', - (M) if SIDE = 'R' - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - -/* NQ is the order of Q */ - - if (left) { - nq = *m; - } else { - nq = *n; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "C")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if (*k < 0 || *k > nq) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZUNM2R", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || *k == 0) { - return 0; - } - - if (left && ! notran || ! left && notran) { - i1 = 1; - i2 = *k; - i3 = 1; - } else { - i1 = *k; - i2 = 1; - i3 = -1; - } - - if (left) { - ni = *n; - jc = 1; - } else { - mi = *m; - ic = 1; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - if (left) { - -/* H(i) or H(i)' is applied to C(i:m,1:n) */ - - mi = *m - i__ + 1; - ic = i__; - } else { - -/* H(i) or H(i)' is applied to C(1:m,i:n) */ - - ni = *n - i__ + 1; - jc = i__; - } - -/* Apply H(i) or H(i)' */ - - if (notran) { - i__3 = i__; - taui.r = tau[i__3].r, taui.i = tau[i__3].i; - } else { - d_cnjg(&z__1, &tau[i__]); - taui.r = z__1.r, taui.i = z__1.i; - } - i__3 = i__ + i__ * a_dim1; - aii.r = a[i__3].r, aii.i = a[i__3].i; - i__3 = i__ + i__ * a_dim1; - a[i__3].r = 1., a[i__3].i = 0.; - zlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &taui, &c__[ic - + jc * c_dim1], ldc, &work[1]); - i__3 = i__ + i__ * a_dim1; - a[i__3].r = aii.r, a[i__3].i = aii.i; -/* L10: */ - } - return 0; - -/* End of ZUNM2R */ - -} /* zunm2r_ */ - -/* Subroutine */ int zunmbr_(char *vect, char *side, char *trans, integer *m, - integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex - *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer * - lwork, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2]; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i1, i2, nb, mi, ni, nq, nw; - static logical left; - extern logical lsame_(char *, char *); - static integer iinfo; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static logical notran, applyq; - static char transt[1]; - static integer lwkopt; - static logical lquery; - extern /* Subroutine */ int zunmlq_(char *, char *, integer *, integer *, - integer *, doublecomplex *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *, integer *), zunmqr_(char *, char *, integer *, integer *, - integer *, doublecomplex *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - If VECT = 'Q', ZUNMBR overwrites the general complex M-by-N matrix C - with - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'C': Q**H * C C * Q**H - - If VECT = 'P', ZUNMBR overwrites the general complex M-by-N matrix C - with - SIDE = 'L' SIDE = 'R' - TRANS = 'N': P * C C * P - TRANS = 'C': P**H * C C * P**H - - Here Q and P**H are the unitary matrices determined by ZGEBRD when - reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q - and P**H are defined as products of elementary reflectors H(i) and - G(i) respectively. - - Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the - order of the unitary matrix Q or P**H that is applied. - - If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: - if nq >= k, Q = H(1) H(2) . . . H(k); - if nq < k, Q = H(1) H(2) . . . H(nq-1). - - If VECT = 'P', A is assumed to have been a K-by-NQ matrix: - if k < nq, P = G(1) G(2) . . . G(k); - if k >= nq, P = G(1) G(2) . . . G(nq-1). - - Arguments - ========= - - VECT (input) CHARACTER*1 - = 'Q': apply Q or Q**H; - = 'P': apply P or P**H. - - SIDE (input) CHARACTER*1 - = 'L': apply Q, Q**H, P or P**H from the Left; - = 'R': apply Q, Q**H, P or P**H from the Right. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q or P; - = 'C': Conjugate transpose, apply Q**H or P**H. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - If VECT = 'Q', the number of columns in the original - matrix reduced by ZGEBRD. - If VECT = 'P', the number of rows in the original - matrix reduced by ZGEBRD. - K >= 0. - - A (input) COMPLEX*16 array, dimension - (LDA,min(nq,K)) if VECT = 'Q' - (LDA,nq) if VECT = 'P' - The vectors which define the elementary reflectors H(i) and - G(i), whose products determine the matrices Q and P, as - returned by ZGEBRD. - - LDA (input) INTEGER - The leading dimension of the array A. - If VECT = 'Q', LDA >= max(1,nq); - if VECT = 'P', LDA >= max(1,min(nq,K)). - - TAU (input) COMPLEX*16 array, dimension (min(nq,K)) - TAU(i) must contain the scalar factor of the elementary - reflector H(i) or G(i) which determines Q or P, as returned - by ZGEBRD in the array argument TAUQ or TAUP. - - C (input/output) COMPLEX*16 array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q - or P*C or P**H*C or C*P or C*P**H. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - applyq = lsame_(vect, "Q"); - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - lquery = *lwork == -1; - -/* NQ is the order of Q or P and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! applyq && ! lsame_(vect, "P")) { - *info = -1; - } else if (! left && ! lsame_(side, "R")) { - *info = -2; - } else if (! notran && ! lsame_(trans, "C")) { - *info = -3; - } else if (*m < 0) { - *info = -4; - } else if (*n < 0) { - *info = -5; - } else if (*k < 0) { - *info = -6; - } else /* if(complicated condition) */ { -/* Computing MAX */ - i__1 = 1, i__2 = min(nq,*k); - if (applyq && *lda < max(1,nq) || ! applyq && *lda < max(i__1,i__2)) { - *info = -8; - } else if (*ldc < max(1,*m)) { - *info = -11; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -13; - } - } - - if (*info == 0) { - if (applyq) { - if (left) { -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *m - 1; - i__2 = *m - 1; - nb = ilaenv_(&c__1, "ZUNMQR", ch__1, &i__1, n, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } else { -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *n - 1; - i__2 = *n - 1; - nb = ilaenv_(&c__1, "ZUNMQR", ch__1, m, &i__1, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } - } else { - if (left) { -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *m - 1; - i__2 = *m - 1; - nb = ilaenv_(&c__1, "ZUNMLQ", ch__1, &i__1, n, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } else { -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *n - 1; - i__2 = *n - 1; - nb = ilaenv_(&c__1, "ZUNMLQ", ch__1, m, &i__1, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } - } - lwkopt = max(1,nw) * nb; - work[1].r = (doublereal) lwkopt, work[1].i = 0.; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZUNMBR", &i__1); - return 0; - } else if (lquery) { - } - -/* Quick return if possible */ - - work[1].r = 1., work[1].i = 0.; - if (*m == 0 || *n == 0) { - return 0; - } - - if (applyq) { - -/* Apply Q */ - - if (nq >= *k) { - -/* Q was determined by a call to ZGEBRD with nq >= k */ - - zunmqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], lwork, &iinfo); - } else if (nq > 1) { - -/* Q was determined by a call to ZGEBRD with nq < k */ - - if (left) { - mi = *m - 1; - ni = *n; - i1 = 2; - i2 = 1; - } else { - mi = *m; - ni = *n - 1; - i1 = 1; - i2 = 2; - } - i__1 = nq - 1; - zunmqr_(side, trans, &mi, &ni, &i__1, &a[a_dim1 + 2], lda, &tau[1] - , &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); - } - } else { - -/* Apply P */ - - if (notran) { - *(unsigned char *)transt = 'C'; - } else { - *(unsigned char *)transt = 'N'; - } - if (nq > *k) { - -/* P was determined by a call to ZGEBRD with nq > k */ - - zunmlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], lwork, &iinfo); - } else if (nq > 1) { - -/* P was determined by a call to ZGEBRD with nq <= k */ - - if (left) { - mi = *m - 1; - ni = *n; - i1 = 2; - i2 = 1; - } else { - mi = *m; - ni = *n - 1; - i1 = 1; - i2 = 2; - } - i__1 = nq - 1; - zunmlq_(side, transt, &mi, &ni, &i__1, &a[(a_dim1 << 1) + 1], lda, - &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, & - iinfo); - } - } - work[1].r = (doublereal) lwkopt, work[1].i = 0.; - return 0; - -/* End of ZUNMBR */ - -} /* zunmbr_ */ - -/* Subroutine */ int zunml2_(char *side, char *trans, integer *m, integer *n, - integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, - doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; - doublecomplex z__1; - - /* Builtin functions */ - void d_cnjg(doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; - static doublecomplex aii; - static logical left; - static doublecomplex taui; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int zlarf_(char *, integer *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *, doublecomplex *), xerbla_(char *, integer *), zlacgv_(integer *, doublecomplex *, integer *); - static logical notran; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZUNML2 overwrites the general complex m-by-n matrix C with - - Q * C if SIDE = 'L' and TRANS = 'N', or - - Q'* C if SIDE = 'L' and TRANS = 'C', or - - C * Q if SIDE = 'R' and TRANS = 'N', or - - C * Q' if SIDE = 'R' and TRANS = 'C', - - where Q is a complex unitary matrix defined as the product of k - elementary reflectors - - Q = H(k)' . . . H(2)' H(1)' - - as returned by ZGELQF. Q is of order m if SIDE = 'L' and of order n - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q' from the Left - = 'R': apply Q or Q' from the Right - - TRANS (input) CHARACTER*1 - = 'N': apply Q (No transpose) - = 'C': apply Q' (Conjugate transpose) - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) COMPLEX*16 array, dimension - (LDA,M) if SIDE = 'L', - (LDA,N) if SIDE = 'R' - The i-th row must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - ZGELQF in the first k rows of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,K). - - TAU (input) COMPLEX*16 array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by ZGELQF. - - C (input/output) COMPLEX*16 array, dimension (LDC,N) - On entry, the m-by-n matrix C. - On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace) COMPLEX*16 array, dimension - (N) if SIDE = 'L', - (M) if SIDE = 'R' - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - -/* NQ is the order of Q */ - - if (left) { - nq = *m; - } else { - nq = *n; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "C")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if (*k < 0 || *k > nq) { - *info = -5; - } else if (*lda < max(1,*k)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZUNML2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || *k == 0) { - return 0; - } - - if (left && notran || ! left && ! notran) { - i1 = 1; - i2 = *k; - i3 = 1; - } else { - i1 = *k; - i2 = 1; - i3 = -1; - } - - if (left) { - ni = *n; - jc = 1; - } else { - mi = *m; - ic = 1; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - if (left) { - -/* H(i) or H(i)' is applied to C(i:m,1:n) */ - - mi = *m - i__ + 1; - ic = i__; - } else { - -/* H(i) or H(i)' is applied to C(1:m,i:n) */ - - ni = *n - i__ + 1; - jc = i__; - } - -/* Apply H(i) or H(i)' */ - - if (notran) { - d_cnjg(&z__1, &tau[i__]); - taui.r = z__1.r, taui.i = z__1.i; - } else { - i__3 = i__; - taui.r = tau[i__3].r, taui.i = tau[i__3].i; - } - if (i__ < nq) { - i__3 = nq - i__; - zlacgv_(&i__3, &a[i__ + (i__ + 1) * a_dim1], lda); - } - i__3 = i__ + i__ * a_dim1; - aii.r = a[i__3].r, aii.i = a[i__3].i; - i__3 = i__ + i__ * a_dim1; - a[i__3].r = 1., a[i__3].i = 0.; - zlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &taui, &c__[ic + - jc * c_dim1], ldc, &work[1]); - i__3 = i__ + i__ * a_dim1; - a[i__3].r = aii.r, a[i__3].i = aii.i; - if (i__ < nq) { - i__3 = nq - i__; - zlacgv_(&i__3, &a[i__ + (i__ + 1) * a_dim1], lda); - } -/* L10: */ - } - return 0; - -/* End of ZUNML2 */ - -} /* zunml2_ */ - -/* Subroutine */ int zunmlq_(char *side, char *trans, integer *m, integer *n, - integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, - doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, - integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, - i__5; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i__; - static doublecomplex t[4160] /* was [65][64] */; - static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; - static logical left; - extern logical lsame_(char *, char *); - static integer nbmin, iinfo; - extern /* Subroutine */ int zunml2_(char *, char *, integer *, integer *, - integer *, doublecomplex *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, - integer *, integer *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *); - static logical notran; - static integer ldwork; - extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *); - static char transt[1]; - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZUNMLQ overwrites the general complex M-by-N matrix C with - - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'C': Q**H * C C * Q**H - - where Q is a complex unitary matrix defined as the product of k - elementary reflectors - - Q = H(k)' . . . H(2)' H(1)' - - as returned by ZGELQF. Q is of order M if SIDE = 'L' and of order N - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q**H from the Left; - = 'R': apply Q or Q**H from the Right. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q; - = 'C': Conjugate transpose, apply Q**H. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) COMPLEX*16 array, dimension - (LDA,M) if SIDE = 'L', - (LDA,N) if SIDE = 'R' - The i-th row must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - ZGELQF in the first k rows of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,K). - - TAU (input) COMPLEX*16 array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by ZGELQF. - - C (input/output) COMPLEX*16 array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - lquery = *lwork == -1; - -/* NQ is the order of Q and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "C")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if (*k < 0 || *k > nq) { - *info = -5; - } else if (*lda < max(1,*k)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -12; - } - - if (*info == 0) { - -/* - Determine the block size. NB may be at most NBMAX, where NBMAX - is used to define the local array T. - - Computing MIN - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "ZUNMLQ", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nb = min(i__1,i__2); - lwkopt = max(1,nw) * nb; - work[1].r = (doublereal) lwkopt, work[1].i = 0.; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZUNMLQ", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || *k == 0) { - work[1].r = 1., work[1].i = 0.; - return 0; - } - - nbmin = 2; - ldwork = nw; - if (nb > 1 && nb < *k) { - iws = nw * nb; - if (*lwork < iws) { - nb = *lwork / ldwork; -/* - Computing MAX - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 2, i__2 = ilaenv_(&c__2, "ZUNMLQ", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nbmin = max(i__1,i__2); - } - } else { - iws = nw; - } - - if (nb < nbmin || nb >= *k) { - -/* Use unblocked code */ - - zunml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], &iinfo); - } else { - -/* Use blocked code */ - - if (left && notran || ! left && ! notran) { - i1 = 1; - i2 = *k; - i3 = nb; - } else { - i1 = (*k - 1) / nb * nb + 1; - i2 = 1; - i3 = -nb; - } - - if (left) { - ni = *n; - jc = 1; - } else { - mi = *m; - ic = 1; - } - - if (notran) { - *(unsigned char *)transt = 'C'; - } else { - *(unsigned char *)transt = 'N'; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__4 = nb, i__5 = *k - i__ + 1; - ib = min(i__4,i__5); - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__4 = nq - i__ + 1; - zlarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1], - lda, &tau[i__], t, &c__65); - if (left) { - -/* H or H' is applied to C(i:m,1:n) */ - - mi = *m - i__ + 1; - ic = i__; - } else { - -/* H or H' is applied to C(1:m,i:n) */ - - ni = *n - i__ + 1; - jc = i__; - } - -/* Apply H or H' */ - - zlarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__ - + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1], - ldc, &work[1], &ldwork); -/* L10: */ - } - } - work[1].r = (doublereal) lwkopt, work[1].i = 0.; - return 0; - -/* End of ZUNMLQ */ - -} /* zunmlq_ */ - -/* Subroutine */ int zunmql_(char *side, char *trans, integer *m, integer *n, - integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, - doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, - integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, - i__5; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i__; - static doublecomplex t[4160] /* was [65][64] */; - static integer i1, i2, i3, ib, nb, mi, ni, nq, nw, iws; - static logical left; - extern logical lsame_(char *, char *); - static integer nbmin, iinfo; - extern /* Subroutine */ int zunm2l_(char *, char *, integer *, integer *, - integer *, doublecomplex *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, - integer *, integer *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *); - static logical notran; - static integer ldwork; - extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *); - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZUNMQL overwrites the general complex M-by-N matrix C with - - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'C': Q**H * C C * Q**H - - where Q is a complex unitary matrix defined as the product of k - elementary reflectors - - Q = H(k) . . . H(2) H(1) - - as returned by ZGEQLF. Q is of order M if SIDE = 'L' and of order N - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q**H from the Left; - = 'R': apply Q or Q**H from the Right. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q; - = 'C': Transpose, apply Q**H. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) COMPLEX*16 array, dimension (LDA,K) - The i-th column must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - ZGEQLF in the last k columns of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. - If SIDE = 'L', LDA >= max(1,M); - if SIDE = 'R', LDA >= max(1,N). - - TAU (input) COMPLEX*16 array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by ZGEQLF. - - C (input/output) COMPLEX*16 array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - lquery = *lwork == -1; - -/* NQ is the order of Q and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "C")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if (*k < 0 || *k > nq) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -12; - } - - if (*info == 0) { - -/* - Determine the block size. NB may be at most NBMAX, where NBMAX - is used to define the local array T. - - Computing MIN - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "ZUNMQL", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nb = min(i__1,i__2); - lwkopt = max(1,nw) * nb; - work[1].r = (doublereal) lwkopt, work[1].i = 0.; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZUNMQL", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || *k == 0) { - work[1].r = 1., work[1].i = 0.; - return 0; - } - - nbmin = 2; - ldwork = nw; - if (nb > 1 && nb < *k) { - iws = nw * nb; - if (*lwork < iws) { - nb = *lwork / ldwork; -/* - Computing MAX - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 2, i__2 = ilaenv_(&c__2, "ZUNMQL", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nbmin = max(i__1,i__2); - } - } else { - iws = nw; - } - - if (nb < nbmin || nb >= *k) { - -/* Use unblocked code */ - - zunm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], &iinfo); - } else { - -/* Use blocked code */ - - if (left && notran || ! left && ! notran) { - i1 = 1; - i2 = *k; - i3 = nb; - } else { - i1 = (*k - 1) / nb * nb + 1; - i2 = 1; - i3 = -nb; - } - - if (left) { - ni = *n; - } else { - mi = *m; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__4 = nb, i__5 = *k - i__ + 1; - ib = min(i__4,i__5); - -/* - Form the triangular factor of the block reflector - H = H(i+ib-1) . . . H(i+1) H(i) -*/ - - i__4 = nq - *k + i__ + ib - 1; - zlarft_("Backward", "Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1] - , lda, &tau[i__], t, &c__65); - if (left) { - -/* H or H' is applied to C(1:m-k+i+ib-1,1:n) */ - - mi = *m - *k + i__ + ib - 1; - } else { - -/* H or H' is applied to C(1:m,1:n-k+i+ib-1) */ - - ni = *n - *k + i__ + ib - 1; - } - -/* Apply H or H' */ - - zlarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, &a[ - i__ * a_dim1 + 1], lda, t, &c__65, &c__[c_offset], ldc, & - work[1], &ldwork); -/* L10: */ - } - } - work[1].r = (doublereal) lwkopt, work[1].i = 0.; - return 0; - -/* End of ZUNMQL */ - -} /* zunmql_ */ - -/* Subroutine */ int zunmqr_(char *side, char *trans, integer *m, integer *n, - integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, - doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, - integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, - i__5; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i__; - static doublecomplex t[4160] /* was [65][64] */; - static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; - static logical left; - extern logical lsame_(char *, char *); - static integer nbmin, iinfo; - extern /* Subroutine */ int zunm2r_(char *, char *, integer *, integer *, - integer *, doublecomplex *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, - integer *, integer *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *); - static logical notran; - static integer ldwork; - extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *); - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZUNMQR overwrites the general complex M-by-N matrix C with - - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'C': Q**H * C C * Q**H - - where Q is a complex unitary matrix defined as the product of k - elementary reflectors - - Q = H(1) H(2) . . . H(k) - - as returned by ZGEQRF. Q is of order M if SIDE = 'L' and of order N - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q**H from the Left; - = 'R': apply Q or Q**H from the Right. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q; - = 'C': Conjugate transpose, apply Q**H. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) COMPLEX*16 array, dimension (LDA,K) - The i-th column must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - ZGEQRF in the first k columns of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. - If SIDE = 'L', LDA >= max(1,M); - if SIDE = 'R', LDA >= max(1,N). - - TAU (input) COMPLEX*16 array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by ZGEQRF. - - C (input/output) COMPLEX*16 array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - lquery = *lwork == -1; - -/* NQ is the order of Q and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "C")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if (*k < 0 || *k > nq) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -12; - } - - if (*info == 0) { - -/* - Determine the block size. NB may be at most NBMAX, where NBMAX - is used to define the local array T. - - Computing MIN - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "ZUNMQR", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nb = min(i__1,i__2); - lwkopt = max(1,nw) * nb; - work[1].r = (doublereal) lwkopt, work[1].i = 0.; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZUNMQR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || *k == 0) { - work[1].r = 1., work[1].i = 0.; - return 0; - } - - nbmin = 2; - ldwork = nw; - if (nb > 1 && nb < *k) { - iws = nw * nb; - if (*lwork < iws) { - nb = *lwork / ldwork; -/* - Computing MAX - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 2, i__2 = ilaenv_(&c__2, "ZUNMQR", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nbmin = max(i__1,i__2); - } - } else { - iws = nw; - } - - if (nb < nbmin || nb >= *k) { - -/* Use unblocked code */ - - zunm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], &iinfo); - } else { - -/* Use blocked code */ - - if (left && ! notran || ! left && notran) { - i1 = 1; - i2 = *k; - i3 = nb; - } else { - i1 = (*k - 1) / nb * nb + 1; - i2 = 1; - i3 = -nb; - } - - if (left) { - ni = *n; - jc = 1; - } else { - mi = *m; - ic = 1; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__4 = nb, i__5 = *k - i__ + 1; - ib = min(i__4,i__5); - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__4 = nq - i__ + 1; - zlarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], t, &c__65) - ; - if (left) { - -/* H or H' is applied to C(i:m,1:n) */ - - mi = *m - i__ + 1; - ic = i__; - } else { - -/* H or H' is applied to C(1:m,i:n) */ - - ni = *n - i__ + 1; - jc = i__; - } - -/* Apply H or H' */ - - zlarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[ - i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * - c_dim1], ldc, &work[1], &ldwork); -/* L10: */ - } - } - work[1].r = (doublereal) lwkopt, work[1].i = 0.; - return 0; - -/* End of ZUNMQR */ - -} /* zunmqr_ */ - -/* Subroutine */ int zunmtr_(char *side, char *uplo, char *trans, integer *m, - integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, - doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, - integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2, i__3; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i1, i2, nb, mi, ni, nq, nw; - static logical left; - extern logical lsame_(char *, char *); - static integer iinfo; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer lwkopt; - static logical lquery; - extern /* Subroutine */ int zunmql_(char *, char *, integer *, integer *, - integer *, doublecomplex *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *, integer *), zunmqr_(char *, char *, integer *, integer *, - integer *, doublecomplex *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZUNMTR overwrites the general complex M-by-N matrix C with - - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'C': Q**H * C C * Q**H - - where Q is a complex unitary matrix of order nq, with nq = m if - SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of - nq-1 elementary reflectors, as returned by ZHETRD: - - if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); - - if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q**H from the Left; - = 'R': apply Q or Q**H from the Right. - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A contains elementary reflectors - from ZHETRD; - = 'L': Lower triangle of A contains elementary reflectors - from ZHETRD. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q; - = 'C': Conjugate transpose, apply Q**H. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - A (input) COMPLEX*16 array, dimension - (LDA,M) if SIDE = 'L' - (LDA,N) if SIDE = 'R' - The vectors which define the elementary reflectors, as - returned by ZHETRD. - - LDA (input) INTEGER - The leading dimension of the array A. - LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. - - TAU (input) COMPLEX*16 array, dimension - (M-1) if SIDE = 'L' - (N-1) if SIDE = 'R' - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by ZHETRD. - - C (input/output) COMPLEX*16 array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >=M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - upper = lsame_(uplo, "U"); - lquery = *lwork == -1; - -/* NQ is the order of Q and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! upper && ! lsame_(uplo, "L")) { - *info = -2; - } else if (! lsame_(trans, "N") && ! lsame_(trans, - "C")) { - *info = -3; - } else if (*m < 0) { - *info = -4; - } else if (*n < 0) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -12; - } - - if (*info == 0) { - if (upper) { - if (left) { -/* Writing concatenation */ - i__1[0] = 1, a__1[0] = side; - i__1[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); - i__2 = *m - 1; - i__3 = *m - 1; - nb = ilaenv_(&c__1, "ZUNMQL", ch__1, &i__2, n, &i__3, &c_n1, ( - ftnlen)6, (ftnlen)2); - } else { -/* Writing concatenation */ - i__1[0] = 1, a__1[0] = side; - i__1[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); - i__2 = *n - 1; - i__3 = *n - 1; - nb = ilaenv_(&c__1, "ZUNMQL", ch__1, m, &i__2, &i__3, &c_n1, ( - ftnlen)6, (ftnlen)2); - } - } else { - if (left) { -/* Writing concatenation */ - i__1[0] = 1, a__1[0] = side; - i__1[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); - i__2 = *m - 1; - i__3 = *m - 1; - nb = ilaenv_(&c__1, "ZUNMQR", ch__1, &i__2, n, &i__3, &c_n1, ( - ftnlen)6, (ftnlen)2); - } else { -/* Writing concatenation */ - i__1[0] = 1, a__1[0] = side; - i__1[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); - i__2 = *n - 1; - i__3 = *n - 1; - nb = ilaenv_(&c__1, "ZUNMQR", ch__1, m, &i__2, &i__3, &c_n1, ( - ftnlen)6, (ftnlen)2); - } - } - lwkopt = max(1,nw) * nb; - work[1].r = (doublereal) lwkopt, work[1].i = 0.; - } - - if (*info != 0) { - i__2 = -(*info); - xerbla_("ZUNMTR", &i__2); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0 || nq == 1) { - work[1].r = 1., work[1].i = 0.; - return 0; - } - - if (left) { - mi = *m - 1; - ni = *n; - } else { - mi = *m; - ni = *n - 1; - } - - if (upper) { - -/* Q was determined by a call to ZHETRD with UPLO = 'U' */ - - i__2 = nq - 1; - zunmql_(side, trans, &mi, &ni, &i__2, &a[(a_dim1 << 1) + 1], lda, & - tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo); - } else { - -/* Q was determined by a call to ZHETRD with UPLO = 'L' */ - - if (left) { - i1 = 2; - i2 = 1; - } else { - i1 = 1; - i2 = 2; - } - i__2 = nq - 1; - zunmqr_(side, trans, &mi, &ni, &i__2, &a[a_dim1 + 2], lda, &tau[1], & - c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); - } - work[1].r = (doublereal) lwkopt, work[1].i = 0.; - return 0; - -/* End of ZUNMTR */ - -} /* zunmtr_ */ - diff --git a/numpy/linalg/setup.py b/numpy/linalg/setup.py index adc8f1784..6c2d8d0ca 100644 --- a/numpy/linalg/setup.py +++ b/numpy/linalg/setup.py @@ -15,11 +15,14 @@ def configuration(parent_package='', top_path=None): src_dir = 'lapack_lite' lapack_lite_src = [ os.path.join(src_dir, 'python_xerbla.c'), - os.path.join(src_dir, 'zlapack_lite.c'), - os.path.join(src_dir, 'dlapack_lite.c'), - os.path.join(src_dir, 'blas_lite.c'), + os.path.join(src_dir, 'f2c_z_lapack.c'), + os.path.join(src_dir, 'f2c_c_lapack.c'), + os.path.join(src_dir, 'f2c_d_lapack.c'), + os.path.join(src_dir, 'f2c_s_lapack.c'), + os.path.join(src_dir, 'f2c_lapack.c'), + os.path.join(src_dir, 'f2c_blas.c'), + os.path.join(src_dir, 'f2c.c'), os.path.join(src_dir, 'dlamch.c'), - os.path.join(src_dir, 'f2c_lite.c'), ] all_sources = config.paths(lapack_lite_src) -- cgit v1.2.1